Compare commits

...

No commits in common. 'c9' and 'i10cs' have entirely different histories.
c9 ... i10cs

2
.gitignore vendored

@ -1 +1 @@
SOURCES/mdadm-4.2.tar.xz SOURCES/mdadm-4.3.tar.xz

@ -1 +1 @@
27f240cff200e00c28a486a028bcdb14f67f8790 SOURCES/mdadm-4.2.tar.xz fb0bace919325b42a005372b5a5cfa999da6567a SOURCES/mdadm-4.3.tar.xz

@ -0,0 +1,64 @@
From aec3b907de48be54106600a1ecb69d1231f4801d Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Thu, 18 Jan 2024 11:30:15 +0100
Subject: [PATCH 01/41] Remove hardcoded checkpoint interval checking
Mdmon assumes that kernel marks checkpoint every 1/16 of the volume size
and that the checkpoints are equal in size. This is not true, kernel may
mark checkpoints more frequently depending on several factors, including
sync speed. This results in checkpoints reported by mdadm --examine
falling behind the one reported by kernel.
Remove hardcoded checkpoint interval checking.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
monitor.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)
diff --git a/monitor.c b/monitor.c
index 4acec678..b8d9e881 100644
--- a/monitor.c
+++ b/monitor.c
@@ -564,22 +564,10 @@ static int read_and_act(struct active_array *a, fd_set *fds)
}
}
- /* Check for recovery checkpoint notifications. We need to be a
- * minimum distance away from the last checkpoint to prevent
- * over checkpointing. Note reshape checkpointing is handled
- * in the second branch.
+ /* Handle reshape checkpointing
*/
- if (sync_completed > a->last_checkpoint &&
- sync_completed - a->last_checkpoint > a->info.component_size >> 4 &&
- a->curr_action > reshape) {
- /* A (non-reshape) sync_action has reached a checkpoint.
- * Record the updated position in the metadata
- */
- a->last_checkpoint = sync_completed;
- a->container->ss->set_array_state(a, a->curr_state <= clean);
- } else if ((a->curr_action == idle && a->prev_action == reshape) ||
- (a->curr_action == reshape &&
- sync_completed > a->last_checkpoint)) {
+ if ((a->curr_action == idle && a->prev_action == reshape) ||
+ (a->curr_action == reshape && sync_completed > a->last_checkpoint)) {
/* Reshape has progressed or completed so we need to
* update the array state - and possibly the array size
*/
@@ -607,8 +595,10 @@ static int read_and_act(struct active_array *a, fd_set *fds)
a->last_checkpoint = sync_completed;
}
- if (sync_completed > a->last_checkpoint)
+ if (sync_completed > a->last_checkpoint) {
a->last_checkpoint = sync_completed;
+ a->container->ss->set_array_state(a, a->curr_state <= clean);
+ }
if (sync_completed >= a->info.component_size)
a->last_checkpoint = 0;
--
2.40.1

@ -1,47 +0,0 @@
From f1cc8ab9ab6a92c3cd94ab7590b46285e214681e Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Tue, 15 Mar 2022 09:30:30 +0100
Subject: [PATCH 01/83] Unify error message.
Provide the same error message for the same error that can occur in Grow.c and super-intel.c.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 4 ++--
super-intel.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/Grow.c b/Grow.c
index 9c6fc95e..9a947204 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1001,8 +1001,8 @@ int remove_disks_for_takeover(struct supertype *st,
rv = 1;
sysfs_free(arrays);
if (rv) {
- pr_err("Error. Cannot perform operation on /dev/%s\n", st->devnm);
- pr_err("For this operation it MUST be single array in container\n");
+ pr_err("Error. Cannot perform operation on %s- for this operation "
+ "it MUST be single array in container\n", st->devnm);
return rv;
}
}
diff --git a/super-intel.c b/super-intel.c
index d5fad102..5ffa7636 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -11683,8 +11683,8 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
struct imsm_super *mpb = super->anchor;
if (mpb->num_raid_devs > 1) {
- pr_err("Error. Cannot perform operation on %s- for this operation it MUST be single array in container\n",
- geo->dev_name);
+ pr_err("Error. Cannot perform operation on %s- for this operation "
+ "it MUST be single array in container\n", geo->dev_name);
change = -1;
}
}
--
2.38.1

@ -1,33 +0,0 @@
From 5ce5a15f0bf007e850e15259bba4f53736605fb2 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Fri, 25 Mar 2022 12:48:59 +0100
Subject: [PATCH 02/83] mdadm: Fix double free
If there was a size mismatch after creation it would get fixed on grow
in imsm_fix_size_mismatch(), but due to double free "double free or corruption (fasttop)"
error occurs and grow cannot proceed.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 5ffa7636..6ff336ee 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -11783,9 +11783,8 @@ static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index)
st->update_tail = &st->updates;
} else {
imsm_sync_metadata(st);
+ free(update);
}
-
- free(update);
}
ret_val = 0;
exit:
--
2.38.1

@ -0,0 +1,96 @@
From cf87fe75fd83dac008ea116c2c52ec69783fdf6a Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Thu, 18 Jan 2024 11:30:16 +0100
Subject: [PATCH 02/41] monitor: refactor checkpoint update
"if" statements of checkpoint updates have too many responsibilities.
This results in unclear code flow and duplicated code.
Refactor checkpoint update code and simplify "if" statements.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
monitor.c | 51 +++++++++++++++++++++++++--------------------------
1 file changed, 25 insertions(+), 26 deletions(-)
diff --git a/monitor.c b/monitor.c
index b8d9e881..be0bec78 100644
--- a/monitor.c
+++ b/monitor.c
@@ -412,6 +412,7 @@ static int read_and_act(struct active_array *a, fd_set *fds)
int ret = 0;
int count = 0;
struct timeval tv;
+ bool write_checkpoint = false;
a->next_state = bad_word;
a->next_action = bad_action;
@@ -564,40 +565,38 @@ static int read_and_act(struct active_array *a, fd_set *fds)
}
}
- /* Handle reshape checkpointing
- */
- if ((a->curr_action == idle && a->prev_action == reshape) ||
- (a->curr_action == reshape && sync_completed > a->last_checkpoint)) {
- /* Reshape has progressed or completed so we need to
- * update the array state - and possibly the array size
- */
+ /* Update reshape checkpoint, depending if it finished or progressed */
+ if (a->curr_action == idle && a->prev_action == reshape) {
+ char buf[SYSFS_MAX_BUF_SIZE];
+
if (sync_completed != 0)
a->last_checkpoint = sync_completed;
- /* We might need to update last_checkpoint depending on
- * the reason that reshape finished.
- * if array reshape is really finished:
- * set check point to the end, this allows
- * set_array_state() to finalize reshape in metadata
- * if reshape if broken: do not set checkpoint to the end
- * this allows for reshape restart from checkpoint
+
+ /*
+ * If reshape really finished, set checkpoint to the end to finalize it.
+ * Do not set checkpoint if reshape is broken.
+ * Reshape will restart from last checkpoint.
*/
- if ((a->curr_action != reshape) &&
- (a->prev_action == reshape)) {
- char buf[SYSFS_MAX_BUF_SIZE];
- if ((sysfs_get_str(&a->info, NULL,
- "reshape_position",
- buf,
- sizeof(buf)) >= 0) &&
- str_is_none(buf) == true)
+ if (sysfs_get_str(&a->info, NULL, "reshape_position", buf, sizeof(buf)) >= 0)
+ if (str_is_none(buf) == true)
a->last_checkpoint = a->info.component_size;
- }
- a->container->ss->set_array_state(a, a->curr_state <= clean);
- a->last_checkpoint = sync_completed;
+
+ write_checkpoint = true;
}
- if (sync_completed > a->last_checkpoint) {
+ if (a->curr_action >= reshape && sync_completed > a->last_checkpoint) {
+ /* Update checkpoint if neither reshape nor idle action */
a->last_checkpoint = sync_completed;
+
+ write_checkpoint = true;
+ }
+
+ /* Save checkpoint */
+ if (write_checkpoint) {
a->container->ss->set_array_state(a, a->curr_state <= clean);
+
+ if (a->curr_action <= reshape)
+ a->last_checkpoint = sync_completed;
}
if (sync_completed >= a->info.component_size)
--
2.40.1

@ -1,83 +0,0 @@
From fea026b4849182fc8413014c81456e7215af28d9 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Wed, 23 Mar 2022 15:05:19 +0100
Subject: [PATCH 03/83] Grow_reshape: Add r0 grow size error message and update
man
Grow size on r0 is not supported for imsm and native metadata.
Add proper error message.
Update man for proper use of --size.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 6 ++++++
mdadm.8.in | 19 ++++++++++++-------
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/Grow.c b/Grow.c
index 9a947204..aa72490b 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1998,6 +1998,12 @@ int Grow_reshape(char *devname, int fd,
goto release;
}
+ if (array.level == 0) {
+ pr_err("Component size change is not supported for RAID0\n");
+ rv = 1;
+ goto release;
+ }
+
if (reshape_super(st, s->size, UnSet, UnSet, 0, 0, UnSet, NULL,
devname, APPLY_METADATA_CHANGES,
c->verbose > 0)) {
diff --git a/mdadm.8.in b/mdadm.8.in
index be902dba..e2a42425 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -459,7 +459,8 @@ number of spare devices.
.TP
.BR \-z ", " \-\-size=
-Amount (in Kilobytes) of space to use from each drive in RAID levels 1/4/5/6.
+Amount (in Kilobytes) of space to use from each drive in RAID levels 1/4/5/6/10
+and for RAID 0 on external metadata.
This must be a multiple of the chunk size, and must leave about 128Kb
of space at the end of the drive for the RAID superblock.
If this is not specified
@@ -478,10 +479,19 @@ To guard against this it can be useful to set the initial size
slightly smaller than the smaller device with the aim that it will
still be larger than any replacement.
+This option can be used with
+.B \-\-create
+for determining initial size of an array. For external metadata,
+it can be used on a volume, but not on a container itself.
+Setting initial size of
+.B RAID 0
+array is only valid for external metadata.
+
This value can be set with
.B \-\-grow
-for RAID level 1/4/5/6 though
+for RAID level 1/4/5/6/10 though
DDF arrays may not be able to support this.
+RAID 0 array size cannot be changed.
If the array was created with a size smaller than the currently
active drives, the extra space can be accessed using
.BR \-\-grow .
@@ -501,11 +511,6 @@ problems the array can be made bigger again with no loss with another
.B "\-\-grow \-\-size="
command.
-This value cannot be used when creating a
-.B CONTAINER
-such as with DDF and IMSM metadata, though it perfectly valid when
-creating an array inside a container.
-
.TP
.BR \-Z ", " \-\-array\-size=
This is only meaningful with
--
2.38.1

@ -0,0 +1,47 @@
From fdb7e802f4cf64d067c3abaafa35056e2bc1ed43 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Thu, 18 Jan 2024 11:30:17 +0100
Subject: [PATCH 03/41] Super-intel: Fix first checkpoint restart
When imsm based array is stopped after reaching first checkpoint and
then assembled, first checkpoint is reported as 0.
This behaviour is valid only for initial checkpoint, if the array was
stopped while performing some action.
Last checkpoint value is not taken from metadata but always starts
with 0 and it's incremented when sync_completed in sysfs changes.
In simplification, read_and_act() is responsible for checkpoint updates
and is executed each time sysfs checkpoint update happens. For first
checkpoint it is executed twice and due to marking checkpoint before
triggering any action on the array, it is impossible to read
sync_completed from sysfs in just two iterations.
The workaround to this is not marking any checkpoint for first
sysfs checkpoint after RAID assembly, to preserve checkpoint value
stored in metadata.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
super-intel.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index dbea235d..e61f3f6f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8771,6 +8771,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
super->updates_pending++;
}
+ if (a->prev_action == idle)
+ goto skip_mark_checkpoint;
+
mark_checkpoint:
/* skip checkpointing for general migration,
* it is controlled in mdadm
--
2.40.1

@ -1,7 +1,7 @@
From ea2ca7ed3dbbf881ce08d80fe371f2aaf05011c3 Mon Sep 17 00:00:00 2001 From ea2ca7ed3dbbf881ce08d80fe371f2aaf05011c3 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com> From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Thu, 18 Jan 2024 11:30:18 +0100 Date: Thu, 18 Jan 2024 11:30:18 +0100
Subject: [PATCH 1/1] Grow: Move update_tail assign to Grow_reshape() Subject: [PATCH 04/41] Grow: Move update_tail assign to Grow_reshape()
Due to e919fb0af245 ("FIX: Enable metadata updates for raid0") code Due to e919fb0af245 ("FIX: Enable metadata updates for raid0") code
can't enter super-intel.c:3415, resulting in checkpoint not being can't enter super-intel.c:3415, resulting in checkpoint not being
@ -57,5 +57,5 @@ index f95dae82..5498e54f 100644
ret_val = 1; ret_val = 1;
goto Grow_continue_command_exit; goto Grow_continue_command_exit;
-- --
2.41.0 2.40.1

@ -1,67 +0,0 @@
From cf9a109209aad285372b67306d54118af6fc522b Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Fri, 14 Jan 2022 16:44:33 +0100
Subject: [PATCH 04/83] udev: adapt rules to systemd v247
New events have been added in kernel 4.14 ("bind" and "unbind").
Systemd maintainer suggests to modify "add|change" branches.
This patch implements their suggestions. There is no issue yet because
new event types are not used in md.
Please see systemd announcement for details[1].
[1] https://lists.freedesktop.org/archives/systemd-devel/2020-November/045646.html
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 2 +-
udev-md-raid-assembly.rules | 5 +++--
udev-md-raid-safe-timeouts.rules | 2 +-
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index 13c9076e..2967ace1 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -3,7 +3,7 @@
SUBSYSTEM!="block", GOTO="md_end"
# handle md arrays
-ACTION!="add|change", GOTO="md_end"
+ACTION=="remove", GOTO="md_end"
KERNEL!="md*", GOTO="md_end"
# partitions have no md/{array_state,metadata_version}, but should not
diff --git a/udev-md-raid-assembly.rules b/udev-md-raid-assembly.rules
index d668cddd..39b4344b 100644
--- a/udev-md-raid-assembly.rules
+++ b/udev-md-raid-assembly.rules
@@ -30,8 +30,9 @@ LABEL="md_inc"
# remember you can limit what gets auto/incrementally assembled by
# mdadm.conf(5)'s 'AUTO' and selectively whitelist using 'ARRAY'
-ACTION=="add|change", IMPORT{program}="BINDIR/mdadm --incremental --export $devnode --offroot $env{DEVLINKS}"
-ACTION=="add|change", ENV{MD_STARTED}=="*unsafe*", ENV{MD_FOREIGN}=="no", ENV{SYSTEMD_WANTS}+="mdadm-last-resort@$env{MD_DEVICE}.timer"
+ACTION!="remove", IMPORT{program}="BINDIR/mdadm --incremental --export $devnode --offroot $env{DEVLINKS}"
+ACTION!="remove", ENV{MD_STARTED}=="*unsafe*", ENV{MD_FOREIGN}=="no", ENV{SYSTEMD_WANTS}+="mdadm-last-resort@$env{MD_DEVICE}.timer"
+
ACTION=="remove", ENV{ID_PATH}=="?*", RUN+="BINDIR/mdadm -If $name --path $env{ID_PATH}"
ACTION=="remove", ENV{ID_PATH}!="?*", RUN+="BINDIR/mdadm -If $name"
diff --git a/udev-md-raid-safe-timeouts.rules b/udev-md-raid-safe-timeouts.rules
index 12bdcaa8..2e185cee 100644
--- a/udev-md-raid-safe-timeouts.rules
+++ b/udev-md-raid-safe-timeouts.rules
@@ -50,7 +50,7 @@ ENV{DEVTYPE}!="partition", GOTO="md_timeouts_end"
IMPORT{program}="/sbin/mdadm --examine --export $devnode"
-ACTION=="add|change", \
+ACTION!="remove", \
ENV{ID_FS_TYPE}=="linux_raid_member", \
ENV{MD_LEVEL}=="raid[1-9]*", \
TEST=="/sys/block/$parent/device/timeout", \
--
2.38.1

@ -0,0 +1,56 @@
From 37eeae381a8ed07a1fabb64184fe45d95a861496 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Thu, 18 Jan 2024 11:30:19 +0100
Subject: [PATCH 05/41] Add understanding output section in man
Add new section in man for explaining mdadm outputs.
Describe checkpoint entry.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdadm.8.in | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index 96a4a08e..9ba66825 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -3179,7 +3179,7 @@ environment. This can be useful for testing or for disaster
recovery. You should be aware that interoperability may be
compromised by setting this value.
-These change can also be suppressed by adding
+These change can also be suppressed by adding
.B mdadm.imsm.test=1
to the kernel command line. This makes it easy to test IMSM
code in a virtual machine that doesn't have IMSM virtual hardware.
@@ -3454,6 +3454,25 @@ is any string. These names are supported by
since version 3.3 provided they are enabled in
.IR mdadm.conf .
+.SH UNDERSTANDING OUTPUT
+
+.TP
+EXAMINE
+
+.TP
+.B checkpoint
+Checkpoint value is reported when array is performing some action including
+resync, recovery or reshape. Checkpoints allow resuming action from certain
+point if it was interrupted.
+
+Checkpoint is reported as combination of two values: current migration unit
+and number of blocks per unit. By multiplying those values and dividing by
+array size checkpoint progress percentage can be obtained in relation to
+current progress reported in /proc/mdstat. Checkpoint is also related to (and
+sometimes based on) sysfs entry sync_completed but depending on action units
+may differ. Even if units are the same, it should not be expected that
+checkpoint and sync_completed will be exact match nor updated simultaneously.
+
.SH NOTE
.I mdadm
was previously known as
--
2.40.1

@ -1,252 +0,0 @@
From 83a379cfbd283b387919fe05d44eb4c49e155ad6 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Mon, 21 Feb 2022 13:05:20 +0100
Subject: [PATCH 05/83] Replace error prone signal() with sigaction()
Until now signal() was used, whose implementation could vary [1].
Sigaction() call is preferred. This commit introduces replacement
from signal() to sigaction() by the use of signal_s() wrapper.
Also remove redundant signal.h header includes.
[1] https://man7.org/linux/man-pages/man2/signal.2.html
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 4 ++--
Monitor.c | 5 +++--
managemon.c | 1 -
mdadm.h | 22 ++++++++++++++++++++++
mdmon.c | 1 -
monitor.c | 1 -
probe_roms.c | 6 +++---
raid6check.c | 25 +++++++++++++++----------
util.c | 1 -
9 files changed, 45 insertions(+), 21 deletions(-)
diff --git a/Grow.c b/Grow.c
index aa72490b..18c5719b 100644
--- a/Grow.c
+++ b/Grow.c
@@ -26,7 +26,6 @@
#include <sys/mman.h>
#include <stddef.h>
#include <stdint.h>
-#include <signal.h>
#include <sys/wait.h>
#if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
@@ -3566,7 +3565,8 @@ started:
fd = -1;
mlockall(MCL_FUTURE);
- signal(SIGTERM, catch_term);
+ if (signal_s(SIGTERM, catch_term) == SIG_ERR)
+ goto release;
if (st->ss->external) {
/* metadata handler takes it from here */
diff --git a/Monitor.c b/Monitor.c
index 30c031a2..c0ab5412 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -26,7 +26,6 @@
#include "md_p.h"
#include "md_u.h"
#include <sys/wait.h>
-#include <signal.h>
#include <limits.h>
#include <syslog.h>
#ifndef NO_LIBUDEV
@@ -435,8 +434,10 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info)
if (mp) {
FILE *mdstat;
char hname[256];
+
gethostname(hname, sizeof(hname));
- signal(SIGPIPE, SIG_IGN);
+ signal_s(SIGPIPE, SIG_IGN);
+
if (info->mailfrom)
fprintf(mp, "From: %s\n", info->mailfrom);
else
diff --git a/managemon.c b/managemon.c
index bb7334cf..0e9bdf00 100644
--- a/managemon.c
+++ b/managemon.c
@@ -106,7 +106,6 @@
#include "mdmon.h"
#include <sys/syscall.h>
#include <sys/socket.h>
-#include <signal.h>
static void close_aa(struct active_array *aa)
{
diff --git a/mdadm.h b/mdadm.h
index c7268a71..26e7e5cd 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -46,6 +46,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include <string.h>
#include <syslog.h>
#include <stdbool.h>
+#include <signal.h>
/* Newer glibc requires sys/sysmacros.h directly for makedev() */
#include <sys/sysmacros.h>
#ifdef __dietlibc__
@@ -1729,6 +1730,27 @@ static inline char *to_subarray(struct mdstat_ent *ent, char *container)
return &ent->metadata_version[10+strlen(container)+1];
}
+/**
+ * signal_s() - Wrapper for sigaction() with signal()-like interface.
+ * @sig: The signal to set the signal handler to.
+ * @handler: The signal handler.
+ *
+ * Return: previous handler or SIG_ERR on failure.
+ */
+static inline sighandler_t signal_s(int sig, sighandler_t handler)
+{
+ struct sigaction new_act;
+ struct sigaction old_act;
+
+ new_act.sa_handler = handler;
+ new_act.sa_flags = 0;
+
+ if (sigaction(sig, &new_act, &old_act) == 0)
+ return old_act.sa_handler;
+
+ return SIG_ERR;
+}
+
#ifdef DEBUG
#define dprintf(fmt, arg...) \
fprintf(stderr, "%s: %s: "fmt, Name, __func__, ##arg)
diff --git a/mdmon.c b/mdmon.c
index c71e62c6..5570574b 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -56,7 +56,6 @@
#include <errno.h>
#include <string.h>
#include <fcntl.h>
-#include <signal.h>
#include <dirent.h>
#ifdef USE_PTHREADS
#include <pthread.h>
diff --git a/monitor.c b/monitor.c
index e0d3be67..b877e595 100644
--- a/monitor.c
+++ b/monitor.c
@@ -22,7 +22,6 @@
#include "mdmon.h"
#include <sys/syscall.h>
#include <sys/select.h>
-#include <signal.h>
static char *array_states[] = {
"clear", "inactive", "suspended", "readonly", "read-auto",
diff --git a/probe_roms.c b/probe_roms.c
index 7ea04c7a..94c80c2c 100644
--- a/probe_roms.c
+++ b/probe_roms.c
@@ -22,7 +22,6 @@
#include "probe_roms.h"
#include "mdadm.h"
#include <unistd.h>
-#include <signal.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
@@ -69,7 +68,8 @@ static int probe_address16(const __u16 *ptr, __u16 *val)
void probe_roms_exit(void)
{
- signal(SIGBUS, SIG_DFL);
+ signal_s(SIGBUS, SIG_DFL);
+
if (rom_fd >= 0) {
close(rom_fd);
rom_fd = -1;
@@ -98,7 +98,7 @@ int probe_roms_init(unsigned long align)
if (roms_init())
return -1;
- if (signal(SIGBUS, sigbus) == SIG_ERR)
+ if (signal_s(SIGBUS, sigbus) == SIG_ERR)
rc = -1;
if (rc == 0) {
fd = open("/dev/mem", O_RDONLY);
diff --git a/raid6check.c b/raid6check.c
index a8e6005b..99477761 100644
--- a/raid6check.c
+++ b/raid6check.c
@@ -24,7 +24,6 @@
#include "mdadm.h"
#include <stdint.h>
-#include <signal.h>
#include <sys/mman.h>
#define CHECK_PAGE_BITS (12)
@@ -130,30 +129,36 @@ void raid6_stats(int *disk, int *results, int raid_disks, int chunk_size)
}
int lock_stripe(struct mdinfo *info, unsigned long long start,
- int chunk_size, int data_disks, sighandler_t *sig) {
+ int chunk_size, int data_disks, sighandler_t *sig)
+{
int rv;
+
+ sig[0] = signal_s(SIGTERM, SIG_IGN);
+ sig[1] = signal_s(SIGINT, SIG_IGN);
+ sig[2] = signal_s(SIGQUIT, SIG_IGN);
+
+ if (sig[0] == SIG_ERR || sig[1] == SIG_ERR || sig[2] == SIG_ERR)
+ return 1;
+
if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
return 2;
}
- sig[0] = signal(SIGTERM, SIG_IGN);
- sig[1] = signal(SIGINT, SIG_IGN);
- sig[2] = signal(SIGQUIT, SIG_IGN);
-
rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
return rv * 256;
}
-int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
+int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig)
+{
int rv;
rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
- signal(SIGQUIT, sig[2]);
- signal(SIGINT, sig[1]);
- signal(SIGTERM, sig[0]);
+ signal_s(SIGQUIT, sig[2]);
+ signal_s(SIGINT, sig[1]);
+ signal_s(SIGTERM, sig[0]);
if(munlockall() != 0)
return 3;
diff --git a/util.c b/util.c
index 3d05d074..cc94f96e 100644
--- a/util.c
+++ b/util.c
@@ -35,7 +35,6 @@
#include <poll.h>
#include <ctype.h>
#include <dirent.h>
-#include <signal.h>
#include <dlfcn.h>
--
2.38.1

@ -0,0 +1,48 @@
From b8f5523a795b8f7e56dfbc139ce7f64728b67726 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 20 Feb 2024 11:56:07 +0100
Subject: [PATCH 06/41] Create: add_disk_to_super() fix resource leak
Fixes resource leak in add_disk_to_super().
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Create.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/Create.c b/Create.c
index 8082f54a..7e9170b6 100644
--- a/Create.c
+++ b/Create.c
@@ -279,8 +279,10 @@ static int add_disk_to_super(int mdfd, struct shape *s, struct context *c,
dv->devname);
return 1;
}
- if (!fstat_is_blkdev(fd, dv->devname, &rdev))
+ if (!fstat_is_blkdev(fd, dv->devname, &rdev)) {
+ close(fd);
return 1;
+ }
info->disk.major = major(rdev);
info->disk.minor = minor(rdev);
}
@@ -289,6 +291,7 @@ static int add_disk_to_super(int mdfd, struct shape *s, struct context *c,
if (st->ss->add_to_super(st, &info->disk, fd, dv->devname,
dv->data_offset)) {
ioctl(mdfd, STOP_ARRAY, NULL);
+ close(fd);
return 1;
}
st->ss->getinfo_super(st, info, NULL);
@@ -297,6 +300,7 @@ static int add_disk_to_super(int mdfd, struct shape *s, struct context *c,
*zero_pid = write_zeroes_fork(fd, s, st, dv);
if (*zero_pid <= 0) {
ioctl(mdfd, STOP_ARRAY, NULL);
+ close(fd);
return 1;
}
}
--
2.40.1

File diff suppressed because it is too large Load Diff

@ -1,48 +0,0 @@
From c23400377bb3d8e98e810cd92dba478dac1dff82 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Fri, 18 Mar 2022 09:26:05 +0100
Subject: [PATCH 07/83] mdadm: Update ReadMe
Instead of hardcoded config file path give reference to config manual.
Add missing monitordelay and homecluster parameters.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
ReadMe.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/ReadMe.c b/ReadMe.c
index 81399765..8f873c48 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -613,7 +613,6 @@ char Help_incr[] =
;
char Help_config[] =
-"The /etc/mdadm.conf config file:\n\n"
" The config file contains, apart from blank lines and comment lines that\n"
" start with a hash(#), array lines, device lines, and various\n"
" configuration lines.\n"
@@ -636,10 +635,12 @@ char Help_config[] =
" than a device must match all of them to be considered.\n"
"\n"
" Other configuration lines include:\n"
-" mailaddr, mailfrom, program used for --monitor mode\n"
-" create, auto used when creating device names in /dev\n"
-" homehost, policy, part-policy used to guide policy in various\n"
-" situations\n"
+" mailaddr, mailfrom, program, monitordelay used for --monitor mode\n"
+" create, auto used when creating device names in /dev\n"
+" homehost, homecluster, policy, part-policy used to guide policy in various\n"
+" situations\n"
+"\n"
+"For more details see mdadm.conf(5).\n"
"\n"
;
--
2.38.1

@ -0,0 +1,35 @@
From 38cb95dd28fa790ae6d90b169f1fd2b1d09a02f2 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 20 Feb 2024 11:56:08 +0100
Subject: [PATCH 07/41] mdadm: signal_s() init variables
Init sigaction structs in signal_s().
This approach might throw warnings for GCC 4.x and lower.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdadm.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/mdadm.h b/mdadm.h
index 1f28b3e7..75c887e4 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1856,11 +1856,10 @@ static inline char *to_subarray(struct mdstat_ent *ent, char *container)
*/
static inline sighandler_t signal_s(int sig, sighandler_t handler)
{
- struct sigaction new_act;
- struct sigaction old_act;
+ struct sigaction new_act = {0};
+ struct sigaction old_act = {0};
new_act.sa_handler = handler;
- new_act.sa_flags = 0;
if (sigaction(sig, &new_act, &old_act) == 0)
return old_act.sa_handler;
--
2.40.1

@ -0,0 +1,48 @@
From b7d7837128e90c8b496ebc3d88eda1a8ff477392 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 20 Feb 2024 11:56:09 +0100
Subject: [PATCH 08/41] Monitor: open file before check in check_one_sharer()
Open file before performing checks in check_one_sharer() to avoid
file tampering.
Remove redundant access check.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Monitor.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index 824a69fc..7cee95d4 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -451,20 +451,17 @@ static int check_one_sharer(int scan)
return 2;
}
- if (access(AUTOREBUILD_PID_PATH, F_OK) != 0)
- return 0;
-
- if (!is_file(AUTOREBUILD_PID_PATH)) {
- pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH);
- return 2;
- }
-
fp = fopen(AUTOREBUILD_PID_PATH, "r");
if (!fp) {
pr_err("Cannot open %s file.\n", AUTOREBUILD_PID_PATH);
return 2;
}
+ if (!is_file(AUTOREBUILD_PID_PATH)) {
+ pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH);
+ return 2;
+ }
+
if (fscanf(fp, "%d", &pid) != 1) {
pr_err("Cannot read pid from %s file.\n", AUTOREBUILD_PID_PATH);
fclose(fp);
--
2.40.1

@ -1,203 +0,0 @@
From 24e075c659d0a8718aabefe5af4c97195a188af7 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Fri, 18 Mar 2022 09:26:06 +0100
Subject: [PATCH 08/83] mdadm: Update config man regarding default files and
multi-keyword behavior
Simplify default and alternative config file and directory location references
from mdadm(8) as references to mdadm.conf(5). Add FILE section in config man
and explain order and conditions in which default and alternative config files
and directories are used.
Update config man behavior regarding parsing order when multiple keywords/config
files are involved.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 30 +++++++++--------------
mdadm.conf.5.in | 65 ++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 71 insertions(+), 24 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index 8b21ffd4..0be02e4a 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -266,14 +266,11 @@ the exact meaning of this option in different contexts.
.TP
.BR \-c ", " \-\-config=
-Specify the config file or directory. Default is to use
-.B {CONFFILE}
-and
-.BR {CONFFILE}.d ,
-or if those are missing then
-.B {CONFFILE2}
-and
-.BR {CONFFILE2}.d .
+Specify the config file or directory. If not specified, default config file
+and default conf.d directory will be used. See
+.BR mdadm.conf (5)
+for more details.
+
If the config file given is
.B "partitions"
then nothing will be read, but
@@ -2013,11 +2010,9 @@ The config file is only used if explicitly named with
.B \-\-config
or requested with (a possibly implicit)
.BR \-\-scan .
-In the later case,
-.B {CONFFILE}
-or
-.B {CONFFILE2}
-is used.
+In the later case, default config file is used. See
+.BR mdadm.conf (5)
+for more details.
If
.B \-\-scan
@@ -3346,16 +3341,15 @@ on Monitor mode.
.SS {CONFFILE} (or {CONFFILE2})
-The config file lists which devices may be scanned to see if
-they contain MD super block, and gives identifying information
-(e.g. UUID) about known MD arrays. See
+Default config file. See
.BR mdadm.conf (5)
for more details.
.SS {CONFFILE}.d (or {CONFFILE2}.d)
-A directory containing configuration files which are read in lexical
-order.
+Default directory containing configuration files. See
+.BR mdadm.conf (5)
+for more details.
.SS {MAP_PATH}
When
diff --git a/mdadm.conf.5.in b/mdadm.conf.5.in
index 83edd008..dd331a6a 100644
--- a/mdadm.conf.5.in
+++ b/mdadm.conf.5.in
@@ -88,7 +88,8 @@ but only the major and minor device numbers. It scans
.I /dev
to find the name that matches the numbers.
-If no DEVICE line is present, then "DEVICE partitions containers" is assumed.
+If no DEVICE line is present in any config file,
+then "DEVICE partitions containers" is assumed.
For example:
.IP
@@ -272,6 +273,10 @@ catenated with spaces to form the address.
Note that this value cannot be set via the
.I mdadm
commandline. It is only settable via the config file.
+There should only be one
+.B MAILADDR
+line and it should have only one address. Any subsequent addresses
+are silently ignored.
.TP
.B PROGRAM
@@ -286,7 +291,8 @@ device.
There should only be one
.B program
-line and it should be give only one program.
+line and it should be given only one program. Any subsequent programs
+are silently ignored.
.TP
@@ -295,7 +301,14 @@ The
.B create
line gives default values to be used when creating arrays, new members
of arrays, and device entries for arrays.
-These include:
+
+There should only be one
+.B create
+line. Any subsequent lines will override the previous settings.
+
+Keywords used in the
+.I CREATE
+line and supported values are:
.RS 4
.TP
@@ -475,8 +488,8 @@ The known metadata types are
.B AUTO
should be given at most once. Subsequent lines are silently ignored.
-Thus an earlier config file in a config directory will over-ride
-the setting in a later config file.
+Thus a later config file in a config directory will not overwrite
+the setting in an earlier config file.
.TP
.B POLICY
@@ -594,6 +607,7 @@ The
line lists custom values of MD device's sysfs attributes which will be
stored in sysfs after the array is assembled. Multiple lines are allowed and each
line has to contain the uuid or the name of the device to which it relates.
+Lines are applied in reverse order.
.RS 4
.TP
.B uuid=
@@ -621,7 +635,46 @@ is running in
.B \-\-monitor
mode.
.B \-d/\-\-delay
-command line argument takes precedence over the config file
+command line argument takes precedence over the config file.
+
+If multiple
+.B MONITORDELAY
+lines are provided, only the first non-zero value is considered.
+
+.SH FILES
+
+.SS {CONFFILE}
+
+The default config file location, used when
+.I mdadm
+is running without --config option.
+
+.SS {CONFFILE}.d
+
+The default directory with config files. Used when
+.I mdadm
+is running without --config option, after successful reading of the
+.B {CONFFILE}
+default config file. Files in that directory
+are read in lexical order.
+
+
+.SS {CONFFILE2}
+
+Alternative config file that is read, when
+.I mdadm
+is running without --config option and the
+.B {CONFFILE}
+default config file was not opened successfully.
+
+.SS {CONFFILE2}.d
+
+The alternative directory with config files. Used when
+.I mdadm
+is running without --config option, after reading the
+.B {CONFFILE2}
+alternative config file whether it was successful or not. Files in
+that directory are read in lexical order.
.SH EXAMPLE
DEVICE /dev/sd[bcdjkl]1
--
2.38.1

@ -0,0 +1,34 @@
From e44d13f466e30c018887cd5aaf1212ed9f510813 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 20 Feb 2024 11:56:10 +0100
Subject: [PATCH 09/41] Grow: remove dead condition in Grow_reshape()
Remove dead "if" condition from Grow_reshape(). Sysfs read check is
performed earlier in the code.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Grow.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/Grow.c b/Grow.c
index 5498e54f..c69a342d 100644
--- a/Grow.c
+++ b/Grow.c
@@ -2098,11 +2098,7 @@ int Grow_reshape(char *devname, int fd,
/* got truncated to 32bit, write to
* component_size instead
*/
- if (sra)
- rv = sysfs_set_num(sra, NULL,
- "component_size", s->size);
- else
- rv = -1;
+ rv = sysfs_set_num(sra, NULL, "component_size", s->size);
} else {
rv = md_set_array_info(fd, &array);
--
2.40.1

@ -1,45 +0,0 @@
From c33bbda5b0e127bb161fd4ad44bcfaa2a5daf153 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Fri, 18 Mar 2022 09:26:07 +0100
Subject: [PATCH 09/83] mdadm: Update config manual
Add missing HOMECLUSTER keyword description.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.conf.5.in | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/mdadm.conf.5.in b/mdadm.conf.5.in
index dd331a6a..cd4e6a9d 100644
--- a/mdadm.conf.5.in
+++ b/mdadm.conf.5.in
@@ -439,6 +439,23 @@ from any possible local name. e.g.
.B /dev/md/1_1
or
.BR /dev/md/home_0 .
+
+.TP
+.B HOMECLUSTER
+The
+.B homecluster
+line gives a default value for the
+.B \-\-homecluster=
+option to mdadm. It specifies the cluster name for the md device.
+The md device can be assembled only on the cluster which matches
+the name specified. If
+.B homecluster
+is not provided, mdadm tries to detect the cluster name automatically.
+
+There should only be one
+.B homecluster
+line. Any subsequent lines will be silently ignored.
+
.TP
.B AUTO
A list of names of metadata format can be given, each preceded by a
--
2.38.1

@ -1,153 +0,0 @@
From 913f07d1db4a0078acc26d6ccabe1c315cf9273c Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 20 Jan 2022 13:18:32 +0100
Subject: [PATCH 10/83] Create, Build: use default_layout()
This code is duplicated for Build mode so make default_layout() extern
and use it. Simplify the function structure.
It introduced change for Build mode, now for raid0 RAID0_ORIG_LAYOUT
will be returned same as for Create.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Build.c | 23 +------------------
Create.c | 67 ++++++++++++++++++++++++++++++++++----------------------
mdadm.h | 1 +
3 files changed, 43 insertions(+), 48 deletions(-)
diff --git a/Build.c b/Build.c
index 962c2e37..8d6f6f58 100644
--- a/Build.c
+++ b/Build.c
@@ -71,28 +71,7 @@ int Build(char *mddev, struct mddev_dev *devlist,
}
if (s->layout == UnSet)
- switch(s->level) {
- default: /* no layout */
- s->layout = 0;
- break;
- case 10:
- s->layout = 0x102; /* near=2, far=1 */
- if (c->verbose > 0)
- pr_err("layout defaults to n1\n");
- break;
- case 5:
- case 6:
- s->layout = map_name(r5layout, "default");
- if (c->verbose > 0)
- pr_err("layout defaults to %s\n", map_num(r5layout, s->layout));
- break;
- case LEVEL_FAULTY:
- s->layout = map_name(faultylayout, "default");
-
- if (c->verbose > 0)
- pr_err("layout defaults to %s\n", map_num(faultylayout, s->layout));
- break;
- }
+ s->layout = default_layout(NULL, s->level, c->verbose);
/* We need to create the device. It can have no name. */
map_lock(&map);
diff --git a/Create.c b/Create.c
index 0ff1922d..9ea19de0 100644
--- a/Create.c
+++ b/Create.c
@@ -39,39 +39,54 @@ static int round_size_and_verify(unsigned long long *size, int chunk)
return 0;
}
-static int default_layout(struct supertype *st, int level, int verbose)
+/**
+ * default_layout() - Get default layout for level.
+ * @st: metadata requested, could be NULL.
+ * @level: raid level requested.
+ * @verbose: verbose level.
+ *
+ * Try to ask metadata handler first, otherwise use global defaults.
+ *
+ * Return: Layout or &UnSet, return value meaning depends on level used.
+ */
+int default_layout(struct supertype *st, int level, int verbose)
{
int layout = UnSet;
+ mapping_t *layout_map = NULL;
+ char *layout_name = NULL;
if (st && st->ss->default_geometry)
st->ss->default_geometry(st, &level, &layout, NULL);
- if (layout == UnSet)
- switch(level) {
- default: /* no layout */
- layout = 0;
- break;
- case 0:
- layout = RAID0_ORIG_LAYOUT;
- break;
- case 10:
- layout = 0x102; /* near=2, far=1 */
- if (verbose > 0)
- pr_err("layout defaults to n2\n");
- break;
- case 5:
- case 6:
- layout = map_name(r5layout, "default");
- if (verbose > 0)
- pr_err("layout defaults to %s\n", map_num(r5layout, layout));
- break;
- case LEVEL_FAULTY:
- layout = map_name(faultylayout, "default");
+ if (layout != UnSet)
+ return layout;
- if (verbose > 0)
- pr_err("layout defaults to %s\n", map_num(faultylayout, layout));
- break;
- }
+ switch (level) {
+ default: /* no layout */
+ layout = 0;
+ break;
+ case 0:
+ layout = RAID0_ORIG_LAYOUT;
+ break;
+ case 10:
+ layout = 0x102; /* near=2, far=1 */
+ layout_name = "n2";
+ break;
+ case 5:
+ case 6:
+ layout_map = r5layout;
+ break;
+ case LEVEL_FAULTY:
+ layout_map = faultylayout;
+ break;
+ }
+
+ if (layout_map) {
+ layout = map_name(layout_map, "default");
+ layout_name = map_num(layout_map, layout);
+ }
+ if (layout_name && verbose > 0)
+ pr_err("layout defaults to %s\n", layout_name);
return layout;
}
diff --git a/mdadm.h b/mdadm.h
index 26e7e5cd..cd72e711 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1512,6 +1512,7 @@ extern int get_linux_version(void);
extern int mdadm_version(char *version);
extern unsigned long long parse_size(char *size);
extern int parse_uuid(char *str, int uuid[4]);
+int default_layout(struct supertype *st, int level, int verbose);
extern int is_near_layout_10(int layout);
extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout);
--
2.38.1

@ -0,0 +1,34 @@
From 7ccf947eb595c1bb729c32ba18ce171dada76a68 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 20 Feb 2024 11:56:11 +0100
Subject: [PATCH 10/41] super1: check fd before passing to get_dev_size() in
add_to_super1()
Check if file descriptor is valid before passing it to get_dev_size() in
add_to_super1().
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
super1.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/super1.c b/super1.c
index 871d19f0..5439b7bb 100644
--- a/super1.c
+++ b/super1.c
@@ -1752,7 +1752,10 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
di->devname = devname;
di->disk = *dk;
di->data_offset = data_offset;
- get_dev_size(fd, NULL, &di->dev_size);
+
+ if (is_fd_valid(fd))
+ get_dev_size(fd, NULL, &di->dev_size);
+
di->next = NULL;
*dip = di;
--
2.40.1

@ -1,382 +0,0 @@
From 5f21d67472ad08c1e96b4385254adba79aa1c467 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 20 Jan 2022 13:18:33 +0100
Subject: [PATCH 11/83] mdadm: add map_num_s()
map_num() returns NULL if key is not defined. This patch adds
alternative, non NULL version for cases where NULL is not expected.
There are many printf() calls where map_num() is called on variable
without NULL verification. It works, even if NULL is passed because
gcc is able to ignore NULL argument quietly but the behavior is
undefined. For safety reasons such usages will use map_num_s() now.
It is a potential point of regression.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 6 ++----
Create.c | 2 +-
Detail.c | 4 ++--
Grow.c | 16 ++++++++--------
Query.c | 4 ++--
maps.c | 24 ++++++++++++++++++++++++
mdadm.c | 20 ++++++++++----------
mdadm.h | 2 +-
super-ddf.c | 6 +++---
super-intel.c | 2 +-
super0.c | 2 +-
super1.c | 2 +-
sysfs.c | 9 +++++----
13 files changed, 61 insertions(+), 38 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 704b8293..9eac9ce0 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -63,7 +63,7 @@ static void set_array_assembly_status(struct context *c,
struct assembly_array_info *arr)
{
int raid_disks = arr->preexist_cnt + arr->new_cnt;
- char *status_msg = map_num(assemble_statuses, status);
+ char *status_msg = map_num_s(assemble_statuses, status);
if (c->export && result)
*result |= status;
@@ -77,9 +77,7 @@ static void set_array_assembly_status(struct context *c,
fprintf(stderr, " (%d new)", arr->new_cnt);
if (arr->exp_cnt)
fprintf(stderr, " ( + %d for expansion)", arr->exp_cnt);
- if (status_msg)
- fprintf(stderr, " %s", status_msg);
- fprintf(stderr, ".\n");
+ fprintf(stderr, " %s.\n", status_msg);
}
static int name_matches(char *found, char *required, char *homehost, int require_homehost)
diff --git a/Create.c b/Create.c
index 9ea19de0..c84c1ac8 100644
--- a/Create.c
+++ b/Create.c
@@ -83,7 +83,7 @@ int default_layout(struct supertype *st, int level, int verbose)
if (layout_map) {
layout = map_name(layout_map, "default");
- layout_name = map_num(layout_map, layout);
+ layout_name = map_num_s(layout_map, layout);
}
if (layout_name && verbose > 0)
pr_err("layout defaults to %s\n", layout_name);
diff --git a/Detail.c b/Detail.c
index 95d4cc70..ce7a8445 100644
--- a/Detail.c
+++ b/Detail.c
@@ -495,8 +495,8 @@ int Detail(char *dev, struct context *c)
if (array.state & (1 << MD_SB_CLEAN)) {
if ((array.level == 0) ||
(array.level == LEVEL_LINEAR))
- arrayst = map_num(sysfs_array_states,
- sra->array_state);
+ arrayst = map_num_s(sysfs_array_states,
+ sra->array_state);
else
arrayst = "clean";
} else {
diff --git a/Grow.c b/Grow.c
index 18c5719b..8a242b0f 100644
--- a/Grow.c
+++ b/Grow.c
@@ -547,7 +547,7 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
if (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
s->consistency_policy != CONSISTENCY_POLICY_PPL) {
pr_err("Operation not supported for consistency policy %s\n",
- map_num(consistency_policies, s->consistency_policy));
+ map_num_s(consistency_policies, s->consistency_policy));
return 1;
}
@@ -578,14 +578,14 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
if (sra->consistency_policy == (unsigned)s->consistency_policy) {
pr_err("Consistency policy is already %s\n",
- map_num(consistency_policies, s->consistency_policy));
+ map_num_s(consistency_policies, s->consistency_policy));
ret = 1;
goto free_info;
} else if (sra->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
sra->consistency_policy != CONSISTENCY_POLICY_PPL) {
pr_err("Current consistency policy is %s, cannot change to %s\n",
- map_num(consistency_policies, sra->consistency_policy),
- map_num(consistency_policies, s->consistency_policy));
+ map_num_s(consistency_policies, sra->consistency_policy),
+ map_num_s(consistency_policies, s->consistency_policy));
ret = 1;
goto free_info;
}
@@ -704,8 +704,8 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
}
ret = sysfs_set_str(sra, NULL, "consistency_policy",
- map_num(consistency_policies,
- s->consistency_policy));
+ map_num_s(consistency_policies,
+ s->consistency_policy));
if (ret)
pr_err("Failed to change array consistency policy\n");
@@ -2241,7 +2241,7 @@ size_change_error:
info.new_layout = UnSet;
if (info.array.level == 6 && info.new_level == UnSet) {
char l[40], *h;
- strcpy(l, map_num(r6layout, info.array.layout));
+ strcpy(l, map_num_s(r6layout, info.array.layout));
h = strrchr(l, '-');
if (h && strcmp(h, "-6") == 0) {
*h = 0;
@@ -2266,7 +2266,7 @@ size_change_error:
info.new_layout = info.array.layout;
else if (info.array.level == 5 && info.new_level == 6) {
char l[40];
- strcpy(l, map_num(r5layout, info.array.layout));
+ strcpy(l, map_num_s(r5layout, info.array.layout));
strcat(l, "-6");
info.new_layout = map_name(r6layout, l);
} else {
diff --git a/Query.c b/Query.c
index 23fbf8aa..adcd231e 100644
--- a/Query.c
+++ b/Query.c
@@ -93,7 +93,7 @@ int Query(char *dev)
else {
printf("%s: %s %s %d devices, %d spare%s. Use mdadm --detail for more detail.\n",
dev, human_size_brief(larray_size,IEC),
- map_num(pers, level), raid_disks,
+ map_num_s(pers, level), raid_disks,
spare_disks, spare_disks == 1 ? "" : "s");
}
st = guess_super(fd);
@@ -131,7 +131,7 @@ int Query(char *dev)
dev,
info.disk.number, info.array.raid_disks,
activity,
- map_num(pers, info.array.level),
+ map_num_s(pers, info.array.level),
mddev);
if (st->ss == &super0)
put_md_name(mddev);
diff --git a/maps.c b/maps.c
index a4fd2797..20fcf719 100644
--- a/maps.c
+++ b/maps.c
@@ -166,6 +166,30 @@ mapping_t sysfs_array_states[] = {
{ NULL, ARRAY_UNKNOWN_STATE }
};
+/**
+ * map_num_s() - Safer alternative of map_num() function.
+ * @map: map to search.
+ * @num: key to match.
+ *
+ * Shall be used only if key existence is guaranteed.
+ *
+ * Return: Pointer to name of the element.
+ */
+char *map_num_s(mapping_t *map, int num)
+{
+ char *ret = map_num(map, num);
+
+ assert(ret);
+ return ret;
+}
+
+/**
+ * map_num() - get element name by key.
+ * @map: map to search.
+ * @num: key to match.
+ *
+ * Return: Pointer to name of the element or NULL.
+ */
char *map_num(mapping_t *map, int num)
{
while (map->name) {
diff --git a/mdadm.c b/mdadm.c
index 26299b2e..be40686c 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -280,8 +280,8 @@ int main(int argc, char *argv[])
else
fprintf(stderr, "-%c", opt);
fprintf(stderr, " would set mdadm mode to \"%s\", but it is already set to \"%s\".\n",
- map_num(modes, newmode),
- map_num(modes, mode));
+ map_num_s(modes, newmode),
+ map_num_s(modes, mode));
exit(2);
} else if (!mode && newmode) {
mode = newmode;
@@ -544,7 +544,7 @@ int main(int argc, char *argv[])
switch(s.level) {
default:
pr_err("layout not meaningful for %s arrays.\n",
- map_num(pers, s.level));
+ map_num_s(pers, s.level));
exit(2);
case UnSet:
pr_err("raid level must be given before layout.\n");
@@ -1248,10 +1248,10 @@ int main(int argc, char *argv[])
if (option_index > 0)
pr_err(":option --%s not valid in %s mode\n",
long_options[option_index].name,
- map_num(modes, mode));
+ map_num_s(modes, mode));
else
pr_err("option -%c not valid in %s mode\n",
- opt, map_num(modes, mode));
+ opt, map_num_s(modes, mode));
exit(2);
}
@@ -1276,7 +1276,7 @@ int main(int argc, char *argv[])
if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN &&
s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
pr_err("--write-journal is not supported with consistency policy: %s\n",
- map_num(consistency_policies, s.consistency_policy));
+ map_num_s(consistency_policies, s.consistency_policy));
exit(2);
}
}
@@ -1285,12 +1285,12 @@ int main(int argc, char *argv[])
s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
if (s.level <= 0) {
pr_err("--consistency-policy not meaningful with level %s.\n",
- map_num(pers, s.level));
+ map_num_s(pers, s.level));
exit(2);
} else if (s.consistency_policy == CONSISTENCY_POLICY_JOURNAL &&
!s.journaldisks) {
pr_err("--write-journal is required for consistency policy: %s\n",
- map_num(consistency_policies, s.consistency_policy));
+ map_num_s(consistency_policies, s.consistency_policy));
exit(2);
} else if (s.consistency_policy == CONSISTENCY_POLICY_PPL &&
s.level != 5) {
@@ -1300,14 +1300,14 @@ int main(int argc, char *argv[])
(!s.bitmap_file ||
strcmp(s.bitmap_file, "none") == 0)) {
pr_err("--bitmap is required for consistency policy: %s\n",
- map_num(consistency_policies, s.consistency_policy));
+ map_num_s(consistency_policies, s.consistency_policy));
exit(2);
} else if (s.bitmap_file &&
strcmp(s.bitmap_file, "none") != 0 &&
s.consistency_policy != CONSISTENCY_POLICY_BITMAP &&
s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
pr_err("--bitmap is not compatible with consistency policy: %s\n",
- map_num(consistency_policies, s.consistency_policy));
+ map_num_s(consistency_policies, s.consistency_policy));
exit(2);
}
}
diff --git a/mdadm.h b/mdadm.h
index cd72e711..09915a00 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -770,7 +770,7 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
#endif
#define SYSLOG_FACILITY LOG_DAEMON
-
+extern char *map_num_s(mapping_t *map, int num);
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
extern mapping_t r0layout[], r5layout[], r6layout[],
diff --git a/super-ddf.c b/super-ddf.c
index 3f304cdc..8cda23a7 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1477,13 +1477,13 @@ static void examine_vds(struct ddf_super *sb)
printf("\n");
printf(" unit[%d] : %d\n", i, be16_to_cpu(ve->unit));
printf(" state[%d] : %s, %s%s\n", i,
- map_num(ddf_state, ve->state & 7),
+ map_num_s(ddf_state, ve->state & 7),
(ve->state & DDF_state_morphing) ? "Morphing, ": "",
(ve->state & DDF_state_inconsistent)? "Not Consistent" : "Consistent");
printf(" init state[%d] : %s\n", i,
- map_num(ddf_init_state, ve->init_state&DDF_initstate_mask));
+ map_num_s(ddf_init_state, ve->init_state & DDF_initstate_mask));
printf(" access[%d] : %s\n", i,
- map_num(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
+ map_num_s(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
printf(" Name[%d] : %.16s\n", i, ve->name);
examine_vd(i, sb, ve->guid);
}
diff --git a/super-intel.c b/super-intel.c
index 6ff336ee..ba3bd41f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -5625,7 +5625,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
free(dev);
free(dv);
pr_err("imsm does not support consistency policy %s\n",
- map_num(consistency_policies, s->consistency_policy));
+ map_num_s(consistency_policies, s->consistency_policy));
return 0;
}
diff --git a/super0.c b/super0.c
index b79b97a9..61c9ec1d 100644
--- a/super0.c
+++ b/super0.c
@@ -288,7 +288,7 @@ static void export_examine_super0(struct supertype *st)
{
mdp_super_t *sb = st->sb;
- printf("MD_LEVEL=%s\n", map_num(pers, sb->level));
+ printf("MD_LEVEL=%s\n", map_num_s(pers, sb->level));
printf("MD_DEVICES=%d\n", sb->raid_disks);
if (sb->minor_version >= 90)
printf("MD_UUID=%08x:%08x:%08x:%08x\n",
diff --git a/super1.c b/super1.c
index a12a5bc8..e3e2f954 100644
--- a/super1.c
+++ b/super1.c
@@ -671,7 +671,7 @@ static void export_examine_super1(struct supertype *st)
int len = 32;
int layout;
- printf("MD_LEVEL=%s\n", map_num(pers, __le32_to_cpu(sb->level)));
+ printf("MD_LEVEL=%s\n", map_num_s(pers, __le32_to_cpu(sb->level)));
printf("MD_DEVICES=%d\n", __le32_to_cpu(sb->raid_disks));
for (i = 0; i < 32; i++)
if (sb->set_name[i] == '\n' || sb->set_name[i] == '\0') {
diff --git a/sysfs.c b/sysfs.c
index 2995713d..0d98a65f 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -689,7 +689,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
if (info->array.level < 0)
return 0; /* FIXME */
rv |= sysfs_set_str(info, NULL, "level",
- map_num(pers, info->array.level));
+ map_num_s(pers, info->array.level));
if (info->reshape_active && info->delta_disks != UnSet)
raid_disks -= info->delta_disks;
rv |= sysfs_set_num(info, NULL, "raid_disks", raid_disks);
@@ -724,9 +724,10 @@ int sysfs_set_array(struct mdinfo *info, int vers)
}
if (info->consistency_policy == CONSISTENCY_POLICY_PPL) {
- if (sysfs_set_str(info, NULL, "consistency_policy",
- map_num(consistency_policies,
- info->consistency_policy))) {
+ char *policy = map_num_s(consistency_policies,
+ info->consistency_policy);
+
+ if (sysfs_set_str(info, NULL, "consistency_policy", policy)) {
pr_err("This kernel does not support PPL. Falling back to consistency-policy=resync.\n");
info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
}
--
2.38.1

@ -0,0 +1,66 @@
From c8772da4b53307546a9a374507bcec3398fc82c4 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 20 Feb 2024 11:56:12 +0100
Subject: [PATCH 11/41] mdmon: refactor md device name check in main()
Refactor mdmon main function to verify if fd is valid prior to checking
device name. This is due to static code analysis complaining after
change b938519e7719 ("util: remove obsolete code from get_md_name").
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdmon.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/mdmon.c b/mdmon.c
index a2038fe6..5fdb5cdb 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -302,12 +302,12 @@ static int mdmon(char *devnm, int must_fork, int takeover);
int main(int argc, char *argv[])
{
char *container_name = NULL;
- char *devnm = NULL;
int status = 0;
int opt;
int all = 0;
int takeover = 0;
int dofork = 1;
+ int mdfd = -1;
bool help = false;
static struct option options[] = {
{"all", 0, NULL, 'a'},
@@ -410,19 +410,20 @@ int main(int argc, char *argv[])
free_mdstat(mdstat);
return status;
- } else {
- int mdfd = open_mddev(container_name, 0);
- devnm = fd2devnm(mdfd);
+ }
+
+ mdfd = open_mddev(container_name, 0);
+ if (is_fd_valid(mdfd)) {
+ char *devnm = fd2devnm(mdfd);
close(mdfd);
- }
- if (!devnm) {
- pr_err("%s is not a valid md device name\n",
- container_name);
- return 1;
+ if (devnm)
+ return mdmon(devnm, dofork && do_fork(), takeover);
}
- return mdmon(devnm, dofork && do_fork(), takeover);
+
+ pr_err("%s is not a valid md device name\n", container_name);
+ return 1;
}
static int mdmon(char *devnm, int must_fork, int takeover)
--
2.40.1

@ -1,69 +0,0 @@
From 52c67fcdd6dadc4138ecad73e65599551804d445 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 15 Feb 2022 21:34:15 +0800
Subject: [PATCH 012/125] mdadm/systemd: remove KillMode=none from service file
For mdadm's systemd configuration, current systemd KillMode is "none" in
following service files,
- mdadm-grow-continue@.service
- mdmon@.service
This "none" mode is strongly discouraged by systemd developers (see man 5
systemd.kill for "KillMode=" section), and is being considered for removal
in a future systemd version.
As a systemd developer explained in discussion, the systemd kill process
is,
1. send the signal specified by KillSignal= to the list of processes (if
any), TERM is the default
2. wait until either the target of process(es) exit or a timeout expires
3. if the timeout expires send the signal specified by FinalKillSignal=,
KILL is the default
For "control-group", all remaining processes will receive the SIGTERM
signal (by default) and if there are still processes after a period of
time, they will get the SIGKILL signal.
For "mixed", only the main process will receive the SIGTERM signal, and
if there are still processes after a period of time, all remaining
processes (including the main one) will receive the SIGKILL signal.
From the above comment, currently KillMode=control-group is a proper
kill mode. Since control-group is the default kill mode, the fix can be
simply removing KillMode=none line from the service file, then the
default mode will take effect.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Benjamin Brunner <bbrunner@suse.com>
Cc: Franck Bui <fbui@suse.de>
Cc: Jes Sorensen <jes@trained-monkey.org>
Cc: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Cc: Neil Brown <neilb@suse.de>
Cc: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdadm-grow-continue@.service | 1 -
systemd/mdmon@.service | 1 -
2 files changed, 2 deletions(-)
diff --git a/systemd/mdadm-grow-continue@.service b/systemd/mdadm-grow-continue@.service
index 5c667d2a..9fdc8ec7 100644
--- a/systemd/mdadm-grow-continue@.service
+++ b/systemd/mdadm-grow-continue@.service
@@ -14,4 +14,3 @@ ExecStart=BINDIR/mdadm --grow --continue /dev/%I
StandardInput=null
StandardOutput=null
StandardError=null
-KillMode=none
diff --git a/systemd/mdmon@.service b/systemd/mdmon@.service
index 85a3a7c5..77533958 100644
--- a/systemd/mdmon@.service
+++ b/systemd/mdmon@.service
@@ -25,4 +25,3 @@ Type=forking
# it out) and systemd will remove it when transitioning from
# initramfs to rootfs.
#PIDFile=/run/mdadm/%I.pid
-KillMode=none
--
2.38.1

@ -0,0 +1,138 @@
From 4c12714d1ca06533fe7a887966df2558fd2f96b2 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 20 Feb 2024 17:04:44 +0100
Subject: [PATCH 12/41] test: run tests on system level mdadm
The tests run with MDADM_NO_SYSTEMCTL flag by default, however it has
no effect on udev. In case of external metadata, even if flag is set,
udev will trigger systemd to launch mdmon.
This commit changes test execution level, so the tests are run on system
level mdadm, meaning local build must be installed prior to running
tests.
Add warning that the tests are run on system level mdadm and local
build must be installed first.
Do not call mdadm with "quiet" as it makes it not display critical
messages necessary for debug.
Remove forcing speed_limit and add restoring system speed_limit_max
after test execution.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
test | 27 ++++++++++++++++++---------
tests/func.sh | 1 -
2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/test b/test
index 49a36c3b..338c2db4 100755
--- a/test
+++ b/test
@@ -1,11 +1,12 @@
#!/bin/bash
#
# run test suite for mdadm
-mdadm=$PWD/mdadm
+mdadm=`which mdadm`
targetdir="/var/tmp"
logdir="$targetdir"
config=/tmp/mdadm.conf
testdir=$PWD/tests
+system_speed_limit=`cat /proc/sys/dev/raid/speed_limit_max`
devlist=
savelogs=0
@@ -20,9 +21,6 @@ DEVTYPE=loop
INTEGRITY=yes
LVM_VOLGROUP=mdtest
-# make sure to test local mdmon, not system one
-export MDADM_NO_SYSTEMCTL=1
-
# assume md0, md1, md2 exist in /dev
md0=/dev/md0
md1=/dev/md1
@@ -41,7 +39,10 @@ ctrl_c() {
ctrl_c_error=1
}
-# mdadm always adds --quiet, and we want to see any unexpected messages
+restore_system_speed_limit() {
+ echo $system_speed_limit > /proc/sys/dev/raid/speed_limit_max
+}
+
mdadm() {
rm -f $targetdir/stderr
case $* in
@@ -63,10 +64,10 @@ mdadm() {
$mdadm --zero $args > /dev/null
}
done
- $mdadm 2> $targetdir/stderr --quiet "$@" --auto=yes
+ $mdadm 2> $targetdir/stderr "$@" --auto=yes
;;
* )
- $mdadm 2> $targetdir/stderr --quiet "$@"
+ $mdadm 2> $targetdir/stderr "$@"
;;
esac
rv=$?
@@ -99,8 +100,6 @@ do_test() {
fi
rm -f $targetdir/stderr
- # this might have been reset: restore the default.
- echo 2000 > /proc/sys/dev/raid/speed_limit_max
do_clean
# source script in a subshell, so it has access to our
# namespace, but cannot change it.
@@ -122,6 +121,7 @@ do_test() {
echo " (KNOWN BROKEN TEST: $_broken_msg)"
fi
fi
+ restore_system_speed_limit
[ "$savelogs" == "1" ] &&
mv -f $targetdir/log $logdir/$_basename.log
[ "$ctrl_c_error" == "1" ] && exit 1
@@ -299,7 +299,15 @@ parse_args() {
done
}
+print_warning() {
+ cat <<-EOF
+ Warning! Tests are performed on system level mdadm!
+ If you want to test local build, you need to install it first!
+ EOF
+}
+
main() {
+ print_warning
do_setup
echo "Testing on linux-$(uname -r) kernel"
@@ -329,6 +337,7 @@ main() {
break
fi
done
+
exit 0
}
diff --git a/tests/func.sh b/tests/func.sh
index 1c1a28a2..b474442b 100644
--- a/tests/func.sh
+++ b/tests/func.sh
@@ -213,7 +213,6 @@ do_setup() {
path1=$dev7
ulimit -c unlimited
[ -f /proc/mdstat ] || modprobe md_mod
- echo 2000 > /proc/sys/dev/raid/speed_limit_max
echo 0 > /sys/module/md_mod/parameters/start_ro
}
--
2.40.1

@ -0,0 +1,43 @@
From 3c3ddeeccc1eb4accb62ce9920de430a564be806 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Wed, 28 Feb 2024 16:37:20 +0100
Subject: [PATCH 13/41] Monitor: Allow no PID in check_one_sharer()
Commit 5fb5479ad100 ("Monitor: open file before check in
check_one_sharer()") introduced a regression that prohibits monitor
from starting if PID file does not exist.
Add check for no PID file.
Add missing fclose().
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Monitor.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/Monitor.c b/Monitor.c
index 7cee95d4..9be2b528 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -453,12 +453,17 @@ static int check_one_sharer(int scan)
fp = fopen(AUTOREBUILD_PID_PATH, "r");
if (!fp) {
+ /* PID file does not exist */
+ if (errno == ENOENT)
+ return 0;
+
pr_err("Cannot open %s file.\n", AUTOREBUILD_PID_PATH);
return 2;
}
if (!is_file(AUTOREBUILD_PID_PATH)) {
pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH);
+ fclose(fp);
return 2;
}
--
2.40.1

@ -1,122 +0,0 @@
From 1066ab83dbe9a4cc20f7db44a40aa2cbb9d5eed6 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Fri, 13 May 2022 09:19:42 +0200
Subject: [PATCH 13/83] mdmon: Stop parsing duplicate options
Introduce new function is_duplicate_opt() to check if given option
was already used and prevent setting it again along with an error
message.
Move parsing above in_initrd() check to be able to detect --offroot
option duplicates.
Now help option is executed after parsing to prevent executing commands
like: 'mdmon --help --ndlksnlksajndfjksndafasj'.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdmon.c | 44 +++++++++++++++++++++++++++++++++++---------
1 file changed, 35 insertions(+), 9 deletions(-)
diff --git a/mdmon.c b/mdmon.c
index 5570574b..c057da63 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -288,6 +288,15 @@ void usage(void)
exit(2);
}
+static bool is_duplicate_opt(const int opt, const int set_val, const char *long_name)
+{
+ if (opt == set_val) {
+ pr_err("--%s option duplicated!\n", long_name);
+ return true;
+ }
+ return false;
+}
+
static int mdmon(char *devnm, int must_fork, int takeover);
int main(int argc, char *argv[])
@@ -299,6 +308,7 @@ int main(int argc, char *argv[])
int all = 0;
int takeover = 0;
int dofork = 1;
+ bool help = false;
static struct option options[] = {
{"all", 0, NULL, 'a'},
{"takeover", 0, NULL, 't'},
@@ -308,37 +318,50 @@ int main(int argc, char *argv[])
{NULL, 0, NULL, 0}
};
- if (in_initrd()) {
- /*
- * set first char of argv[0] to @. This is used by
- * systemd to signal that the task was launched from
- * initrd/initramfs and should be preserved during shutdown
- */
- argv[0][0] = '@';
- }
-
while ((opt = getopt_long(argc, argv, "thaF", options, NULL)) != -1) {
switch (opt) {
case 'a':
+ if (is_duplicate_opt(all, 1, "all"))
+ exit(1);
container_name = argv[optind-1];
all = 1;
break;
case 't':
+ if (is_duplicate_opt(takeover, 1, "takeover"))
+ exit(1);
takeover = 1;
break;
case 'F':
+ if (is_duplicate_opt(dofork, 0, "foreground"))
+ exit(1);
dofork = 0;
break;
case OffRootOpt:
+ if (is_duplicate_opt(argv[0][0], '@', "offroot"))
+ exit(1);
argv[0][0] = '@';
break;
case 'h':
+ if (is_duplicate_opt(help, true, "help"))
+ exit(1);
+ help = true;
+ break;
default:
usage();
break;
}
}
+
+ if (in_initrd()) {
+ /*
+ * set first char of argv[0] to @. This is used by
+ * systemd to signal that the task was launched from
+ * initrd/initramfs and should be preserved during shutdown
+ */
+ argv[0][0] = '@';
+ }
+
if (all == 0 && container_name == NULL) {
if (argv[optind])
container_name = argv[optind];
@@ -353,6 +376,9 @@ int main(int argc, char *argv[])
if (strcmp(container_name, "/proc/mdstat") == 0)
all = 1;
+ if (help)
+ usage();
+
if (all) {
struct mdstat_ent *mdstat, *e;
int container_len = strlen(container_name);
--
2.38.1

@ -1,41 +0,0 @@
From 20e114e334ed6ed3280c37a9a08fb95578393d1a Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Thu, 19 May 2022 09:16:08 +0200
Subject: [PATCH 14/83] Grow: block -n on external volumes.
Performing --raid-devices on external metadata volume should be blocked
as it causes unwanted behaviour.
Eg. Performing
mdadm -G /dev/md/volume -l10 -n4
on r0_d2 inside 4 disk container, returns
mdadm: Need 2 spares to avoid degraded array, only have 0.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/Grow.c b/Grow.c
index 8a242b0f..f6efbc48 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1892,6 +1892,14 @@ int Grow_reshape(char *devname, int fd,
if (retval) {
pr_err("Cannot read superblock for %s\n", devname);
+ close(cfd);
+ free(subarray);
+ return 1;
+ }
+
+ if (s->raiddisks && subarray) {
+ pr_err("--raid-devices operation can be performed on a container only\n");
+ close(cfd);
free(subarray);
return 1;
}
--
2.38.1

@ -0,0 +1,47 @@
From d1cd231ae41d98b2555dbff08d0c79876b5059fe Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Tue, 27 Feb 2024 07:36:39 +0100
Subject: [PATCH 14/41] super-intel: respect IMSM_DEVNAME_AS_SERIAL flag
IMSM_DEVNAME_AS_SERIAL flag was respected only when searching
serial using nvme or scsi device wasn't successful. This
flag shall be applied first, to have user settings with
the highest priority.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
super-intel.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index e61f3f6f..4babec9f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -4174,17 +4174,17 @@ static int imsm_read_serial(int fd, char *devname,
memset(buf, 0, sizeof(buf));
+ if (check_env("IMSM_DEVNAME_AS_SERIAL")) {
+ memset(serial, 0, serial_buf_len);
+ fd2devname(fd, (char *) serial);
+ return 0;
+ }
+
rv = nvme_get_serial(fd, buf, sizeof(buf));
if (rv)
rv = scsi_get_serial(fd, buf, sizeof(buf));
- if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
- memset(serial, 0, MAX_RAID_SERIAL_LEN);
- fd2devname(fd, (char *) serial);
- return 0;
- }
-
if (rv != 0) {
if (devname)
pr_err("Failed to retrieve serial for %s\n",
--
2.40.1

@ -1,90 +0,0 @@
From de064c93e3819d72720e4fba6575265ba10e1553 Mon Sep 17 00:00:00 2001
From: Mateusz Grzonka <mateusz.grzonka@intel.com>
Date: Mon, 13 Jun 2022 12:11:25 +0200
Subject: [PATCH 15/83] Incremental: Fix possible memory and resource leaks
map allocated through map_by_uuid() is not freed if mdfd is invalid.
In addition mdfd is not closed, and mdinfo list is not freed too.
Signed-off-by: Mateusz Grzonka <mateusz.grzonka@intel.com>
Change-Id: I25e726f0e2502cf7e8ce80c2bd7944b3b1e2b9dc
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Incremental.c | 32 +++++++++++++++++++++++---------
1 file changed, 23 insertions(+), 9 deletions(-)
diff --git a/Incremental.c b/Incremental.c
index a57fc323..4d0cd9d6 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -1499,7 +1499,7 @@ static int Incremental_container(struct supertype *st, char *devname,
return 0;
}
for (ra = list ; ra ; ra = ra->next) {
- int mdfd;
+ int mdfd = -1;
char chosen_name[1024];
struct map_ent *mp;
struct mddev_ident *match = NULL;
@@ -1514,6 +1514,12 @@ static int Incremental_container(struct supertype *st, char *devname,
if (mp) {
mdfd = open_dev(mp->devnm);
+ if (!is_fd_valid(mdfd)) {
+ pr_err("failed to open %s: %s.\n",
+ mp->devnm, strerror(errno));
+ rv = 2;
+ goto release;
+ }
if (mp->path)
strcpy(chosen_name, mp->path);
else
@@ -1573,21 +1579,25 @@ static int Incremental_container(struct supertype *st, char *devname,
c->autof,
trustworthy,
chosen_name, 0);
+
+ if (!is_fd_valid(mdfd)) {
+ pr_err("create_mddev failed with chosen name %s: %s.\n",
+ chosen_name, strerror(errno));
+ rv = 2;
+ goto release;
+ }
}
- if (only && (!mp || strcmp(mp->devnm, only) != 0))
- continue;
- if (mdfd < 0) {
- pr_err("failed to open %s: %s.\n",
- chosen_name, strerror(errno));
- return 2;
+ if (only && (!mp || strcmp(mp->devnm, only) != 0)) {
+ close_fd(&mdfd);
+ continue;
}
assemble_container_content(st, mdfd, ra, c,
chosen_name, &result);
map_free(map);
map = NULL;
- close(mdfd);
+ close_fd(&mdfd);
}
if (c->export && result) {
char sep = '=';
@@ -1610,7 +1620,11 @@ static int Incremental_container(struct supertype *st, char *devname,
}
printf("\n");
}
- return 0;
+
+release:
+ map_free(map);
+ sysfs_free(list);
+ return rv;
}
static void run_udisks(char *arg1, char *arg2)
--
2.38.1

@ -0,0 +1,236 @@
From a944da4e1a56cd926e6b21f5aaebc13198265419 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Fri, 23 Feb 2024 15:51:42 +0100
Subject: [PATCH 15/41] mdadm: remove TODO
This file has not been updated in 16 years.
There is no reason to keep it. Remove it.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
TODO | 213 -----------------------------------------------------------
1 file changed, 213 deletions(-)
delete mode 100644 TODO
diff --git a/TODO b/TODO
deleted file mode 100644
index 279d20db..00000000
--- a/TODO
+++ /dev/null
@@ -1,213 +0,0 @@
- - add 'name' field to metadata type and use it.
- - use validate_geometry more
- - metadata should be able to check/reject bitmap stuff.
-
-DDF:
- Three new metadata types:
- ddf - used only to create a container.
- ddf-bvd - used to create an array in a container
- ddf-svd - used to create a secondary array from bvds.
-
- Usage:
- mdadm -C /dev/ddf1 /dev/sd[abcdef]
- mdadm -C /dev/md1 -e ddf /dev/sd[a-f]
- mdadm -C /dev/md1 -l container /dev/sd[a-f]
-
- Each of these create a new ddf container using all those
- devices. The name 'ddf*' signals that ddf metadata should be used.
- '-e ddf' only supports one level - 'container'. 'container' is only
- supported by ddf.
-
- mdadm -C /dev/md1 -l0 -n4 /dev/ddf1 # or maybe not ???
- mdadm -C /dev/md1 -l1 -n2 /dev/sda /dev/sdb
- If exactly one device is given, and it is a container, we select
- devices from that container.
- If devices are given that are already in use, they must be in use by
- a container, and the array is created in the container.
- If devices given are bvds, we slip under the hood to make
- the svd arrays.
-
- mdadm -A /dev/ddf ......
- base drives make a container. Anything in that container is started
- auto-read-only.
- if /dev/ddf is already assembled, we assemble bvds and svds inside it.
-
-
-2005-dec-20
- Want an incremental assembly mode to work nicely with udev.
- Core usage would be something like
- mdadm --incr-assemble /dev/newdevice
- This would
- - examine the device to determine uuid etc.
- - look for a match in /etc/mdadm.conf, abort if not found
- - find that device and collect current contents
- - perform an 'assemble' analysis to make sure we have the best set of devices.
- - remove or add devices as appropriate
- - possibly start the array if it was complete
-
- Other usages could involve
- - specify which array to auto-add to.
- This requires an existing array for uuid matching... is there any point?
-
- -
-
-
-2004-june-02
- * Don't print 'errors' flag, it is meaningless. DONE
- * Handle new superblock format
- * create device file on demand, particularly partitionable devices. DONE
- BUT figure a way to create the partition devices.
- auto=partN
- * Use Event: interface to listen for events. DONE, untested
- * Make sure mdadm -As can assemble multi-level RAIDs ok.
- * --build to build raid1 or multipath arrays
- clean or not ???
-
-----------------------------------------------------------------------------
-* mdadm --monitor to monitor failed multipath paths and re-instate them.
-
-* Maybe make "--help" fit in 80x24 and have a --long-help with more info. DONE
-
-
-* maybe "missing" instead of <bold>missing</> in doco DONE
-* possibly wait for resync to start, or even finish while assembling.- NO
-
-* -Db should have a devices= entry if possible. - DONE
-* when assembling multipath arrays, ignore any error indicators. - DONE
-* rationalise --monitor usage:
- mdadm --monitor
- doesn't do as expected. DONE
-
-* --assemble could have a --update option. - DONE
- following word can be:
- sparc2.2
- super-minor
-
-* mdadm /dev/md11, where md11 is raid0 can segfault, particularly when looking in the
- [UU_UUU] string ... which doesn't exist !
-It should be more sensible. DONE
-
-Example:
-
-from Raimund Sacherer <raimund.sacherer@ngit.at>
-
-mke2fs -m0 -q /dev/ram1 300
-mount -n -t ext2 /dev/ram1 /tmp
-echo DEVICE /dev/[sh]* >> /tmp/mdadm.conf
-mdadm -Esb /dev/[sh]* 2>/dev/null >> /tmp/mdadm.conf
-mdadm -ARsc /tmp/mdadm.conf
-umount /tmp
-
-
-?? Allow -S /dev/md? - current complains subsequent not a/d/r - DONE
-
-* new "Query" mode to subsume --detail and --examine.
- --query or -Q, takes a device and tells if it is an MD device,
- and also tells in a raid superblock is found.
- DONE
-
-* write mdstat.c to parse /proc/mdstat file
- Build list of arrays: name, rebuild-percent
- DONE
-
-* parse /proc/partitions and map major/minor into /dev/* names,
- and use that for default DEVICE list ????
-
-* --detail --scan to read /proc/mdstat, and then iterate over these,
- but assume --brief. --verbose can override
- check each subdevice to see if it is in conf_get_devs.
- Warn if not.
- DONE, but don't warn yet...
-
-* Support multipath ... maybe...
- maybe DONE
-
-* --follow to syslog
-
-* --follow to move spares around DONE
-
-* --follow to notice other events: DONE
- rebuild started
- spare activated
- spare removed
- spare added
-
-------------------------------------
-- --examine --scan scans all drives and build an mdadm.conf file DONE
-
-- check superblock checksum in examine DONE
-- report "chunk" or "rounding" depending on raid level DONE
-- report "linear" instead of "-1" for raid level DONE
-- decode ayout depending on raid level DONE
-- --verbose and --force flags. DONE
-
-- set md_minor, *_disks for Create - DONE
-- for create raid5, how to choose between
- all working, but not insync
- one missing, one spare, insync DONE (--force)
-- and for raid1 - some failed drives... (missing)
-
-- when RUN_ARRAY, make sure *_disks counts are right
-
-- get --detail to extract extra stuff from superblock,
- like uuid DONE
-- --detail --brief to give a config file line DONE
-- parse config file. DONE
-- test...
-
-- when --assemble --scan, if an underlying device is an md device,
- then try to assemble that device first.
-
-
-- mdadm -S /dev/md0 /dev/md1 gives internal error FIXED
-
-- mdadm --detail --scan print summary of what it can find? DONE
-
-
----------
-Assemble doesn't add spares. - DONE
-Create to allow "missing" name for devices.
-Create to accept "--force" for do exactly what is requested
-- get Assemble to upgrade devices if force flag.
-ARRAY lines in config file to have super_minor=n
-ARRAY lines in config file to have device=pattern, and only accept
- those devices
- If UUID given, insist on that
- If not, but super_minor given, require all found with that minor
- to have same uuid
- If only device given, all valid supers on those devices must have
- same uuid
-allow /dev/mdX as first argument before any options
-Possible --dry-run option for create and assemble--force
-
-Assemble to check that all devices mentioned in superblock
- are present.
-
-New mode: --Monitor (or --Follow)
- Periodically check status of all arrays (listed in config file).
- Log every event and apparent cause - or differences
- Email and alert - or run a program - for important events
- Move spares around if necessary.
-
- An Array line can have a spare-group= field that indicates that
- the array shares spares with other arrays with the same
- spare-group name.
- If an array has a failed and no spares, then check all other
- arrays in the spare group. If one has no failures and a spare,
- then consider that spare.
- Choose the smallest considered spare that is large enough.
- If there is one, then hot-remove it from it's home, and
- hot-add it to the array in question.
-
- --mail-to address
- --alert-handler program
-
- Will also extract information from /proc/mdstat if present,
- and consider 20% marks in rebuild as events.
-
- Events are:
- drive fails - causes mail to be sent
- rebuild started
- spare activated
- spare removed
- spare added
--
2.40.1

@ -1,98 +0,0 @@
From e702f392959d1c2ad2089e595b52235ed97b4e18 Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Mon, 6 Jun 2022 12:32:12 +0200
Subject: [PATCH 16/83] Mdmonitor: Fix segfault
Mdadm with "--monitor" parameter requires md device
as an argument to be monitored. If given argument is
not a md device, error shall be returned. Previously
it was not checked and invalid argument caused
segmentation fault. This commit adds checking
that devices passed to mdmonitor are md devices.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 10 +++++++++-
mdadm.h | 1 +
mdopen.c | 17 +++++++++++++++++
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/Monitor.c b/Monitor.c
index c0ab5412..4e5802b5 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -182,6 +182,7 @@ int Monitor(struct mddev_dev *devlist,
continue;
if (strcasecmp(mdlist->devname, "<ignore>") == 0)
continue;
+
st = xcalloc(1, sizeof *st);
if (mdlist->devname[0] == '/')
st->devname = xstrdup(mdlist->devname);
@@ -190,6 +191,8 @@ int Monitor(struct mddev_dev *devlist,
strcpy(strcpy(st->devname, "/dev/md/"),
mdlist->devname);
}
+ if (!is_mddev(mdlist->devname))
+ return 1;
st->next = statelist;
st->devnm[0] = 0;
st->percent = RESYNC_UNKNOWN;
@@ -203,7 +206,12 @@ int Monitor(struct mddev_dev *devlist,
struct mddev_dev *dv;
for (dv = devlist; dv; dv = dv->next) {
- struct state *st = xcalloc(1, sizeof *st);
+ struct state *st;
+
+ if (!is_mddev(dv->devname))
+ return 1;
+
+ st = xcalloc(1, sizeof *st);
mdlist = conf_get_ident(dv->devname);
st->devname = xstrdup(dv->devname);
st->next = statelist;
diff --git a/mdadm.h b/mdadm.h
index 09915a00..d53df169 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1636,6 +1636,7 @@ extern int create_mddev(char *dev, char *name, int autof, int trustworthy,
#define FOREIGN 2
#define METADATA 3
extern int open_mddev(char *dev, int report_errors);
+extern int is_mddev(char *dev);
extern int open_container(int fd);
extern int metadata_container_matches(char *metadata, char *devnm);
extern int metadata_subdev_matches(char *metadata, char *devnm);
diff --git a/mdopen.c b/mdopen.c
index 245be537..d18c9319 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -475,6 +475,23 @@ int open_mddev(char *dev, int report_errors)
return mdfd;
}
+/**
+ * is_mddev() - check that file name passed is an md device.
+ * @dev: file name that has to be checked.
+ * Return: 1 if file passed is an md device, 0 if not.
+ */
+int is_mddev(char *dev)
+{
+ int fd = open_mddev(dev, 1);
+
+ if (fd >= 0) {
+ close(fd);
+ return 1;
+ }
+
+ return 0;
+}
+
char *find_free_devnm(int use_partitions)
{
static char devnm[32];
--
2.38.1

@ -0,0 +1,119 @@
From 84d5e05d6fa6bbe6f4a3bdbdb1165dcc463b5207 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Fri, 23 Feb 2024 15:51:43 +0100
Subject: [PATCH 16/41] mdadm: remove makedist
Archives are generated by kernel.org automation, so there is no need to
submit them manually. Remove the legacy solution.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
makedist | 96 --------------------------------------------------------
1 file changed, 96 deletions(-)
delete mode 100755 makedist
diff --git a/makedist b/makedist
deleted file mode 100755
index 0c4b39eb..00000000
--- a/makedist
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/bin/sh
-# avoid silly sorting
-export LANG=C
-arg=$1
-target=~/public_html/source/mdadm
-if [ " $arg" = " test" ]
-then
- target=/tmp/mdadm-test
- rm -rf $target
- mkdir -p $target
-fi
-if [ -d $target ]
-then :
-else echo $target is not a directory
- exit 2
-fi
-set `grep '^#define VERSION' ReadMe.c `
-version=`echo $3 | sed -e 's/"//g'`
-grep "^.TH MDADM 8 .. v$version" mdadm.8.in > /dev/null 2>&1 ||
- {
- echo mdadm.8.in does not mention version $version.
- exit 1
- }
-grep "^.TH MDMON 8 .. v$version" mdmon.8 > /dev/null 2>&1 ||
- {
- echo mdmon.8 does not mention version $version.
- exit 1
- }
-rpmv=`echo $version | tr - _`
-grep "^Version: *$rpmv$" mdadm.spec > /dev/null 2>&1 ||
- {
- echo mdadm.spec does not mention version $version.
- exit 1
- }
-if [ -f ANNOUNCE-$version ]
-then :
-else
- echo ANNOUNCE-$version does not exist
- exit 1
-fi
-if grep "^ANNOUNCE-$version\$" inventory
-then :
-else { cat inventory ; echo ANNOUNCE-$version ; } | sort -o inventory
-fi
-
-echo version = $version
-base=mdadm-$rpmv.tar.gz
-if [ " $arg" != " diff" ]
-then
- if [ -f $target/$base ]
- then
- echo $target/$base exists.
- exit 1
- fi
- trap "rm $target/$base; exit" 1 2 3
- git archive --prefix=mdadm-$rpmv/ HEAD | gzip --best > $target/$base
- chmod a+r $target/$base
- ls -l $target/$base
- if tar tzf $target/$base | sed 's,[^/]*/,,' | sort | diff -u inventory -
- then : correct files found
- else echo "Extra files, or inventory is out-of-date"
- rm $target/$base
- exit 1
- fi
- rpmbuild -ta $target/$base || exit 1
- find ~/rpmbuild/RPMS -name "*mdadm-$version-*" \
- -exec cp {} $target/RPM \;
- cp ANNOUNCE-$version $target/ANNOUNCE
- cp ChangeLog $target/ChangeLog
- if [ " $arg" != " test" ]
- then
- echo -n "Confirm signing this release? "
- read a
- if [ " $a" != " y" ]; then echo OK - bye. ; exit 1; fi
- if zcat $target/$base | gpg -ba > $target/$base.sign && gpg -ba $target/ANNOUNCE
- then
- kup put $target/$base $target/$base.sign \
- /pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz
- kup put $target/ANNOUNCE $target/ANNOUNCE.asc /pub/linux/utils/raid/mdadm/ANNOUNCE
- else
- echo signing failed
- exit 1
- fi
- fi
-else
- if [ ! -f $target/$base ]
- then
- echo $target/$base does not exist.
- exit 1
- fi
- ( cd .. ; ln -s mdadm.v2 mdadm-$version ; tar chf - --exclude=.git --exclude="TAGS" --exclude='*,v' --exclude='*~' --exclude='*.o' --exclude mdadm --exclude=mdadm'.[^ch0-9]' --exclude=RCS mdadm-$version ; rm mdadm-$version ) | gzip --best > /var/tmp/mdadm-new.tgz
- mkdir /var/tmp/mdadm-old ; zcat $target/$base | ( cd /var/tmp/mdadm-old ; tar xf - )
- mkdir /var/tmp/mdadm-new ; zcat /var/tmp/mdadm-new.tgz | ( cd /var/tmp/mdadm-new ; tar xf - )
- diff -ru /var/tmp/mdadm-old /var/tmp/mdadm-new
- rm -rf /var/tmp/mdadm-old /var/tmp/mdadm-new /var/tmp/mdadm-new.tgz
-fi
--
2.40.1

@ -1,61 +0,0 @@
From f5ff2988761625b43eb15555993f2797af29f166 Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Mon, 6 Jun 2022 12:32:13 +0200
Subject: [PATCH 17/83] Mdmonitor: Improve logging method
Change logging, and as a result, mdmonitor in verbose
mode will report its configuration.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Oleksandr Shchirskyi <oleksandr.shchirskyi@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index 4e5802b5..6ca1ebe5 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -136,24 +136,27 @@ int Monitor(struct mddev_dev *devlist,
struct mddev_ident *mdlist;
int delay_for_event = c->delay;
- if (!mailaddr) {
+ if (!mailaddr)
mailaddr = conf_get_mailaddr();
- if (mailaddr && ! c->scan)
- pr_err("Monitor using email address \"%s\" from config file\n",
- mailaddr);
- }
- mailfrom = conf_get_mailfrom();
- if (!alert_cmd) {
+ if (!alert_cmd)
alert_cmd = conf_get_program();
- if (alert_cmd && !c->scan)
- pr_err("Monitor using program \"%s\" from config file\n",
- alert_cmd);
- }
+
+ mailfrom = conf_get_mailfrom();
+
if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
pr_err("No mail address or alert command - not monitoring.\n");
return 1;
}
+
+ if (c->verbose) {
+ pr_err("Monitor is started with delay %ds\n", c->delay);
+ if (mailaddr)
+ pr_err("Monitor using email address %s\n", mailaddr);
+ if (alert_cmd)
+ pr_err("Monitor using program %s\n", alert_cmd);
+ }
+
info.alert_cmd = alert_cmd;
info.mailaddr = mailaddr;
info.mailfrom = mailfrom;
--
2.38.1

@ -0,0 +1,69 @@
From 9cdcc193cec92c624841d5b70f1b96daafdc4314 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Fri, 23 Feb 2024 15:51:44 +0100
Subject: [PATCH 17/41] mdadm: remove mdadm.spec
This file is outdated, distributions have their own specs.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdadm.spec | 47 -----------------------------------------------
1 file changed, 47 deletions(-)
delete mode 100644 mdadm.spec
diff --git a/mdadm.spec b/mdadm.spec
deleted file mode 100644
index 12e7859a..00000000
--- a/mdadm.spec
+++ /dev/null
@@ -1,47 +0,0 @@
-Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
-Name: mdadm
-Version: 4.3
-Release: 1
-Source: https://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz
-URL: https://neil.brown.name/blog/mdadm
-License: GPL
-Group: Utilities/System
-BuildRoot: %{_tmppath}/%{name}-root
-Obsoletes: mdctl
-
-%description
-mdadm is a program that can be used to create, manage, and monitor
-Linux MD (Software RAID) devices.
-
-%prep
-%setup -q
-# we want to install in /sbin, not /usr/sbin...
-%define _exec_prefix %{nil}
-
-%build
-# This is a debatable issue. The author of this RPM spec file feels that
-# people who install RPMs (especially given that the default RPM options
-# will strip the binary) are not going to be running gdb against the
-# program.
-make CXFLAGS="$RPM_OPT_FLAGS" SYSCONFDIR="%{_sysconfdir}"
-
-%install
-make DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} BINDIR=%{_sbindir} install
-install -D -m644 mdadm.conf-example $RPM_BUILD_ROOT/%{_sysconfdir}/mdadm.conf
-
-%clean
-rm -rf $RPM_BUILD_ROOT
-
-%files
-%defattr(-,root,root)
-%doc TODO ChangeLog mdadm.conf-example COPYING
-%{_sbindir}/mdadm
-%{_sbindir}/mdmon
-/usr/lib/udev/rules.d/01-md-raid-creating.rules
-/usr/lib/udev/rules.d/63-md-raid-arrays.rules
-/usr/lib/udev/rules.d/64-md-raid-assembly.rules
-/usr/lib/udev/rules.d/69-md-clustered-confirm-device.rules
-%config(noreplace,missingok)/%{_sysconfdir}/mdadm.conf
-%{_mandir}/man*/md*
-
-%changelog
--
2.40.1

@ -1,73 +0,0 @@
From 626bc45396c4959f2c4685c2faa7c4f553f4efdf Mon Sep 17 00:00:00 2001
From: Mateusz Grzonka <mateusz.grzonka@intel.com>
Date: Mon, 13 Jun 2022 11:59:34 +0200
Subject: [PATCH 18/83] Fix possible NULL ptr dereferences and memory leaks
In Assemble there was a NULL check for sra variable,
which effectively didn't stop the execution in every case.
That might have resulted in a NULL pointer dereference.
Also in super-ddf, mu variable was set to NULL for some condition,
and then immediately dereferenced.
Additionally some memory wasn't freed as well.
Signed-off-by: Mateusz Grzonka <mateusz.grzonka@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 7 ++++++-
super-ddf.c | 9 +++++++--
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 9eac9ce0..4b213560 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1982,7 +1982,12 @@ int assemble_container_content(struct supertype *st, int mdfd,
}
sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS);
- if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) {
+ if (sra == NULL) {
+ pr_err("Failed to read sysfs parameters\n");
+ return 1;
+ }
+
+ if (strcmp(sra->text_version, content->text_version) != 0) {
if (content->array.major_version == -1 &&
content->array.minor_version == -2 &&
c->readonly &&
diff --git a/super-ddf.c b/super-ddf.c
index 8cda23a7..abbc8b09 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -5125,13 +5125,16 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
*/
vc = find_vdcr(ddf, a->info.container_member, rv->disk.raid_disk,
&n_bvd, &vcl);
- if (vc == NULL)
+ if (vc == NULL) {
+ free(rv);
return NULL;
+ }
mu = xmalloc(sizeof(*mu));
if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
free(mu);
- mu = NULL;
+ free(rv);
+ return NULL;
}
mu->len = ddf->conf_rec_len * 512 * vcl->conf.sec_elmnt_count;
@@ -5161,6 +5164,8 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
pr_err("BUG: can't find disk %d (%d/%d)\n",
di->disk.raid_disk,
di->disk.major, di->disk.minor);
+ free(mu);
+ free(rv);
return NULL;
}
vc->phys_refnum[i_prim] = ddf->phys->entries[dl->pdnum].refnum;
--
2.38.1

@ -0,0 +1,209 @@
From 9282e1169f19676553a82dd49f780285a16e3b9a Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Fri, 23 Feb 2024 15:51:45 +0100
Subject: [PATCH 18/41] mdadm: remove mkinitramfs stuff
This script uses mdadm.static, which is known to have been abandoned
(and is probably not working) for years. Mdadm is integrated with dracut
and mkinitramfs these days.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
README.initramfs | 122 -----------------------------------------------
mkinitramfs | 55 ---------------------
2 files changed, 177 deletions(-)
delete mode 100644 README.initramfs
delete mode 100644 mkinitramfs
diff --git a/README.initramfs b/README.initramfs
deleted file mode 100644
index c5fa6680..00000000
--- a/README.initramfs
+++ /dev/null
@@ -1,122 +0,0 @@
-Assembling md arrays at boot time.
----------------------------------
-December 2005
-
-These notes apply to 2.6 kernels only and, in some cases,
-to 2.6.15 or later.
-
-Md arrays can be assembled at boot time using the 'autodetect' functionality
-which is triggered by storing components of an array in partitions of type
-'fd' - Linux Raid Autodetect.
-They can also be assembled by specifying the component devices in a
-kernel parameter such as
- md=0,/dev/sda,/dev/sdb
-In this case, /dev/md0 will be assembled (because of the 0) from the listed
-devices.
-
-These mechanisms, while useful, do not provide complete functionality
-and are unlikely to be extended. The preferred way to assemble md
-arrays at boot time is using 'mdadm'. To assemble an array which
-contains the root filesystem, mdadm needs to be run before that
-filesystem is mounted, and so needs to be run from an initial-ram-fs.
-It is how this can work that is the primary focus of this document.
-
-It should be noted up front that only the array containing the root
-filesystem should be assembled from the initramfs. Any other arrays
-should be assembled under the control of files on the main filesystem
-as this enhanced flexibility and maintainability.
-
-A minimal initramfs for assembling md arrays can be created using 3
-files and one directory. These are:
-
-/bin Directory
-/bin/mdadm statically linked mdadm binary
-/bin/busybox statically linked busybox binary
-/bin/sh hard link to /bin/busybox
-/init a shell script which call mdadm appropriately.
-
-An example init script is:
-
-==============================================
-#!/bin/sh
-
-echo 'Auto-assembling boot md array'
-mkdir /proc
-mount -t proc proc /proc
-if [ -n "$rootuuid" ]
-then arg=--uuid=$rootuuid
-elif [ -n "$mdminor" ]
-then arg=--super-minor=$mdminor
-else arg=--super-minor=0
-fi
-echo "Using $arg"
-mdadm -Acpartitions $arg --auto=part /dev/mda
-cd /
-mount /dev/mda1 /root || mount /dev/mda /root
-umount /proc
-cd /root
-exec chroot . /sbin/init < /dev/console > /dev/console 2>&1
-=============================================
-
-This could certainly be extended, or merged into a larger init script.
-Though tested and in production use, it is not presented here as
-"The Right Way" to do it, but as a useful example.
-Some key points are:
-
- /proc needs to be mounted so that /proc/partitions can be accessed
- by mdadm, and so that /proc/filesystems can be accessed by mount.
-
- The uuid of the array can be passed in as a kernel parameter
- (rootuuid). As the kernel doesn't use this value, it is made available
- in the environment for /init
-
- If no uuid is given, we default to md0, (--super-minor=0) which is a
- commonly used to store the root filesystem. This may not work in
- all situations.
-
- We assemble the array as a partitionable array (/dev/mda) even if we
- end up using the whole array. There is no cost in using the partitionable
- interface, and in this context it is simpler.
-
- We try mounting both /dev/mda1 and /dev/mda as they are the most like
- part of the array to contain the root filesystem.
-
- The --auto flag is given to mdadm so that it will create /dev/md*
- files automatically. This is needed as /dev will not contain
- and md files, and udev will not create them (as udev only created device
- files after the device exists, and mdadm need the device file to create
- the device). Note that the created md files may not exist in /dev
- of the mounted root filesystem. This needs to be deal with separately
- from mdadm - possibly using udev.
-
- We do not need to create device files for the components which will
- be assembled into /dev/mda. mdadm finds the major/minor numbers from
- /proc/partitions and creates a temporary /dev file if one doesn't already
- exist.
-
-The script "mkinitramfs" which is included with the mdadm distribution
-can be used to create a minimal initramfs. It creates a file called
-'init.cpio.gz' which can be specified as an 'initrd' to lilo or grub
-(or whatever boot loader is being used).
-
-
-
-
-Resume from an md array
------------------------
-
-If you want to make use of the suspend-to-disk/resume functionality in Linux,
-and want to have swap on an md array, you will need to assemble the array
-before resume is possible.
-However, because the array is active in the resumed image, you do not want
-anything written to any drives during the resume process, such as superblock
-updates or array resync.
-
-This can be achieved in 2.6.15-rc1 and later kernels using the
-'start_readonly' module parameter.
-Simply include the command
- echo 1 > /sys/module/md_mod/parameters/start_ro
-before assembling the array with 'mdadm'.
-You can then echo
- 9:0
-or whatever is appropriate to /sys/power/resume to trigger the resume.
diff --git a/mkinitramfs b/mkinitramfs
deleted file mode 100644
index c6275ddb..00000000
--- a/mkinitramfs
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/sh
-
-# make sure we are being run in the right directory...
-if [ -f mkinitramfs ]
-then :
-else
- echo >&2 mkinitramfs must be run from the mdadm source directory.
- exit 1
-fi
-if [ -f /bin/busybox ]
-then : good, it exists
- case `file /bin/busybox` in
- *statically* ) : good ;;
- * ) echo >&2 mkinitramfs: /bin/busybox is not statically linked: cannot proceed.
- exit 1
- esac
-else
- echo >&2 "mkinitramfs: /bin/busybox doesn't exist - please install it statically linked."
- exit 1
-fi
-
-rm -rf initramfs
-mkdir initramfs
-mkdir initramfs/bin
-make mdadm.static
-cp mdadm.static initramfs/bin/mdadm
-cp /bin/busybox initramfs/bin/busybox
-ln initramfs/bin/busybox initramfs/bin/sh
-cat <<- END > initramfs/init
- #!/bin/sh
-
- echo 'Auto-assembling boot md array'
- mkdir /proc
- mount -t proc proc /proc
- if [ -n "$rootuuid" ]
- then arg=--uuid=$rootuuid
- elif [ -n "$mdminor" ]
- then arg=--super-minor=$mdminor
- else arg=--super-minor=0
- fi
- echo "Using $arg"
- mdadm -Acpartitions $arg --auto=part /dev/mda
- cd /
- mount /dev/mda1 /root || mount /dev/mda /root
- umount /proc
- cd /root
- exec chroot . /sbin/init < /dev/console > /dev/console 2>&1
-END
-chmod +x initramfs/init
-
-(cd initramfs
- find init bin | cpio -o -H newc | gzip --best
-) > init.cpio.gz
-rm -rf initramfs
-ls -l init.cpio.gz
--
2.40.1

@ -1,301 +0,0 @@
From 756a15f32338fdf0c562678694bc8991ad6afb90 Mon Sep 17 00:00:00 2001
From: Mateusz Grzonka <mateusz.grzonka@intel.com>
Date: Mon, 13 Jun 2022 12:00:09 +0200
Subject: [PATCH 19/83] imsm: Remove possibility for get_imsm_dev to return
NULL
Returning NULL from get_imsm_dev or __get_imsm_dev will cause segfault.
Guarantee that it never happens.
Signed-off-by: Mateusz Grzonka <mateusz.grzonka@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 153 +++++++++++++++++++++++++-------------------------
1 file changed, 78 insertions(+), 75 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index ba3bd41f..3788feb9 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -851,6 +851,21 @@ static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
return inf;
}
+/**
+ * __get_imsm_dev() - Get device with index from imsm_super.
+ * @mpb: &imsm_super pointer, not NULL.
+ * @index: Device index.
+ *
+ * Function never returns NULL; it aborts in the case
+ * where NULL would otherwise be returned.
+ *
+ * Device index should be in range 0 up to num_raid_devs.
+ * Function assumes the index was already verified.
+ * Index must be valid, otherwise abort() is called.
+ *
+ * Return: Pointer to corresponding imsm_dev.
+ *
+ */
static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
{
int offset;
@@ -858,30 +873,47 @@ static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
void *_mpb = mpb;
if (index >= mpb->num_raid_devs)
- return NULL;
+ goto error;
/* devices start after all disks */
offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
- for (i = 0; i <= index; i++)
+ for (i = 0; i <= index; i++, offset += sizeof_imsm_dev(_mpb + offset, 0))
if (i == index)
return _mpb + offset;
- else
- offset += sizeof_imsm_dev(_mpb + offset, 0);
-
- return NULL;
+error:
+ pr_err("cannot find imsm_dev with index %u in imsm_super\n", index);
+ abort();
}
+/**
+ * get_imsm_dev() - Get device with index from intel_super.
+ * @super: &intel_super pointer, not NULL.
+ * @index: Device index.
+ *
+ * Function never returns NULL; it aborts in the case
+ * where NULL would otherwise be returned.
+ *
+ * Device index should be in range 0 up to num_raid_devs.
+ * Function assumes the index was already verified.
+ * Index must be valid, otherwise abort() is called.
+ *
+ * Return: Pointer to corresponding imsm_dev.
+ *
+ */
static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
{
struct intel_dev *dv;
if (index >= super->anchor->num_raid_devs)
- return NULL;
+ goto error;
+
for (dv = super->devlist; dv; dv = dv->next)
if (dv->index == index)
return dv->dev;
- return NULL;
+error:
+ pr_err("cannot find imsm_dev with index %u in intel_super\n", index);
+ abort();
}
static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr
@@ -4364,8 +4396,7 @@ int check_mpb_migr_compatibility(struct intel_super *super)
for (i = 0; i < super->anchor->num_raid_devs; i++) {
struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
- if (dev_iter &&
- dev_iter->vol.migr_state == 1 &&
+ if (dev_iter->vol.migr_state == 1 &&
dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
/* This device is migrating */
map0 = get_imsm_map(dev_iter, MAP_0);
@@ -4514,8 +4545,6 @@ static void clear_hi(struct intel_super *super)
}
for (i = 0; i < mpb->num_raid_devs; ++i) {
struct imsm_dev *dev = get_imsm_dev(super, i);
- if (!dev)
- return;
for (n = 0; n < 2; ++n) {
struct imsm_map *map = get_imsm_map(dev, n);
if (!map)
@@ -5836,7 +5865,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
_disk = __get_imsm_disk(mpb, dl->index);
- if (!_dev || !_disk) {
+ if (!_disk) {
pr_err("BUG mpb setup error\n");
return 1;
}
@@ -6171,10 +6200,10 @@ static int write_super_imsm(struct supertype *st, int doclose)
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
struct imsm_dev *dev2 = get_imsm_dev(super, i);
- if (dev && dev2) {
- imsm_copy_dev(dev, dev2);
- mpb_size += sizeof_imsm_dev(dev, 0);
- }
+
+ imsm_copy_dev(dev, dev2);
+ mpb_size += sizeof_imsm_dev(dev, 0);
+
if (is_gen_migration(dev2))
clear_migration_record = 0;
}
@@ -9033,29 +9062,26 @@ static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
__u8 state;
dev2 = get_imsm_dev(cont->sb, dev_idx);
- if (dev2) {
- state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
- if (state == IMSM_T_STATE_FAILED) {
- map = get_imsm_map(dev2, MAP_0);
- if (!map)
- return 1;
- for (slot = 0; slot < map->num_members; slot++) {
- /*
- * Check if failed disks are deleted from intel
- * disk list or are marked to be deleted
- */
- idx = get_imsm_disk_idx(dev2, slot, MAP_X);
- idisk = get_imsm_dl_disk(cont->sb, idx);
- /*
- * Do not rebuild the array if failed disks
- * from failed sub-array are not removed from
- * container.
- */
- if (idisk &&
- is_failed(&idisk->disk) &&
- (idisk->action != DISK_REMOVE))
- return 0;
- }
+
+ state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
+ if (state == IMSM_T_STATE_FAILED) {
+ map = get_imsm_map(dev2, MAP_0);
+ for (slot = 0; slot < map->num_members; slot++) {
+ /*
+ * Check if failed disks are deleted from intel
+ * disk list or are marked to be deleted
+ */
+ idx = get_imsm_disk_idx(dev2, slot, MAP_X);
+ idisk = get_imsm_dl_disk(cont->sb, idx);
+ /*
+ * Do not rebuild the array if failed disks
+ * from failed sub-array are not removed from
+ * container.
+ */
+ if (idisk &&
+ is_failed(&idisk->disk) &&
+ (idisk->action != DISK_REMOVE))
+ return 0;
}
}
return 1;
@@ -10089,7 +10115,6 @@ static void imsm_process_update(struct supertype *st,
int victim = u->dev_idx;
struct active_array *a;
struct intel_dev **dp;
- struct imsm_dev *dev;
/* sanity check that we are not affecting the uuid of
* active arrays, or deleting an active array
@@ -10105,8 +10130,7 @@ static void imsm_process_update(struct supertype *st,
* is active in the container, so checking
* mpb->num_raid_devs is just extra paranoia
*/
- dev = get_imsm_dev(super, victim);
- if (a || !dev || mpb->num_raid_devs == 1) {
+ if (a || mpb->num_raid_devs == 1 || victim >= super->anchor->num_raid_devs) {
dprintf("failed to delete subarray-%d\n", victim);
break;
}
@@ -10140,7 +10164,7 @@ static void imsm_process_update(struct supertype *st,
if (a->info.container_member == target)
break;
dev = get_imsm_dev(super, u->dev_idx);
- if (a || !dev || !check_name(super, name, 1)) {
+ if (a || !check_name(super, name, 1)) {
dprintf("failed to rename subarray-%d\n", target);
break;
}
@@ -10169,10 +10193,6 @@ static void imsm_process_update(struct supertype *st,
struct imsm_update_rwh_policy *u = (void *)update->buf;
int target = u->dev_idx;
struct imsm_dev *dev = get_imsm_dev(super, target);
- if (!dev) {
- dprintf("could not find subarray-%d\n", target);
- break;
- }
if (dev->rwh_policy != u->new_policy) {
dev->rwh_policy = u->new_policy;
@@ -11397,8 +11417,10 @@ static int imsm_create_metadata_update_for_migration(
{
struct intel_super *super = st->sb;
int update_memory_size;
+ int current_chunk_size;
struct imsm_update_reshape_migration *u;
- struct imsm_dev *dev;
+ struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
int previous_level = -1;
dprintf("(enter) New Level = %i\n", geo->level);
@@ -11415,23 +11437,15 @@ static int imsm_create_metadata_update_for_migration(
u->new_disks[0] = -1;
u->new_chunksize = -1;
- dev = get_imsm_dev(super, u->subdev);
- if (dev) {
- struct imsm_map *map;
+ current_chunk_size = __le16_to_cpu(map->blocks_per_strip) / 2;
- map = get_imsm_map(dev, MAP_0);
- if (map) {
- int current_chunk_size =
- __le16_to_cpu(map->blocks_per_strip) / 2;
-
- if (geo->chunksize != current_chunk_size) {
- u->new_chunksize = geo->chunksize / 1024;
- dprintf("imsm: chunk size change from %i to %i\n",
- current_chunk_size, u->new_chunksize);
- }
- previous_level = map->raid_level;
- }
+ if (geo->chunksize != current_chunk_size) {
+ u->new_chunksize = geo->chunksize / 1024;
+ dprintf("imsm: chunk size change from %i to %i\n",
+ current_chunk_size, u->new_chunksize);
}
+ previous_level = map->raid_level;
+
if (geo->level == 5 && previous_level == 0) {
struct mdinfo *spares = NULL;
@@ -12519,9 +12533,6 @@ static int validate_internal_bitmap_imsm(struct supertype *st)
unsigned long long offset;
struct dl *d;
- if (!dev)
- return -1;
-
if (dev->rwh_policy != RWH_BITMAP)
return 0;
@@ -12567,16 +12578,8 @@ static int add_internal_bitmap_imsm(struct supertype *st, int *chunkp,
return -1;
dev = get_imsm_dev(super, vol_idx);
-
- if (!dev) {
- dprintf("cannot find the device for volume index %d\n",
- vol_idx);
- return -1;
- }
dev->rwh_policy = RWH_BITMAP;
-
*chunkp = calculate_bitmap_chunksize(st, dev);
-
return 0;
}
--
2.38.1

File diff suppressed because it is too large Load Diff

@ -0,0 +1,75 @@
From 60c19530dd7cc6b38a75695a0a3d004bbe60d430 Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Tue, 27 Feb 2024 03:36:14 +0100
Subject: [PATCH 20/41] Detail: remove duplicated code
Remove duplicated code from Detail(), where MD_UUID and MD_DEVNAME
are being set. Superblock is no longer required to print system
properties. Now it tries to obtain map in two ways.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Detail.c | 33 +++++++++++++--------------------
1 file changed, 13 insertions(+), 20 deletions(-)
diff --git a/Detail.c b/Detail.c
index aaa3dd6e..f23ec16f 100644
--- a/Detail.c
+++ b/Detail.c
@@ -226,6 +226,9 @@ int Detail(char *dev, struct context *c)
str = map_num(pers, array.level);
if (c->export) {
+ char nbuf[64];
+ struct map_ent *mp = NULL, *map = NULL;
+
if (array.raid_disks) {
if (str)
printf("MD_LEVEL=%s\n", str);
@@ -247,32 +250,22 @@ int Detail(char *dev, struct context *c)
array.minor_version);
}
- if (st && st->sb && info) {
- char nbuf[64];
- struct map_ent *mp, *map = NULL;
-
- fname_from_uuid(st, info, nbuf, ':');
- printf("MD_UUID=%s\n", nbuf + 5);
+ if (info)
mp = map_by_uuid(&map, info->uuid);
+ if (!mp)
+ mp = map_by_devnm(&map, fd2devnm(fd));
- if (mp && mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
+ if (mp) {
+ __fname_from_uuid(mp->uuid, 0, nbuf, ':');
+ printf("MD_UUID=%s\n", nbuf + 5);
+ if (mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN);
+ }
+ map_free(map);
+ if (st && st->sb) {
if (st->ss->export_detail_super)
st->ss->export_detail_super(st);
- map_free(map);
- } else {
- struct map_ent *mp, *map = NULL;
- char nbuf[64];
- mp = map_by_devnm(&map, fd2devnm(fd));
- if (mp) {
- __fname_from_uuid(mp->uuid, 0, nbuf, ':');
- printf("MD_UUID=%s\n", nbuf+5);
- }
- if (mp && mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
- printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN);
-
- map_free(map);
}
if (!c->no_devices && sra) {
struct mdinfo *mdi;
--
2.40.1

@ -1,85 +0,0 @@
From 190dc029b141c423e724566cbed5d5afbb10b05a Mon Sep 17 00:00:00 2001
From: Nigel Croxon <ncroxon@redhat.com>
Date: Mon, 18 Apr 2022 13:44:23 -0400
Subject: [PATCH 20/83] Revert "mdadm: fix coredump of mdadm --monitor -r"
This reverts commit 546047688e1c64638f462147c755b58119cabdc8.
The change from commit mdadm: fix coredump of mdadm
--monitor -r broke the printing of the return message when
passing -r to mdadm --manage, the removal of a device from
an array.
If the current code reverts this commit, both issues are
still fixed.
The original problem reported that the fix tried to address
was: The --monitor -r option requires a parameter,
otherwise a null pointer will be manipulated when
converting to integer data, and a core dump will appear.
The original problem was really fixed with:
60815698c0a Refactor parse_num and use it to parse optarg.
Which added a check for NULL in 'optarg' before moving it
to the 'increments' variable.
New issue: When trying to remove a device using the short
argument -r, instead of the long argument --remove, the
output is empty. The problem started when commit
546047688e1c was added.
Steps to Reproduce:
1. create/assemble /dev/md0 device
2. mdadm --manage /dev/md0 -r /dev/vdxx
Actual results:
Nothing, empty output, nothing happens, the device is still
connected to the array.
The output should have stated "mdadm: hot remove failed
for /dev/vdxx: Device or resource busy", if the device was
still active. Or it should remove the device and print
a message:
mdadm: set /dev/vdd faulty in /dev/md0
mdadm: hot removed /dev/vdd from /dev/md0
The following commit should be reverted as it breaks
mdadm --manage -r.
commit 546047688e1c64638f462147c755b58119cabdc8
Author: Wu Guanghao <wuguanghao3@huawei.com>
Date: Mon Aug 16 15:24:51 2021 +0800
mdadm: fix coredump of mdadm --monitor -r
-Nigel
Signed-off-by: Nigel Croxon <ncroxon@redhat.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
ReadMe.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/ReadMe.c b/ReadMe.c
index 8f873c48..bec1be9a 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -81,11 +81,11 @@ char Version[] = "mdadm - v" VERSION " - " VERS_DATE EXTRAVERSION "\n";
* found, it is started.
*/
-char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:r:n:x:u:c:d:z:U:N:safRSow1tye:k";
+char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
char short_bitmap_options[]=
- "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:r:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
char short_bitmap_auto_options[]=
- "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:r:n:x:u:c:d:z:U:N:sa:rfRSow1tye:k:";
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:k:";
struct option long_options[] = {
{"manage", 0, 0, ManageOpt},
--
2.38.1

@ -0,0 +1,286 @@
From 0c0f09cb035b6a27a1d11c54836742a9945a5014 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:05 +0100
Subject: [PATCH 21/41] mdadm: Add functions for spare criteria verification
Spare criteria are verified in a similar way in a few places. By unifying
this, two almost identical functions (dev_size_from_id() and
dev_sector_size_from_id()) are removed. Now, the same file descriptor is used to send both ioctls.
Two extern functions are added; disk_fd_matches_criteria() is used
in the next patches.
The next optimization is zeroing struct spare_criteria inline, at declaration.
With that, we don't need to reset values in get_spare_criteria_imsm().
A dedicated boolean field for checking whether the criteria are filled is added.
We don't need to execute the code if it is not set.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Incremental.c | 2 +-
Monitor.c | 14 +------
mdadm.h | 6 ++-
super-intel.c | 4 +-
util.c | 112 ++++++++++++++++++++++++++------------------------
5 files changed, 67 insertions(+), 71 deletions(-)
diff --git a/Incremental.c b/Incremental.c
index 30c07c03..2b5a5859 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -874,7 +874,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
struct domainlist *dl = NULL;
struct mdinfo *sra;
unsigned long long devsize, freesize = 0;
- struct spare_criteria sc = {0, 0};
+ struct spare_criteria sc = {0};
if (is_subarray(mp->metadata))
continue;
diff --git a/Monitor.c b/Monitor.c
index 9be2b528..1ece8712 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -1070,22 +1070,12 @@ static dev_t choose_spare(struct state *from, struct state *to,
for (d = from->raid; !dev && d < MAX_DISKS; d++) {
if (from->devid[d] > 0 && from->devstate[d] == 0) {
struct dev_policy *pol;
- unsigned long long dev_size;
- unsigned int dev_sector_size;
if (to->metadata->ss->external &&
test_partition_from_id(from->devid[d]))
continue;
- if (sc->min_size &&
- dev_size_from_id(from->devid[d], &dev_size) &&
- dev_size < sc->min_size)
- continue;
-
- if (sc->sector_size &&
- dev_sector_size_from_id(from->devid[d],
- &dev_sector_size) &&
- sc->sector_size != dev_sector_size)
+ if (devid_matches_criteria(from->devid[d], sc) == false)
continue;
pol = devid_policy(from->devid[d]);
@@ -1170,12 +1160,12 @@ static void try_spare_migration(struct state *statelist)
{
struct state *from;
struct state *st;
- struct spare_criteria sc;
link_containers_with_subarrays(statelist);
for (st = statelist; st; st = st->next)
if (st->active < st->raid && st->spare == 0 && !st->err) {
struct domainlist *domlist = NULL;
+ struct spare_criteria sc = {0};
int d;
struct state *to = st;
diff --git a/mdadm.h b/mdadm.h
index 75c887e4..e8abd730 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -430,6 +430,7 @@ struct createinfo {
};
struct spare_criteria {
+ bool criteria_set;
unsigned long long min_size;
unsigned int sector_size;
};
@@ -1368,8 +1369,6 @@ extern struct supertype *dup_super(struct supertype *st);
extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
extern int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep);
extern int must_be_container(int fd);
-extern int dev_size_from_id(dev_t id, unsigned long long *size);
-extern int dev_sector_size_from_id(dev_t id, unsigned int *size);
void wait_for(char *dev, int fd);
/*
@@ -1708,6 +1707,9 @@ extern int assemble_container_content(struct supertype *st, int mdfd,
#define INCR_UNSAFE 2
#define INCR_ALREADY 4
#define INCR_YES 8
+
+extern bool devid_matches_criteria(dev_t devid, struct spare_criteria *sc);
+extern bool disk_fd_matches_criteria(int disk_fd, struct spare_criteria *sc);
extern struct mdinfo *container_choose_spares(struct supertype *st,
struct spare_criteria *criteria,
struct domainlist *domlist,
diff --git a/super-intel.c b/super-intel.c
index 4babec9f..39ec4754 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1748,9 +1748,6 @@ int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
int i;
unsigned long long size = 0;
- c->min_size = 0;
- c->sector_size = 0;
-
if (!super)
return -EINVAL;
/* find first active disk in array */
@@ -1774,6 +1771,7 @@ int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
c->min_size = size * 512;
c->sector_size = super->sector_size;
+ c->criteria_set = true;
return 0;
}
diff --git a/util.c b/util.c
index b1454473..041e78cf 100644
--- a/util.c
+++ b/util.c
@@ -1266,40 +1266,6 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
return st;
}
-int dev_size_from_id(dev_t id, unsigned long long *size)
-{
- char buf[20];
- int fd;
-
- sprintf(buf, "%d:%d", major(id), minor(id));
- fd = dev_open(buf, O_RDONLY);
- if (fd < 0)
- return 0;
- if (get_dev_size(fd, NULL, size)) {
- close(fd);
- return 1;
- }
- close(fd);
- return 0;
-}
-
-int dev_sector_size_from_id(dev_t id, unsigned int *size)
-{
- char buf[20];
- int fd;
-
- sprintf(buf, "%d:%d", major(id), minor(id));
- fd = dev_open(buf, O_RDONLY);
- if (fd < 0)
- return 0;
- if (get_dev_sector_size(fd, NULL, size)) {
- close(fd);
- return 1;
- }
- close(fd);
- return 0;
-}
-
struct supertype *dup_super(struct supertype *orig)
{
struct supertype *st;
@@ -2088,6 +2054,60 @@ void append_metadata_update(struct supertype *st, void *buf, int len)
unsigned int __invalid_size_argument_for_IOC = 0;
#endif
+/**
+ * disk_fd_matches_criteria() - check if device matches spare criteria.
+ * @disk_fd: file descriptor of the disk.
+ * @sc: criteria to test.
+ *
+ * Return: true if disk matches criteria, false otherwise.
+ */
+bool disk_fd_matches_criteria(int disk_fd, struct spare_criteria *sc)
+{
+ unsigned int dev_sector_size = 0;
+ unsigned long long dev_size = 0;
+
+ if (!sc->criteria_set)
+ return true;
+
+ if (!get_dev_size(disk_fd, NULL, &dev_size) || dev_size < sc->min_size)
+ return false;
+
+ if (!get_dev_sector_size(disk_fd, NULL, &dev_sector_size) ||
+ sc->sector_size != dev_sector_size)
+ return false;
+
+ return true;
+}
+
+/**
+ * devid_matches_criteria() - check if device referenced by devid matches spare criteria.
+ * @devid: devid of the device to check.
+ * @sc: criteria to test.
+ *
+ * Return: true if disk matches criteria, false otherwise.
+ */
+bool devid_matches_criteria(dev_t devid, struct spare_criteria *sc)
+{
+ char buf[NAME_MAX];
+ bool ret;
+ int fd;
+
+ if (!sc->criteria_set)
+ return true;
+
+ snprintf(buf, NAME_MAX, "%d:%d", major(devid), minor(devid));
+
+ fd = dev_open(buf, O_RDONLY);
+ if (!is_fd_valid(fd))
+ return false;
+
+ /* Error code inherited */
+ ret = disk_fd_matches_criteria(fd, sc);
+
+ close(fd);
+ return ret;
+}
+
/* Pick all spares matching given criteria from a container
* if min_size == 0 do not check size
* if domlist == NULL do not check domains
@@ -2111,28 +2131,13 @@ struct mdinfo *container_choose_spares(struct supertype *st,
dp = &disks->devs;
disks->array.spare_disks = 0;
while (*dp) {
- int found = 0;
+ bool found = false;
+
d = *dp;
if (d->disk.state == 0) {
- /* check if size is acceptable */
- unsigned long long dev_size;
- unsigned int dev_sector_size;
- int size_valid = 0;
- int sector_size_valid = 0;
-
dev_t dev = makedev(d->disk.major,d->disk.minor);
- if (!criteria->min_size ||
- (dev_size_from_id(dev, &dev_size) &&
- dev_size >= criteria->min_size))
- size_valid = 1;
-
- if (!criteria->sector_size ||
- (dev_sector_size_from_id(dev, &dev_sector_size) &&
- criteria->sector_size == dev_sector_size))
- sector_size_valid = 1;
-
- found = size_valid && sector_size_valid;
+ found = devid_matches_criteria(dev, criteria);
/* check if domain matches */
if (found && domlist) {
@@ -2141,7 +2146,8 @@ struct mdinfo *container_choose_spares(struct supertype *st,
pol_add(&pol, pol_domain,
spare_group, NULL);
if (domain_test(domlist, pol, metadata) != 1)
- found = 0;
+ found = false;
+
dev_policy_free(pol);
}
}
--
2.40.1

@ -1,31 +0,0 @@
From 953cc7e5a485a91ddec7312c7a5d7779749fad5f Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Tue, 21 Jun 2022 00:10:39 +0800
Subject: [PATCH 21/83] util: replace ioctl use with function
Replace the direct ioctl call used to get md array info with
the dedicated function prepared for it.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
util.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/util.c b/util.c
index cc94f96e..38f0420e 100644
--- a/util.c
+++ b/util.c
@@ -267,7 +267,7 @@ int md_array_active(int fd)
* GET_ARRAY_INFO doesn't provide access to the proper state
* information, so fallback to a basic check for raid_disks != 0
*/
- ret = ioctl(fd, GET_ARRAY_INFO, &array);
+ ret = md_get_array_info(fd, &array);
}
return !ret;
--
2.38.1

@ -0,0 +1,357 @@
From f656201188d73cdc2726265f1348f8ffbf7587be Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:06 +0100
Subject: [PATCH 22/41] mdadm: drop get_required_spare_criteria()
Only IMSM implements get_spare_criteria, so load_super() in
get_required_spare_criteria() is dead code. Metadata loading is moved inside
the metadata handler, because only IMSM implements it.
Make it possible to provide the devnode to be opened. With that we can hide
load_container(), used only to fill spare criteria, inside the handler
and simplify the implementation in generic code.
Add helper function for testing spare criteria in Incremental and
error messages.
File descriptor in get_spare_criteria_imsm() is always opened on purpose.
New functionality added in next patches will require it. For the same
reason, function is moved to other place.
No functional changes.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Incremental.c | 77 ++++++++++++++++++++++----------
Monitor.c | 35 +++------------
mdadm.h | 5 +--
super-intel.c | 120 +++++++++++++++++++++++++++++++++-----------------
4 files changed, 140 insertions(+), 97 deletions(-)
diff --git a/Incremental.c b/Incremental.c
index 2b5a5859..66c2cc86 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -833,6 +833,53 @@ container_members_max_degradation(struct map_ent *map, struct map_ent *me)
return max_degraded;
}
+/**
+ * incremental_external_test_spare_criteria() - helper to test spare criteria.
+ * @st: supertype, must be not NULL, it is duplicated here.
+ * @container_devnm: devnm of the container.
+ * @disk_fd: file descriptor of device to be tested.
+ * @verbose: verbose flag.
+ *
+ * The function is used on the new drive verification path to check if the drive can be added to
+ * an external container. To test spare criteria, metadata must be loaded. It duplicates super so
+ * as not to modify the original one.
+ * Function is executed if superblock supports get_spare_criteria(), otherwise success is returned.
+ */
+mdadm_status_t incremental_external_test_spare_criteria(struct supertype *st, char *container_devnm,
+ int disk_fd, int verbose)
+{
+ mdadm_status_t rv = MDADM_STATUS_ERROR;
+ char container_devname[PATH_MAX];
+ struct spare_criteria sc = {0};
+ struct supertype *dup;
+
+ if (!st->ss->get_spare_criteria)
+ return MDADM_STATUS_SUCCESS;
+
+ dup = dup_super(st);
+ snprintf(container_devname, PATH_MAX, "/dev/%s", container_devnm);
+
+ if (dup->ss->get_spare_criteria(dup, container_devname, &sc) != 0) {
+ if (verbose > 1)
+ pr_err("Failed to get spare criteria for %s\n", container_devname);
+ goto out;
+ }
+
+ if (!disk_fd_matches_criteria(disk_fd, &sc)) {
+ if (verbose > 1)
+ pr_err("Disk does not match spare criteria for %s\n", container_devname);
+ goto out;
+ }
+
+ rv = MDADM_STATUS_SUCCESS;
+
+out:
+ dup->ss->free_super(dup);
+ free(dup);
+
+ return rv;
+}
+
static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
struct map_ent *target, int bare,
struct supertype *st, int verbose)
@@ -873,8 +920,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
struct supertype *st2;
struct domainlist *dl = NULL;
struct mdinfo *sra;
- unsigned long long devsize, freesize = 0;
- struct spare_criteria sc = {0};
+ unsigned long long freesize = 0;
if (is_subarray(mp->metadata))
continue;
@@ -925,34 +971,19 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
if (sra->array.failed_disks == -1)
sra->array.failed_disks = container_members_max_degradation(map, mp);
- get_dev_size(dfd, NULL, &devsize);
if (sra->component_size == 0) {
- /* true for containers, here we must read superblock
- * to obtain minimum spare size */
- struct supertype *st3 = dup_super(st2);
- int mdfd = open_dev(mp->devnm);
- if (mdfd < 0) {
- free(st3);
+ /* true for containers */
+ if (incremental_external_test_spare_criteria(st2, mp->devnm, dfd, verbose))
goto next;
- }
- if (st3->ss->load_container &&
- !st3->ss->load_container(st3, mdfd, mp->path)) {
- if (st3->ss->get_spare_criteria)
- st3->ss->get_spare_criteria(st3, &sc);
- st3->ss->free_super(st3);
- }
- free(st3);
- close(mdfd);
}
- if ((sra->component_size > 0 &&
- st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout,
+
+ if (sra->component_size > 0 &&
+ st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout,
sra->array.raid_disks, &sra->array.chunk_size,
sra->component_size,
sra->devs ? sra->devs->data_offset : INVALID_SECTORS,
devname, &freesize, sra->consistency_policy,
- 0) &&
- freesize < sra->component_size) ||
- (sra->component_size == 0 && devsize < sc.min_size)) {
+ 0) && freesize < sra->component_size) {
if (verbose > 1)
pr_err("not adding %s to %s as it is too small\n",
devname, mp->path);
diff --git a/Monitor.c b/Monitor.c
index 1ece8712..6b4560ae 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -1008,34 +1008,6 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist)
return new_found;
}
-static int get_required_spare_criteria(struct state *st,
- struct spare_criteria *sc)
-{
- int fd;
-
- if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
- sc->min_size = 0;
- sc->sector_size = 0;
- return 0;
- }
-
- fd = open(st->devname, O_RDONLY);
- if (fd < 0)
- return 1;
- if (st->metadata->ss->external)
- st->metadata->ss->load_container(st->metadata, fd, st->devname);
- else
- st->metadata->ss->load_super(st->metadata, fd, st->devname);
- close(fd);
- if (!st->metadata->sb)
- return 1;
-
- st->metadata->ss->get_spare_criteria(st->metadata, sc);
- st->metadata->ss->free_super(st->metadata);
-
- return 0;
-}
-
static int check_donor(struct state *from, struct state *to)
{
struct state *sub;
@@ -1178,8 +1150,11 @@ static void try_spare_migration(struct state *statelist)
/* member of a container */
to = to->parent;
- if (get_required_spare_criteria(to, &sc))
- continue;
+ if (to->metadata->ss->get_spare_criteria)
+ if (to->metadata->ss->get_spare_criteria(to->metadata, to->devname,
+ &sc))
+ continue;
+
if (to->metadata->ss->external) {
/* We must make sure there is
* no suitable spare in container already.
diff --git a/mdadm.h b/mdadm.h
index e8abd730..cbc586f5 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1116,10 +1116,9 @@ extern struct superswitch {
* Return spare criteria for array:
* - minimum disk size can be used in array;
* - sector size can be used in array.
- * Return values: 0 - for success and -EINVAL on error.
*/
- int (*get_spare_criteria)(struct supertype *st,
- struct spare_criteria *sc);
+ mdadm_status_t (*get_spare_criteria)(struct supertype *st, char *mddev_path,
+ struct spare_criteria *sc);
/* Find somewhere to put a bitmap - possibly auto-size it - and
* update the metadata to record this. The array may be newly
* created, in which case data_size may be updated, or it might
diff --git a/super-intel.c b/super-intel.c
index 39ec4754..7ad391ac 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1736,46 +1736,6 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super)
return (remainder < rv) ? remainder : rv;
}
-/*
- * Return minimum size of a spare and sector size
- * that can be used in this array
- */
-int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
-{
- struct intel_super *super = st->sb;
- struct dl *dl;
- struct extent *e;
- int i;
- unsigned long long size = 0;
-
- if (!super)
- return -EINVAL;
- /* find first active disk in array */
- dl = super->disks;
- while (dl && (is_failed(&dl->disk) || dl->index == -1))
- dl = dl->next;
- if (!dl)
- return -EINVAL;
- /* find last lba used by subarrays */
- e = get_extents(super, dl, 0);
- if (!e)
- return -EINVAL;
- for (i = 0; e[i].size; i++)
- continue;
- if (i > 0)
- size = e[i-1].start + e[i-1].size;
- free(e);
-
- /* add the amount of space needed for metadata */
- size += imsm_min_reserved_sectors(super);
-
- c->min_size = size * 512;
- c->sector_size = super->sector_size;
- c->criteria_set = true;
-
- return 0;
-}
-
static bool is_gen_migration(struct imsm_dev *dev);
#define IMSM_4K_DIV 8
@@ -11295,6 +11255,84 @@ static const char *imsm_get_disk_controller_domain(const char *path)
return drv;
}
+/**
+ * get_spare_criteria_imsm() - set spare criteria.
+ * @st: supertype.
+ * @mddev_path: path to md device devnode, it must be container.
+ * @c: spare_criteria struct to fill, not NULL.
+ *
+ * If superblock is not loaded, use mddev_path to load_container. It must be given in this case.
+ * Fills size and sector size according to the superblock.
+ */
+mdadm_status_t get_spare_criteria_imsm(struct supertype *st, char *mddev_path,
+ struct spare_criteria *c)
+{
+ mdadm_status_t ret = MDADM_STATUS_ERROR;
+ bool free_superblock = false;
+ unsigned long long size = 0;
+ struct intel_super *super;
+ struct extent *e;
+ struct dl *dl;
+ int i;
+
+ /* If no superblock and no mddev_path, we cannot load superblock. */
+ assert(st->sb || mddev_path);
+
+ if (mddev_path) {
+ int fd = open(mddev_path, O_RDONLY);
+
+ if (!is_fd_valid(fd))
+ return MDADM_STATUS_ERROR;
+
+ if (!st->sb) {
+ if (load_container_imsm(st, fd, st->devnm)) {
+ close(fd);
+ return MDADM_STATUS_ERROR;
+ }
+ free_superblock = true;
+ }
+ close(fd);
+ }
+
+ super = st->sb;
+
+ /* find first active disk in array */
+ dl = super->disks;
+ while (dl && (is_failed(&dl->disk) || dl->index == -1))
+ dl = dl->next;
+
+ if (!dl)
+ goto out;
+
+ /* find last lba used by subarrays */
+ e = get_extents(super, dl, 0);
+ if (!e)
+ goto out;
+
+ for (i = 0; e[i].size; i++)
+ continue;
+ if (i > 0)
+ size = e[i - 1].start + e[i - 1].size;
+ free(e);
+
+ /* add the amount of space needed for metadata */
+ size += imsm_min_reserved_sectors(super);
+
+ c->min_size = size * 512;
+ c->sector_size = super->sector_size;
+ c->criteria_set = true;
+ ret = MDADM_STATUS_SUCCESS;
+
+out:
+ if (free_superblock)
+ free_super_imsm(st);
+
+ if (ret != MDADM_STATUS_SUCCESS)
+ c->criteria_set = false;
+
+ return ret;
+}
+
static char *imsm_find_array_devnm_by_subdev(int subdev, char *container)
{
static char devnm[32];
@@ -11425,7 +11463,7 @@ static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
struct spare_criteria sc;
- get_spare_criteria_imsm(st, &sc);
+ get_spare_criteria_imsm(st, NULL, &sc);
return container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
}
--
2.40.1

@ -1,110 +0,0 @@
From 63902857b98c37c8ac4b837bb01d006b327a4532 Mon Sep 17 00:00:00 2001
From: Heming Zhao <heming.zhao@suse.com>
Date: Tue, 21 Jun 2022 00:10:40 +0800
Subject: [PATCH 22/83] mdadm/super1: restore commit 45a87c2f31335 to fix
clustered slot issue
Commit 9d67f6496c71 ("mdadm:check the nodes when operate clustered
array") modified assignment logic for st->nodes in write_bitmap1(),
which introduced bitmap slot issue:
load_super1 didn't set up supertype.nodes, which made spare disk only
have one slot info. Then it triggered kernel md_bitmap_load_sb to get
wrong bitmap slot data.
For fixing this issue, there are two methods:
1> revert the related code of commit 9d67f6496c71. and restore the code
from former commit 45a87c2f31335 ("super1: add more checks for
NodeNumUpdate option").
st->nodes value would be 0 & 1 under current code logic. i.e.
When adding a spare disk, there is no place to init st->nodes, and
the value is ZERO.
2> keep 9d67f6496c71, add additional ->nodes handling in load_super1(),
let load_super1 to set st->nodes when bitmap is BITMAP_MAJOR_CLUSTERED.
Under current mdadm code logic, load_super1 will be called many
times, any new code in load_super1 will cost mdadm running more time.
And more reason is I prefer as much as possible to limit clustered
code spreading in every corner.
So I used method <1> to fix this issue.
How to trigger:
dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sda
dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sdb
dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sdc
mdadm -C /dev/md0 -b clustered -e 1.2 -n 2 -l mirror /dev/sda /dev/sdb
mdadm -a /dev/md0 /dev/sdc
mdadm /dev/md0 --fail /dev/sda
mdadm /dev/md0 --remove /dev/sda
mdadm -Ss
mdadm -A /dev/md0 /dev/sdb /dev/sdc
the output of current "mdadm -X /dev/sdc":
(there should be (by default) 4 slot info for correct output)
```
Filename : /dev/sdc
Magic : 6d746962
Version : 5
UUID : a74642f8:a6b1fba8:58e1f8db:cfe7b082
Events : 29
Events Cleared : 0
State : OK
Chunksize : 64 MB
Daemon : 5s flush period
Write Mode : Normal
Sync Size : 306176 (299.00 MiB 313.52 MB)
Bitmap : 5 bits (chunks), 5 dirty (100.0%)
```
And mdadm later operations will trigger kernel output error message:
(triggered by "mdadm -A /dev/md0 /dev/sdb /dev/sdc")
```
kernel: md0: invalid bitmap file superblock: bad magic
kernel: md_bitmap_copy_from_slot can't get bitmap from slot 1
kernel: md-cluster: Could not gather bitmaps from slot 1
kernel: md0: invalid bitmap file superblock: bad magic
kernel: md_bitmap_copy_from_slot can't get bitmap from slot 2
kernel: md-cluster: Could not gather bitmaps from slot 2
kernel: md0: invalid bitmap file superblock: bad magic
kernel: md_bitmap_copy_from_slot can't get bitmap from slot 3
kernel: md-cluster: Could not gather bitmaps from slot 3
kernel: md-cluster: failed to gather all resyn infos
kernel: md0: detected capacity change from 0 to 612352
```
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super1.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/super1.c b/super1.c
index e3e2f954..3a0c69fd 100644
--- a/super1.c
+++ b/super1.c
@@ -2674,7 +2674,17 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
}
if (bms->version == BITMAP_MAJOR_CLUSTERED) {
- if (__cpu_to_le32(st->nodes) < bms->nodes) {
+ if (st->nodes == 1) {
+ /* the parameter for nodes is not valid */
+ pr_err("Warning: cluster-md at least needs two nodes\n");
+ return -EINVAL;
+ } else if (st->nodes == 0) {
+ /*
+ * parameter "--nodes" is not specified, (eg, add a disk to
+ * clustered raid)
+ */
+ break;
+ } else if (__cpu_to_le32(st->nodes) < bms->nodes) {
/*
* Since the nodes num is not increased, no
* need to check the space enough or not,
--
2.38.1

@ -0,0 +1,64 @@
From e97ca3583c96591af0e4863c12c394074a51c84d Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:07 +0100
Subject: [PATCH 23/41] Manage: fix check after dereference issue
The code dereferences dev_st before checking it for NULL, which is reported
as a SAST problem.
dev_st is needed for attempt_re_add(), but it is executed only if
dv->disposition != 'S', so move disposition check up.
tst must be valid to reach this place, so dup_super() has to return a valid
pointer; all that needs checking is whether load_super() loaded a superblock.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Manage.c | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/Manage.c b/Manage.c
index 30302ac8..77b79cf5 100644
--- a/Manage.c
+++ b/Manage.c
@@ -794,25 +794,23 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
* simply re-add it.
*/
- if (array->not_persistent == 0) {
+ if (array->not_persistent == 0 && dv->disposition != 'S') {
+ int rv = 0;
+
dev_st = dup_super(tst);
dev_st->ss->load_super(dev_st, tfd, NULL);
- if (dev_st->sb && dv->disposition != 'S') {
- int rv;
- rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
- rdev, update, devname,
- verbose, array);
- dev_st->ss->free_super(dev_st);
- if (rv) {
- free(dev_st);
- return rv;
- }
- }
- if (dev_st) {
+ if (dev_st->sb) {
+ rv = attempt_re_add(fd, tfd, dv, dev_st, tst, rdev, update,
+ devname, verbose, array);
+
dev_st->ss->free_super(dev_st);
- free(dev_st);
}
+
+ free(dev_st);
+
+ if (rv)
+ return rv;
}
if (dv->disposition == 'M') {
if (verbose > 0)
--
2.40.1

@ -1,122 +0,0 @@
From 76c152ca9851e9fcdf52e8f6e7e6c09b936bdd14 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Tue, 21 Jun 2022 00:10:41 +0800
Subject: [PATCH 23/83] imsm: introduce get_disk_slot_in_dev()
The routine was added to remove unnecessary get_imsm_dev() and
get_imsm_map() calls, used only to determine disk slot.
Additionally, an enum for IMSM return statuses was added for further usage.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 47 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 36 insertions(+), 11 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 3788feb9..cd1f1e3d 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -366,6 +366,18 @@ struct migr_record {
};
ASSERT_SIZE(migr_record, 128)
+/**
+ * enum imsm_status - internal IMSM return values representation.
+ * @IMSM_STATUS_OK: function succeeded.
+ * @IMSM_STATUS_ERROR: General error occurred (not specified).
+ *
+ * Typedefed to imsm_status_t.
+ */
+typedef enum imsm_status {
+ IMSM_STATUS_ERROR = -1,
+ IMSM_STATUS_OK = 0,
+} imsm_status_t;
+
struct md_list {
/* usage marker:
* 1: load metadata
@@ -1183,7 +1195,7 @@ static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
-static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
+static int get_imsm_disk_slot(struct imsm_map *map, const unsigned int idx)
{
int slot;
__u32 ord;
@@ -1194,7 +1206,7 @@ static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
return slot;
}
- return -1;
+ return IMSM_STATUS_ERROR;
}
static int get_imsm_raid_level(struct imsm_map *map)
@@ -1209,6 +1221,23 @@ static int get_imsm_raid_level(struct imsm_map *map)
return map->raid_level;
}
+/**
+ * get_disk_slot_in_dev() - retrieve disk slot from &imsm_dev.
+ * @super: &intel_super pointer, not NULL.
+ * @dev_idx: imsm device index.
+ * @idx: disk index.
+ *
+ * Return: Slot on success, IMSM_STATUS_ERROR otherwise.
+ */
+static int get_disk_slot_in_dev(struct intel_super *super, const __u8 dev_idx,
+ const unsigned int idx)
+{
+ struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+ return get_imsm_disk_slot(map, idx);
+}
+
static int cmp_extent(const void *av, const void *bv)
{
const struct extent *a = av;
@@ -1225,13 +1254,9 @@ static int count_memberships(struct dl *dl, struct intel_super *super)
int memberships = 0;
int i;
- for (i = 0; i < super->anchor->num_raid_devs; i++) {
- struct imsm_dev *dev = get_imsm_dev(super, i);
- struct imsm_map *map = get_imsm_map(dev, MAP_0);
-
- if (get_imsm_disk_slot(map, dl->index) >= 0)
+ for (i = 0; i < super->anchor->num_raid_devs; i++)
+ if (get_disk_slot_in_dev(super, i, dl->index) >= 0)
memberships++;
- }
return memberships;
}
@@ -1941,6 +1966,7 @@ void examine_migr_rec_imsm(struct intel_super *super)
/* first map under migration */
map = get_imsm_map(dev, MAP_0);
+
if (map)
slot = get_imsm_disk_slot(map, super->disks->index);
if (map == NULL || slot > 1 || slot < 0) {
@@ -9655,10 +9681,9 @@ static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
/* count arrays using the victim in the metadata */
found = 0;
for (a = active_array; a ; a = a->next) {
- dev = get_imsm_dev(super, a->info.container_member);
- map = get_imsm_map(dev, MAP_0);
+ int dev_idx = a->info.container_member;
- if (get_imsm_disk_slot(map, victim) >= 0)
+ if (get_disk_slot_in_dev(super, dev_idx, victim) >= 0)
found++;
}
--
2.38.1

@ -0,0 +1,184 @@
From 29273f606542d915a3ddf37bb084f4eff54fcc3b Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:08 +0100
Subject: [PATCH 24/41] Manage: implement manage_add_external()
Move external add code to separate function. It is easier to control
error path now. Error messages are adjusted.
No functional changes.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Manage.c | 147 ++++++++++++++++++++++++++++++++-----------------------
1 file changed, 86 insertions(+), 61 deletions(-)
diff --git a/Manage.c b/Manage.c
index 77b79cf5..b3e216cb 100644
--- a/Manage.c
+++ b/Manage.c
@@ -695,6 +695,91 @@ skip_re_add:
return 0;
}
+/**
+ * manage_add_external() - Add disk to external container.
+ * @st: external supertype pointer, must not be NULL, superblock is released here.
+ * @fd: container file descriptor, must not have O_EXCL mode.
+ * @disk_fd: device to add file descriptor.
+ * @disk_name: name of the device to add.
+ * @disc: disk info.
+ *
+ * Superblock is released here because any open fd with O_EXCL will block sysfs_add_disk().
+ */
+mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name,
+ mdu_disk_info_t *disc)
+{
+ mdadm_status_t rv = MDADM_STATUS_ERROR;
+ char container_devpath[MD_NAME_MAX];
+ struct mdinfo new_mdi;
+ struct mdinfo *sra = NULL;
+ int container_fd;
+ int disk_fd = -1;
+
+ snprintf(container_devpath, MD_NAME_MAX, "%s", fd2devnm(fd));
+
+ container_fd = open_dev_excl(container_devpath);
+ if (!is_fd_valid(container_fd)) {
+ pr_err("Failed to get exclusive access to container %s\n", container_devpath);
+ return MDADM_STATUS_ERROR;
+ }
+
+ /* Check if metadata handler is able to accept the drive */
+ if (!st->ss->validate_geometry(st, LEVEL_CONTAINER, 0, 1, NULL, 0, 0, disk_name, NULL,
+ 0, 1))
+ goto out;
+
+ Kill(disk_name, NULL, 0, -1, 0);
+
+ disk_fd = dev_open(disk_name, O_RDWR | O_EXCL | O_DIRECT);
+ if (!is_fd_valid(disk_fd)) {
+ pr_err("Failed to exclusively open %s\n", disk_name);
+ goto out;
+ }
+
+ if (st->ss->add_to_super(st, disc, disk_fd, disk_name, INVALID_SECTORS))
+ goto out;
+
+ if (!mdmon_running(st->container_devnm))
+ st->ss->sync_metadata(st);
+
+ sra = sysfs_read(container_fd, NULL, 0);
+ if (!sra) {
+ pr_err("Failed to read sysfs for %s\n", disk_name);
+ goto out;
+ }
+
+ sra->array.level = LEVEL_CONTAINER;
+ /* Need to set data_offset and component_size */
+ st->ss->getinfo_super(st, &new_mdi, NULL);
+ new_mdi.disk.major = disc->major;
+ new_mdi.disk.minor = disc->minor;
+ new_mdi.recovery_start = 0;
+
+ st->ss->free_super(st);
+
+ if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+ pr_err("Failed to add %s to container %s\n", disk_name, container_devpath);
+ goto out;
+ }
+ ping_monitor(container_devpath);
+ rv = MDADM_STATUS_SUCCESS;
+
+out:
+ close(container_fd);
+
+ if (sra)
+ sysfs_free(sra);
+
+ if (rv != MDADM_STATUS_SUCCESS && is_fd_valid(disk_fd))
+ /* Metadata handler records this descriptor, so release it only on failure. */
+ close(disk_fd);
+
+ if (st->sb)
+ st->ss->free_super(st);
+
+ return rv;
+}
+
int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *tst, mdu_array_info_t *array,
int force, int verbose, char *devname,
@@ -966,68 +1051,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
if (dv->failfast == FlagSet)
disc.state |= (1 << MD_DISK_FAILFAST);
if (tst->ss->external) {
- /* add a disk
- * to an external metadata container */
- struct mdinfo new_mdi;
- struct mdinfo *sra;
- int container_fd;
- char devnm[32];
- int dfd;
-
- strcpy(devnm, fd2devnm(fd));
-
- container_fd = open_dev_excl(devnm);
- if (container_fd < 0) {
- pr_err("add failed for %s: could not get exclusive access to container\n",
- dv->devname);
- tst->ss->free_super(tst);
+ if (manage_add_external(tst, fd, dv->devname, &disc) != MDADM_STATUS_SUCCESS)
goto unlock;
- }
-
- /* Check if metadata handler is able to accept the drive */
- if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
- 0, 0, dv->devname, NULL, 0, 1)) {
- close(container_fd);
- goto unlock;
- }
-
- Kill(dv->devname, NULL, 0, -1, 0);
- dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
- if (tst->ss->add_to_super(tst, &disc, dfd,
- dv->devname, INVALID_SECTORS)) {
- close(dfd);
- close(container_fd);
- goto unlock;
- }
- if (!mdmon_running(tst->container_devnm))
- tst->ss->sync_metadata(tst);
-
- sra = sysfs_read(container_fd, NULL, 0);
- if (!sra) {
- pr_err("add failed for %s: sysfs_read failed\n",
- dv->devname);
- close(container_fd);
- tst->ss->free_super(tst);
- goto unlock;
- }
- sra->array.level = LEVEL_CONTAINER;
- /* Need to set data_offset and component_size */
- tst->ss->getinfo_super(tst, &new_mdi, NULL);
- new_mdi.disk.major = disc.major;
- new_mdi.disk.minor = disc.minor;
- new_mdi.recovery_start = 0;
- /* Make sure fds are closed as they are O_EXCL which
- * would block add_disk */
- tst->ss->free_super(tst);
- if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
- pr_err("add new device to external metadata failed for %s\n", dv->devname);
- close(container_fd);
- sysfs_free(sra);
- goto unlock;
- }
- ping_monitor(devnm);
- sysfs_free(sra);
- close(container_fd);
} else {
tst->ss->free_super(tst);
if (ioctl(fd, ADD_NEW_DISK, &disc)) {
--
2.40.1

@ -1,252 +0,0 @@
From 6d4d9ab295de165e57b5c30e044028dbffb8f297 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Tue, 21 Jun 2022 00:10:42 +0800
Subject: [PATCH 24/83] imsm: use same slot across container
Autolayout relies on the order of drives on the super->disks list, but
that order is not guaranteed by readdir() in sysfs_read(). As a result, a
drive could be put in a different slot in the second volume.
Make it consistent by referring to the first volume, if it exists.
Use enum imsm_status to unify error handling.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 169 ++++++++++++++++++++++++++++++++------------------
1 file changed, 108 insertions(+), 61 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index cd1f1e3d..deef7c87 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7522,11 +7522,27 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
return 1;
}
-static int imsm_get_free_size(struct supertype *st, int raiddisks,
- unsigned long long size, int chunk,
- unsigned long long *freesize)
+/**
+ * imsm_get_free_size() - get the biggest, common free space from members.
+ * @super: &intel_super pointer, not NULL.
+ * @raiddisks: number of raid disks.
+ * @size: requested size, could be 0 (means max size).
+ * @chunk: requested chunk.
+ * @freesize: pointer for returned size value.
+ *
+ * Return: &IMSM_STATUS_OK or &IMSM_STATUS_ERROR.
+ *
+ * @freesize is set to meaningful value, this can be @size, or calculated
+ * max free size.
+ * super->create_offset value is modified and set appropriately in
+ * merge_extends() for further creation.
+ */
+static imsm_status_t imsm_get_free_size(struct intel_super *super,
+ const int raiddisks,
+ unsigned long long size,
+ const int chunk,
+ unsigned long long *freesize)
{
- struct intel_super *super = st->sb;
struct imsm_super *mpb = super->anchor;
struct dl *dl;
int i;
@@ -7570,12 +7586,10 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
/* chunk is in K */
minsize = chunk * 2;
- if (cnt < raiddisks ||
- (super->orom && used && used != raiddisks) ||
- maxsize < minsize ||
- maxsize == 0) {
+ if (cnt < raiddisks || (super->orom && used && used != raiddisks) ||
+ maxsize < minsize || maxsize == 0) {
pr_err("not enough devices with space to create array.\n");
- return 0; /* No enough free spaces large enough */
+ return IMSM_STATUS_ERROR;
}
if (size == 0) {
@@ -7588,37 +7602,69 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
}
if (mpb->num_raid_devs > 0 && size && size != maxsize)
pr_err("attempting to create a second volume with size less then remaining space.\n");
- cnt = 0;
- for (dl = super->disks; dl; dl = dl->next)
- if (dl->e)
- dl->raiddisk = cnt++;
-
*freesize = size;
dprintf("imsm: imsm_get_free_size() returns : %llu\n", size);
- return 1;
+ return IMSM_STATUS_OK;
}
-static int reserve_space(struct supertype *st, int raiddisks,
- unsigned long long size, int chunk,
- unsigned long long *freesize)
+/**
+ * autolayout_imsm() - automatically layout a new volume.
+ * @super: &intel_super pointer, not NULL.
+ * @raiddisks: number of raid disks.
+ * @size: requested size, could be 0 (means max size).
+ * @chunk: requested chunk.
+ * @freesize: pointer for returned size value.
+ *
+ * We are being asked to automatically layout a new volume based on the current
+ * contents of the container. If the parameters can be satisfied autolayout_imsm
+ * will record the disks, start offset, and will return size of the volume to
+ * be created. See imsm_get_free_size() for details.
+ * add_to_super() and getinfo_super() detect when autolayout is in progress.
+ * If first volume exists, slots are set consistently to it.
+ *
+ * Return: &IMSM_STATUS_OK on success, &IMSM_STATUS_ERROR otherwise.
+ *
+ * Disks are marked for creation via dl->raiddisk.
+ */
+static imsm_status_t autolayout_imsm(struct intel_super *super,
+ const int raiddisks,
+ unsigned long long size, const int chunk,
+ unsigned long long *freesize)
{
- struct intel_super *super = st->sb;
- struct dl *dl;
- int cnt;
- int rv = 0;
+ int curr_slot = 0;
+ struct dl *disk;
+ int vol_cnt = super->anchor->num_raid_devs;
+ imsm_status_t rv;
- rv = imsm_get_free_size(st, raiddisks, size, chunk, freesize);
- if (rv) {
- cnt = 0;
- for (dl = super->disks; dl; dl = dl->next)
- if (dl->e)
- dl->raiddisk = cnt++;
- rv = 1;
+ rv = imsm_get_free_size(super, raiddisks, size, chunk, freesize);
+ if (rv != IMSM_STATUS_OK)
+ return IMSM_STATUS_ERROR;
+
+ for (disk = super->disks; disk; disk = disk->next) {
+ if (!disk->e)
+ continue;
+
+ if (curr_slot == raiddisks)
+ break;
+
+ if (vol_cnt == 0) {
+ disk->raiddisk = curr_slot;
+ } else {
+ int _slot = get_disk_slot_in_dev(super, 0, disk->index);
+
+ if (_slot == -1) {
+ pr_err("Disk %s is not used in first volume, aborting\n",
+ disk->devname);
+ return IMSM_STATUS_ERROR;
+ }
+ disk->raiddisk = _slot;
+ }
+ curr_slot++;
}
- return rv;
+ return IMSM_STATUS_OK;
}
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
@@ -7654,35 +7700,35 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
}
if (!dev) {
- if (st->sb) {
- struct intel_super *super = st->sb;
- if (!validate_geometry_imsm_orom(st->sb, level, layout,
- raiddisks, chunk, size,
- verbose))
+ struct intel_super *super = st->sb;
+
+ /*
+ * Autolayout mode, st->sb and freesize must be set.
+ */
+ if (!super || !freesize) {
+ pr_vrb("freesize and superblock must be set for autolayout, aborting\n");
+ return 1;
+ }
+
+ if (!validate_geometry_imsm_orom(st->sb, level, layout,
+ raiddisks, chunk, size,
+ verbose))
+ return 0;
+
+ if (super->orom) {
+ imsm_status_t rv;
+ int count = count_volumes(super->hba, super->orom->dpa,
+ verbose);
+ if (super->orom->vphba <= count) {
+ pr_vrb("platform does not support more than %d raid volumes.\n",
+ super->orom->vphba);
return 0;
- /* we are being asked to automatically layout a
- * new volume based on the current contents of
- * the container. If the the parameters can be
- * satisfied reserve_space will record the disks,
- * start offset, and size of the volume to be
- * created. add_to_super and getinfo_super
- * detect when autolayout is in progress.
- */
- /* assuming that freesize is always given when array is
- created */
- if (super->orom && freesize) {
- int count;
- count = count_volumes(super->hba,
- super->orom->dpa, verbose);
- if (super->orom->vphba <= count) {
- pr_vrb("platform does not support more than %d raid volumes.\n",
- super->orom->vphba);
- return 0;
- }
}
- if (freesize)
- return reserve_space(st, raiddisks, size,
- *chunk, freesize);
+
+ rv = autolayout_imsm(super, raiddisks, size, *chunk,
+ freesize);
+ if (rv != IMSM_STATUS_OK)
+ return 0;
}
return 1;
}
@@ -11538,7 +11584,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
unsigned long long current_size;
unsigned long long free_size;
unsigned long long max_size;
- int rv;
+ imsm_status_t rv;
getinfo_super_imsm_volume(st, &info, NULL);
if (geo->level != info.array.level && geo->level >= 0 &&
@@ -11657,9 +11703,10 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
}
/* check the maximum available size
*/
- rv = imsm_get_free_size(st, dev->vol.map->num_members,
- 0, chunk, &free_size);
- if (rv == 0)
+ rv = imsm_get_free_size(super, dev->vol.map->num_members,
+ 0, chunk, &free_size);
+
+ if (rv != IMSM_STATUS_OK)
/* Cannot find maximum available space
*/
max_size = 0;
--
2.38.1

@ -1,122 +0,0 @@
From 9a7df595bbe360132cb37c8b39aa1fd9ac24b43f Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Tue, 21 Jun 2022 00:10:43 +0800
Subject: [PATCH 25/83] imsm: block changing slots during creation
If user specifies drives for array creation, then slot order across
volumes is not preserved.
Ideally, it should be checked in validate_geometry() but it is not
possible in current implementation (order is determined later).
Add verification in add_to_super_imsm_volume() and throw error if
mismatch is detected.
IMSM allows to use only same members within container.
This is not hardware dependency but metadata limitation.
Therefore, 09-imsm-overlap test is removed. Testing it is pointless.
After this patch, creation in this scenario is blocked. Offset
verification is covered in other tests.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 33 ++++++++++++++++++++++-----------
tests/09imsm-overlap | 28 ----------------------------
2 files changed, 22 insertions(+), 39 deletions(-)
delete mode 100644 tests/09imsm-overlap
diff --git a/super-intel.c b/super-intel.c
index deef7c87..8ffe485c 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -5789,6 +5789,10 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
struct imsm_map *map;
struct dl *dl, *df;
int slot;
+ int autolayout = 0;
+
+ if (!is_fd_valid(fd))
+ autolayout = 1;
dev = get_imsm_dev(super, super->current_vol);
map = get_imsm_map(dev, MAP_0);
@@ -5799,25 +5803,32 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
return 1;
}
- if (!is_fd_valid(fd)) {
- /* we're doing autolayout so grab the pre-marked (in
- * validate_geometry) raid_disk
- */
- for (dl = super->disks; dl; dl = dl->next)
+ for (dl = super->disks; dl ; dl = dl->next) {
+ if (autolayout) {
if (dl->raiddisk == dk->raid_disk)
break;
- } else {
- for (dl = super->disks; dl ; dl = dl->next)
- if (dl->major == dk->major &&
- dl->minor == dk->minor)
- break;
+ } else if (dl->major == dk->major && dl->minor == dk->minor)
+ break;
}
if (!dl) {
- pr_err("%s is not a member of the same container\n", devname);
+ if (!autolayout)
+ pr_err("%s is not a member of the same container.\n",
+ devname);
return 1;
}
+ if (!autolayout && super->current_vol > 0) {
+ int _slot = get_disk_slot_in_dev(super, 0, dl->index);
+
+ if (_slot != dk->raid_disk) {
+ pr_err("Member %s is in %d slot for the first volume, but is in %d slot for a new volume.\n",
+ dl->devname, _slot, dk->raid_disk);
+ pr_err("Raid members are in different order than for the first volume, aborting.\n");
+ return 1;
+ }
+ }
+
if (mpb->num_disks == 0)
if (!get_dev_sector_size(dl->fd, dl->devname,
&super->sector_size))
diff --git a/tests/09imsm-overlap b/tests/09imsm-overlap
deleted file mode 100644
index ff5d2093..00000000
--- a/tests/09imsm-overlap
+++ /dev/null
@@ -1,28 +0,0 @@
-
-. tests/env-imsm-template
-
-# create raid arrays with varying degress of overlap
-mdadm -CR $container -e imsm -n 6 $dev0 $dev1 $dev2 $dev3 $dev4 $dev5
-imsm_check container 6
-
-size=1024
-level=1
-num_disks=2
-mdadm -CR $member0 $dev0 $dev1 -n $num_disks -l $level -z $size
-mdadm -CR $member1 $dev1 $dev2 -n $num_disks -l $level -z $size
-mdadm -CR $member2 $dev2 $dev3 -n $num_disks -l $level -z $size
-mdadm -CR $member3 $dev3 $dev4 -n $num_disks -l $level -z $size
-mdadm -CR $member4 $dev4 $dev5 -n $num_disks -l $level -z $size
-
-udevadm settle
-
-offset=0
-imsm_check member $member0 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member1 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member2 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member3 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member4 $num_disks $level $size 1024 $offset
--
2.38.1

@ -0,0 +1,138 @@
From 14a8657940be34a781222b4b715bd09eb80d1057 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:09 +0100
Subject: [PATCH 25/41] mdadm: introduce sysfs_get_container_devnm()
There are at least two places where it is done directly, so replace them
with a function. Print a message about creating an external array, and add the
"/dev/" prefix to refer directly to the devnode.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Create.c | 21 ++++++++++-----------
Manage.c | 14 ++++----------
mdadm.h | 2 ++
sysfs.c | 23 +++++++++++++++++++++++
4 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/Create.c b/Create.c
index 7e9170b6..0b776266 100644
--- a/Create.c
+++ b/Create.c
@@ -1142,24 +1142,23 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
if (did_default && c->verbose >= 0) {
if (is_subarray(info.text_version)) {
- char devnm[32];
- char *ep;
+ char devnm[MD_NAME_MAX];
struct mdinfo *mdi;
- strncpy(devnm, info.text_version+1, 32);
- devnm[31] = 0;
- ep = strchr(devnm, '/');
- if (ep)
- *ep = 0;
+ sysfs_get_container_devnm(&info, devnm);
mdi = sysfs_read(-1, devnm, GET_VERSION);
+ if (!mdi) {
+ pr_err("Cannot open sysfs for container %s\n", devnm);
+ goto abort_locked;
+ }
+
+ pr_info("Creating array inside %s container /dev/%s\n", mdi->text_version,
+ devnm);
- pr_info("Creating array inside %s container %s\n",
- mdi?mdi->text_version:"managed", devnm);
sysfs_free(mdi);
} else
- pr_info("Defaulting to version %s metadata\n",
- info.text_version);
+ pr_info("Defaulting to version %s metadata\n", info.text_version);
}
map_update(&map, fd2devnm(mdfd), info.text_version,
diff --git a/Manage.c b/Manage.c
index b3e216cb..969d0ea9 100644
--- a/Manage.c
+++ b/Manage.c
@@ -178,7 +178,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
struct map_ent *map = NULL;
struct mdinfo *mdi;
char devnm[32];
- char container[32];
+ char container[MD_NAME_MAX] = {0};
int err;
int count;
char buf[SYSFS_MAX_BUF_SIZE];
@@ -192,15 +192,9 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
* to stop is probably a bad idea.
*/
mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
- if (mdi && is_subarray(mdi->text_version)) {
- char *sl;
- strncpy(container, mdi->text_version+1, sizeof(container));
- container[sizeof(container)-1] = 0;
- sl = strchr(container, '/');
- if (sl)
- *sl = 0;
- } else
- container[0] = 0;
+ if (mdi && is_subarray(mdi->text_version))
+ sysfs_get_container_devnm(mdi, container);
+
close(fd);
count = 5;
while (((fd = ((devname[0] == '/')
diff --git a/mdadm.h b/mdadm.h
index cbc586f5..39b86bd0 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -777,6 +777,8 @@ enum sysfs_read_flags {
#define SYSFS_MAX_BUF_SIZE 64
+extern void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf);
+
/* If fd >= 0, get the array it is open on,
* else use devnm.
*/
diff --git a/sysfs.c b/sysfs.c
index f95ef701..230b842e 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -74,6 +74,29 @@ void sysfs_free(struct mdinfo *sra)
}
}
+/**
+ * sysfs_get_container_devnm() - extract container device name.
+ * @mdi: md_info describes member array, with GET_VERSION option.
+ * @buf: buf to fill, must be MD_NAME_MAX.
+ *
+ * External array version is in format {/,-}<container_devnm>/<array_index>
+ * Extract container_devnm from it and save it in @buf.
+ */
+void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf)
+{
+ char *p;
+
+ assert(is_subarray(mdi->text_version));
+
+ /* Skip first special sign */
+ snprintf(buf, MD_NAME_MAX, "%s", mdi->text_version + 1);
+
+ /* Remove array index */
+ p = strchr(buf, '/');
+ if (p)
+ *p = 0;
+}
+
int sysfs_open(char *devnm, char *devname, char *attr)
{
char fname[MAX_SYSFS_PATH_LEN];
--
2.40.1

@ -1,177 +0,0 @@
From 70f1ff4291b0388adca1f4c91918ce1175e8b360 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Wed, 15 Jun 2022 14:28:39 +0200
Subject: [PATCH 26/83] mdadm: block update=ppl for non raid456 levels
Option ppl should be used only for raid levels 4, 5 and 6. Cancel update
for other levels.
Applied globally for imsm and ddf format.
Additionally introduce is_level456() helper function.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 11 +++++------
Grow.c | 2 +-
Manage.c | 14 ++++++++++++--
mdadm.h | 11 +++++++++++
super0.c | 2 +-
super1.c | 3 +--
6 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 4b213560..6df6bfbc 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -906,8 +906,7 @@ static int force_array(struct mdinfo *content,
* devices in RAID4 or last devices in RAID4/5/6.
*/
delta = devices[j].i.delta_disks;
- if (devices[j].i.array.level >= 4 &&
- devices[j].i.array.level <= 6 &&
+ if (is_level456(devices[j].i.array.level) &&
i/2 >= content->array.raid_disks - delta)
/* OK */;
else if (devices[j].i.array.level == 4 &&
@@ -1226,8 +1225,7 @@ static int start_array(int mdfd,
fprintf(stderr, ".\n");
}
if (content->reshape_active &&
- content->array.level >= 4 &&
- content->array.level <= 6) {
+ is_level456(content->array.level)) {
/* might need to increase the size
* of the stripe cache - default is 256
*/
@@ -1974,7 +1972,8 @@ int assemble_container_content(struct supertype *st, int mdfd,
int start_reshape;
char *avail;
int err;
- int is_raid456, is_clean, all_disks;
+ int is_clean, all_disks;
+ bool is_raid456;
if (sysfs_init(content, mdfd, NULL)) {
pr_err("Unable to initialize sysfs\n");
@@ -2107,7 +2106,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
content->array.state |= 1;
}
- is_raid456 = (content->array.level >= 4 && content->array.level <= 6);
+ is_raid456 = is_level456(content->array.level);
is_clean = content->array.state & 1;
if (enough(content->array.level, content->array.raid_disks,
diff --git a/Grow.c b/Grow.c
index f6efbc48..8c520d42 100644
--- a/Grow.c
+++ b/Grow.c
@@ -2944,7 +2944,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
}
md_get_array_info(fd, &array);
- if (level == 0 && (array.level >= 4 && array.level <= 6)) {
+ if (level == 0 && is_level456(array.level)) {
/* To convert to RAID0 we need to fail and
* remove any non-data devices. */
int found = 0;
diff --git a/Manage.c b/Manage.c
index f789e0c1..e5e6abe4 100644
--- a/Manage.c
+++ b/Manage.c
@@ -307,7 +307,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
* - unfreeze reshape
* - wait on 'sync_completed' for that point to be reached.
*/
- if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
+ if (mdi && is_level456(mdi->array.level) &&
sysfs_attribute_available(mdi, NULL, "sync_action") &&
sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
@@ -1679,6 +1679,7 @@ int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident
{
struct supertype supertype, *st = &supertype;
int fd, rv = 2;
+ struct mdinfo *info = NULL;
memset(st, 0, sizeof(*st));
@@ -1696,6 +1697,13 @@ int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident
if (mdmon_running(st->devnm))
st->update_tail = &st->updates;
+ info = st->ss->container_content(st, subarray);
+
+ if (strncmp(update, "ppl", 3) == 0 && !is_level456(info->array.level)) {
+ pr_err("RWH policy ppl is supported only for raid4, raid5 and raid6.\n");
+ goto free_super;
+ }
+
rv = st->ss->update_subarray(st, subarray, update, ident);
if (rv) {
@@ -1711,7 +1719,9 @@ int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident
pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
subarray, dev);
- free_super:
+free_super:
+ if (info)
+ free(info);
st->ss->free_super(st);
close(fd);
diff --git a/mdadm.h b/mdadm.h
index d53df169..974415b9 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -796,6 +796,17 @@ static inline int is_fd_valid(int fd)
return (fd > -1);
}
+/**
+ * is_level456() - check whether given level is between inclusive 4 and 6.
+ * @level: level to check.
+ *
+ * Return: true if condition is met, false otherwise
+ */
+static inline bool is_level456(int level)
+{
+ return (level >= 4 && level <= 6);
+}
+
/**
* close_fd() - verify, close and unset file descriptor.
* @fd: pointer to file descriptor.
diff --git a/super0.c b/super0.c
index 61c9ec1d..37f595ed 100644
--- a/super0.c
+++ b/super0.c
@@ -683,7 +683,7 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
int parity = sb->level == 6 ? 2 : 1;
rv = 0;
- if (sb->level >= 4 && sb->level <= 6 &&
+ if (is_level456(sb->level) &&
sb->reshape_position % (
sb->new_chunk/512 *
(sb->raid_disks - sb->delta_disks - parity))) {
diff --git a/super1.c b/super1.c
index 3a0c69fd..71af860c 100644
--- a/super1.c
+++ b/super1.c
@@ -1530,8 +1530,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
* So we reject a revert-reshape unless the
* alignment is good.
*/
- if (__le32_to_cpu(sb->level) >= 4 &&
- __le32_to_cpu(sb->level) <= 6) {
+ if (is_level456(__le32_to_cpu(sb->level))) {
reshape_sectors =
__le64_to_cpu(sb->reshape_position);
reshape_chunk = __le32_to_cpu(sb->new_chunk);
--
2.38.1

@ -0,0 +1,115 @@
From 1fef0c6ff54c2710f75a239dd8a5e0ffb0068e86 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:10 +0100
Subject: [PATCH 26/41] mdadm.h: Introduce custom device policies
The approach proposed here is to test drive policies outside
validate_geometry() separately per every drive and add determined
policies to list. The implementation reuses dev_policy we have in
mdadm.
This concept addresses following problems:
- test drives if they fit together to criteria required by metadata
handler,
- test all drives assigned to the container even if some of them are not
target of the request, mdmon is free to use any drive in the same
container,
- extensibility, new policies can be added to handler easily,
- fix issues related to imsm controller domain verifying.
Add superswitch function. It is used in next patches.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdadm.h | 54 ++++++++++++++++++++++++++++++++++++------------------
1 file changed, 36 insertions(+), 18 deletions(-)
diff --git a/mdadm.h b/mdadm.h
index 39b86bd0..889f4a0f 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -940,6 +940,23 @@ struct reshape {
unsigned long long new_size; /* New size of array in sectors */
};
+/**
+ * struct dev_policy - Data structure for policy management.
+ * @next: pointer to next dev_policy.
+ * @name: policy name, category.
+ * @metadata: the metadata type it affects.
+ * @value: value of the policy.
+ *
+ * The functions to manipulate dev_policy lists do not free elements, so they must be statically
+ * allocated. @name and @metadata can be compared by address.
+ */
+typedef struct dev_policy {
+ struct dev_policy *next;
+ char *name;
+ const char *metadata;
+ const char *value;
+} dev_policy_t;
+
/* A superswitch provides entry point to a metadata handler.
*
* The superswitch primarily operates on some "metadata" that
@@ -1168,6 +1185,25 @@ extern struct superswitch {
char *subdev, unsigned long long *freesize,
int consistency_policy, int verbose);
+ /**
+ * test_and_add_drive_policies() - test new and add custom policies from metadata handler.
+ * @pols: list of currently recorded policies.
+ * @disk_fd: file descriptor of the device to check.
+ * @verbose: verbose flag.
+ *
+ * Used by IMSM to verify all drives in container/array, against requirements not recorded
+ * in superblock, like controller type for IMSM. It should check all drives even if
+ * they are not actually used, because mdmon or kernel are free to use any drive assigned to
+ * container automatically.
+ *
+ * Generating and comparison methods belong to metadata handler. It is not mandatory to be
+ * implemented.
+ *
+ * Return: MDADM_STATUS_SUCCESS is expected on success.
+ */
+ mdadm_status_t (*test_and_add_drive_policies)(dev_policy_t **pols, int disk_fd,
+ const int verbose);
+
/* Return a linked list of 'mdinfo' structures for all arrays
* in the container. For non-containers, it is like
* getinfo_super with an allocated mdinfo.*/
@@ -1372,23 +1408,6 @@ extern int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep);
extern int must_be_container(int fd);
void wait_for(char *dev, int fd);
-/*
- * Data structures for policy management.
- * Each device can have a policy structure that lists
- * various name/value pairs each possibly with a metadata associated.
- * The policy list is sorted by name/value/metadata
- */
-struct dev_policy {
- struct dev_policy *next;
- char *name; /* None of these strings are allocated. They are
- * all just references to strings which are known
- * to exist elsewhere.
- * name and metadata can be compared by address equality.
- */
- const char *metadata;
- const char *value;
-};
-
extern char pol_act[], pol_domain[], pol_metadata[], pol_auto[];
/* iterate over the sublist starting at list, having the same
@@ -1430,7 +1449,6 @@ extern struct dev_policy *disk_policy(struct mdinfo *disk);
extern struct dev_policy *devid_policy(int devid);
extern void dev_policy_free(struct dev_policy *p);
-//extern void pol_new(struct dev_policy **pol, char *name, char *val, char *metadata);
extern void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata);
extern struct dev_policy *pol_find(struct dev_policy *pol, char *name);
--
2.40.1

@ -1,30 +0,0 @@
From 42e02e613fb0b4a2c0c0d984b9e6e2933875bb44 Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Fri, 22 Jul 2022 08:43:47 +0200
Subject: [PATCH 27/83] mdadm: Fix array size mismatch after grow
imsm_fix_size_mismatch() is invoked to fix the problem, but it couldn't
proceed due to migration check. This patch allows for intended behavior.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/super-intel.c b/super-intel.c
index 8ffe485c..76b947f5 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -11854,7 +11854,7 @@ static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index)
unsigned long long d_size = imsm_dev_size(dev);
int u_size;
- if (calc_size == d_size || dev->vol.migr_type == MIGR_GEN_MIGR)
+ if (calc_size == d_size)
continue;
/* There is a difference, confirm that imsm_dev_size is
--
2.38.1

@ -0,0 +1,144 @@
From 5a2e194cb31569880a26356b8594ddca6e3b3828 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:11 +0100
Subject: [PATCH 27/41] mdadm: test_and_add device policies implementation
Add support for three scenarios:
- obtaining array wide policies via fd,
- obtaining array wide policies via struct mdinfo,
- getting policies for particular drive from the request.
Add proper functions and make them extern. These functions are used
in next patches.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdadm.h | 7 +++++
policy.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+)
diff --git a/mdadm.h b/mdadm.h
index 889f4a0f..af2bc714 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1452,6 +1452,13 @@ extern void dev_policy_free(struct dev_policy *p);
extern void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata);
extern struct dev_policy *pol_find(struct dev_policy *pol, char *name);
+extern mdadm_status_t drive_test_and_add_policies(struct supertype *st, dev_policy_t **pols,
+ int fd, const int verbose);
+extern mdadm_status_t sysfs_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+ struct mdinfo *mdi, const int verbose);
+extern mdadm_status_t mddev_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+ int array_fd, const int verbose);
+
enum policy_action {
act_default,
act_include,
diff --git a/policy.c b/policy.c
index eee9ef63..4b85f62d 100644
--- a/policy.c
+++ b/policy.c
@@ -397,6 +397,99 @@ struct dev_policy *path_policy(char **paths, char *type)
return pol;
}
+/**
+ * drive_test_and_add_policies() - get policies for drive and add them to pols.
+ * @st: supertype.
+ * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
+ * @fd: device descriptor.
+ * @verbose: verbose flag.
+ *
+ * If supertype doesn't support this functionality return success. Use metadata handler to get
+ * policies.
+ */
+mdadm_status_t drive_test_and_add_policies(struct supertype *st, dev_policy_t **pols, int fd,
+ const int verbose)
+{
+ if (!st->ss->test_and_add_drive_policies)
+ return MDADM_STATUS_SUCCESS;
+
+ if (st->ss->test_and_add_drive_policies(pols, fd, verbose) == MDADM_STATUS_SUCCESS) {
+ /* After successful call list cannot be empty */
+ assert(*pols);
+ return MDADM_STATUS_SUCCESS;
+ }
+
+ return MDADM_STATUS_ERROR;
+}
+
+/**
+ * sysfs_test_and_add_drive_policies() - get policies for mddev and add them to pols.
+ * @st: supertype.
+ * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
+ * @mdi: mdinfo describes the MD array, must have GET_DISKS option.
+ * @verbose: verbose flag.
+ *
+ * If supertype doesn't support this functionality return success. To get policies, all disks
+ * connected to mddev are analyzed.
+ */
+mdadm_status_t sysfs_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+ struct mdinfo *mdi, const int verbose)
+{
+ struct mdinfo *sd;
+
+ if (!st->ss->test_and_add_drive_policies)
+ return MDADM_STATUS_SUCCESS;
+
+ for (sd = mdi->devs; sd; sd = sd->next) {
+ char *devpath = map_dev(sd->disk.major, sd->disk.minor, 0);
+ int fd = dev_open(devpath, O_RDONLY);
+ int rv;
+
+ if (!is_fd_valid(fd)) {
+ pr_err("Cannot open fd for %s\n", devpath);
+ return MDADM_STATUS_ERROR;
+ }
+
+ rv = drive_test_and_add_policies(st, pols, fd, verbose);
+ close(fd);
+
+ if (rv)
+ return MDADM_STATUS_ERROR;
+ }
+
+ return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * mddev_test_and_add_drive_policies() - get policies for mddev and add them to pols.
+ * @st: supertype.
+ * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
+ * @array_fd: MD device descriptor.
+ * @verbose: verbose flag.
+ *
+ * If supertype doesn't support this functionality return success. Use fd to extract disks.
+ */
+mdadm_status_t mddev_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+ int array_fd, const int verbose)
+{
+ struct mdinfo *sra;
+ int ret;
+
+ if (!st->ss->test_and_add_drive_policies)
+ return MDADM_STATUS_SUCCESS;
+
+ sra = sysfs_read(array_fd, NULL, GET_DEVS);
+ if (!sra) {
+ pr_err("Cannot load sysfs for %s\n", fd2devnm(array_fd));
+ return MDADM_STATUS_ERROR;
+ }
+
+ ret = sysfs_test_and_add_drive_policies(st, pols, sra, verbose);
+
+ sysfs_free(sra);
+ return ret;
+}
+
void pol_add(struct dev_policy **pol,
char *name, char *val,
char *metadata)
--
2.40.1

@ -0,0 +1,124 @@
From f5a39b66f794322f30828389ddd488d17f578ad5 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:12 +0100
Subject: [PATCH 28/41] Create: Use device policies
Generate and compare policies, abort if policies do not match.
It is tested for both create modes, with container and disk list
specified directly. It is used if supertype supports it.
For a case when disk list is specified, container may contain more
devices, so additional check on container is done to analyze all disks.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Create.c | 31 +++++++++++++++++++++++++------
1 file changed, 25 insertions(+), 6 deletions(-)
diff --git a/Create.c b/Create.c
index 0b776266..4397ff49 100644
--- a/Create.c
+++ b/Create.c
@@ -497,6 +497,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
*/
int mdfd;
unsigned long long minsize = 0, maxsize = 0;
+ dev_policy_t *custom_pols = NULL;
char *mindisc = NULL;
char *maxdisc = NULL;
char *name = ident->name;
@@ -588,6 +589,9 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
first_missing = subdevs * 2;
second_missing = subdevs * 2;
insert_point = subdevs * 2;
+
+ if (mddev_test_and_add_drive_policies(st, &custom_pols, fd, 1))
+ exit(1);
}
}
if (fd >= 0)
@@ -739,7 +743,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
close(dfd);
exit(2);
}
- close(dfd);
+
info.array.working_disks++;
if (dnum < s->raiddisks && dv->disposition != 'j')
info.array.active_disks++;
@@ -812,6 +816,11 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
}
}
+ if (drive_test_and_add_policies(st, &custom_pols, dfd, 1))
+ exit(1);
+
+ close(dfd);
+
if (dv->disposition == 'j')
goto skip_size_check; /* skip write journal for size check */
@@ -886,6 +895,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
close(fd);
}
}
+
if (missing_disks == dnum && !have_container) {
pr_err("Subdevs can't be all missing\n");
return 1;
@@ -1140,25 +1150,30 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
goto abort_locked;
}
- if (did_default && c->verbose >= 0) {
+ if (did_default) {
if (is_subarray(info.text_version)) {
char devnm[MD_NAME_MAX];
struct mdinfo *mdi;
sysfs_get_container_devnm(&info, devnm);
- mdi = sysfs_read(-1, devnm, GET_VERSION);
+ mdi = sysfs_read(-1, devnm, GET_VERSION | GET_DEVS);
if (!mdi) {
pr_err("Cannot open sysfs for container %s\n", devnm);
goto abort_locked;
}
- pr_info("Creating array inside %s container /dev/%s\n", mdi->text_version,
- devnm);
+ if (sysfs_test_and_add_drive_policies(st, &custom_pols, mdi, 1))
+ goto abort_locked;
+
+ if (c->verbose >= 0)
+ pr_info("Creating array inside %s container /dev/%s\n",
+ mdi->text_version, devnm);
sysfs_free(mdi);
- } else
+ } else if (c->verbose >= 0) {
pr_info("Defaulting to version %s metadata\n", info.text_version);
+ }
}
map_update(&map, fd2devnm(mdfd), info.text_version,
@@ -1328,6 +1343,8 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
udev_unblock();
close(mdfd);
sysfs_uevent(&info, "change");
+ dev_policy_free(custom_pols);
+
return 0;
abort:
@@ -1339,5 +1356,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
if (mdfd >= 0)
close(mdfd);
+
+ dev_policy_free(custom_pols);
return 1;
}
--
2.40.1

@ -1,34 +0,0 @@
From 751757620afb25a4c02746bf8368a7b5f22352ec Mon Sep 17 00:00:00 2001
From: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Date: Fri, 22 Jul 2022 08:43:48 +0200
Subject: [PATCH 28/83] mdadm: Remove dead code in imsm_fix_size_mismatch
imsm_create_metadata_update_for_size_change() that returns u_size value
could return 0 in the past. As its behavior changed, and returned value
is always the size of imsm_update_size_change structure, check for
u_size is no longer needed.
Signed-off-by: Lukasz Florczak <lukasz.florczak@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 76b947f5..4ddfcf94 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -11869,10 +11869,6 @@ static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index)
geo.size = d_size;
u_size = imsm_create_metadata_update_for_size_change(st, &geo,
&update);
- if (u_size < 1) {
- dprintf("imsm: Cannot prepare size change update\n");
- goto exit;
- }
imsm_update_metadata_locally(st, update, u_size);
if (st->update_tail) {
append_metadata_update(st, update, u_size);
--
2.38.1

@ -0,0 +1,60 @@
From 1251db34616bf4890d86664abc5186e9106e9073 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:13 +0100
Subject: [PATCH 29/41] Manage: check device policies in manage_add_external()
Only IMSM is going to use device policies so it is added to
manage_add_external(). Test policies before adding the drive to
container.
The change blocks adding new device to the container which already
contains non-matching devices.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Manage.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/Manage.c b/Manage.c
index 969d0ea9..96e5ee54 100644
--- a/Manage.c
+++ b/Manage.c
@@ -704,6 +704,7 @@ mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name
{
mdadm_status_t rv = MDADM_STATUS_ERROR;
char container_devpath[MD_NAME_MAX];
+ struct dev_policy *pols = NULL;
struct mdinfo new_mdi;
struct mdinfo *sra = NULL;
int container_fd;
@@ -722,6 +723,9 @@ mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name
0, 1))
goto out;
+ if (mddev_test_and_add_drive_policies(st, &pols, container_fd, 1))
+ goto out;
+
Kill(disk_name, NULL, 0, -1, 0);
disk_fd = dev_open(disk_name, O_RDWR | O_EXCL | O_DIRECT);
@@ -730,6 +734,9 @@ mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name
goto out;
}
+ if (drive_test_and_add_policies(st, &pols, disk_fd, 1))
+ goto out;
+
if (st->ss->add_to_super(st, disc, disk_fd, disk_name, INVALID_SECTORS))
goto out;
@@ -760,6 +767,7 @@ mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name
out:
close(container_fd);
+ dev_policy_free(pols);
if (sra)
sysfs_free(sra);
--
2.40.1

@ -1,40 +0,0 @@
From c8d1c398505b62d9129a4e711f17e4469f4327ff Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Thu, 14 Jul 2022 09:02:10 +0200
Subject: [PATCH 29/83] Monitor: use devname as char array instead of pointer
Device name wasn't filled properly due to incorrect use of strcpy.
Strcpy was used twice. Firstly to fill devname with "/dev/md/"
and then to add chosen name. First strcpy result was overwritten by
second one (as a result <device_name> instead of "/dev/md/<device_name>"
was assigned). This commit changes this implementation to use snprintf
and devname with fixed size.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index 6ca1ebe5..a5b11ae2 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -190,9 +190,11 @@ int Monitor(struct mddev_dev *devlist,
if (mdlist->devname[0] == '/')
st->devname = xstrdup(mdlist->devname);
else {
- st->devname = xmalloc(8+strlen(mdlist->devname)+1);
- strcpy(strcpy(st->devname, "/dev/md/"),
- mdlist->devname);
+ /* length of "/dev/md/" + device name + terminating byte */
+ size_t _len = sizeof("/dev/md/") + strnlen(mdlist->devname, PATH_MAX);
+
+ st->devname = xcalloc(_len, sizeof(char));
+ snprintf(st->devname, _len, "/dev/md/%s", mdlist->devname);
}
if (!is_mddev(mdlist->devname))
return 1;
--
2.38.1

@ -0,0 +1,142 @@
From 51a9f2fc5e982f3bcbf88fe1bf30c0bf55bfd49c Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:14 +0100
Subject: [PATCH 30/41] Monitor, Incremental: use device policies
spare_criteria is expanded to contain policies which will be generated
by handler's get_spare_criteria() function. It provides a way to
test device for metadata specific policies earlier than during
add_to_super(), when device is already removed from previous
array/container for Monitor.
For Incremental, it ensures that all criteria are tested when trying
spare. It is not tested when device contains valid metadata.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Incremental.c | 2 +-
Monitor.c | 3 ++-
mdadm.h | 5 +++--
util.c | 13 +++++++++----
4 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/Incremental.c b/Incremental.c
index 66c2cc86..958ba9ba 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -865,7 +865,7 @@ mdadm_status_t incremental_external_test_spare_criteria(struct supertype *st, ch
goto out;
}
- if (!disk_fd_matches_criteria(disk_fd, &sc)) {
+ if (!disk_fd_matches_criteria(dup, disk_fd, &sc)) {
if (verbose > 1)
pr_err("Disk does not match spare criteria for %s\n", container_devname);
goto out;
diff --git a/Monitor.c b/Monitor.c
index 6b4560ae..9b016bc3 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -1047,7 +1047,7 @@ static dev_t choose_spare(struct state *from, struct state *to,
test_partition_from_id(from->devid[d]))
continue;
- if (devid_matches_criteria(from->devid[d], sc) == false)
+ if (devid_matches_criteria(to->metadata, from->devid[d], sc) == false)
continue;
pol = devid_policy(from->devid[d]);
@@ -1195,6 +1195,7 @@ static void try_spare_migration(struct state *statelist)
}
}
domain_free(domlist);
+ dev_policy_free(sc.pols);
}
}
diff --git a/mdadm.h b/mdadm.h
index af2bc714..cfa11391 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -433,6 +433,7 @@ struct spare_criteria {
bool criteria_set;
unsigned long long min_size;
unsigned int sector_size;
+ struct dev_policy *pols;
};
typedef enum mdadm_status {
@@ -1734,8 +1735,8 @@ extern int assemble_container_content(struct supertype *st, int mdfd,
#define INCR_ALREADY 4
#define INCR_YES 8
-extern bool devid_matches_criteria(dev_t devid, struct spare_criteria *sc);
-extern bool disk_fd_matches_criteria(int disk_fd, struct spare_criteria *sc);
+extern bool devid_matches_criteria(struct supertype *st, dev_t devid, struct spare_criteria *sc);
+extern bool disk_fd_matches_criteria(struct supertype *st, int disk_fd, struct spare_criteria *sc);
extern struct mdinfo *container_choose_spares(struct supertype *st,
struct spare_criteria *criteria,
struct domainlist *domlist,
diff --git a/util.c b/util.c
index 041e78cf..05ad3343 100644
--- a/util.c
+++ b/util.c
@@ -2056,12 +2056,13 @@ unsigned int __invalid_size_argument_for_IOC = 0;
/**
* disk_fd_matches_criteria() - check if device matches spare criteria.
+ * @st: supertype, not NULL.
* @disk_fd: file descriptor of the disk.
* @sc: criteria to test.
*
* Return: true if disk matches criteria, false otherwise.
*/
-bool disk_fd_matches_criteria(int disk_fd, struct spare_criteria *sc)
+bool disk_fd_matches_criteria(struct supertype *st, int disk_fd, struct spare_criteria *sc)
{
unsigned int dev_sector_size = 0;
unsigned long long dev_size = 0;
@@ -2076,17 +2077,21 @@ bool disk_fd_matches_criteria(int disk_fd, struct spare_criteria *sc)
sc->sector_size != dev_sector_size)
return false;
+ if (drive_test_and_add_policies(st, &sc->pols, disk_fd, 0))
+ return false;
+
return true;
}
/**
* devid_matches_criteria() - check if device referenced by devid matches spare criteria.
+ * @st: supertype, not NULL.
* @devid: devid of the device to check.
* @sc: criteria to test.
*
* Return: true if disk matches criteria, false otherwise.
*/
-bool devid_matches_criteria(dev_t devid, struct spare_criteria *sc)
+bool devid_matches_criteria(struct supertype *st, dev_t devid, struct spare_criteria *sc)
{
char buf[NAME_MAX];
bool ret;
@@ -2102,7 +2107,7 @@ bool devid_matches_criteria(dev_t devid, struct spare_criteria *sc)
return false;
/* Error code inherited */
- ret = disk_fd_matches_criteria(fd, sc);
+ ret = disk_fd_matches_criteria(st, fd, sc);
close(fd);
return ret;
@@ -2137,7 +2142,7 @@ struct mdinfo *container_choose_spares(struct supertype *st,
if (d->disk.state == 0) {
dev_t dev = makedev(d->disk.major,d->disk.minor);
- found = devid_matches_criteria(dev, criteria);
+ found = devid_matches_criteria(st, dev, criteria);
/* check if domain matches */
if (found && domlist) {
--
2.40.1

@ -1,133 +0,0 @@
From 84d969be8f6d8a345b75f558fad26e4f62a558f6 Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Thu, 14 Jul 2022 09:02:11 +0200
Subject: [PATCH 30/83] Monitor: use snprintf to fill device name
Safe string functions are propagated in Monitor.c.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 37 ++++++++++++++-----------------------
1 file changed, 14 insertions(+), 23 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index a5b11ae2..93f36ac0 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -33,8 +33,8 @@
#endif
struct state {
- char *devname;
- char devnm[32]; /* to sync with mdstat info */
+ char devname[MD_NAME_MAX + sizeof("/dev/md/")]; /* length of "/dev/md/" + device name + terminating byte*/
+ char devnm[MD_NAME_MAX]; /* to sync with mdstat info */
unsigned int utime;
int err;
char *spare_group;
@@ -45,9 +45,9 @@ struct state {
int devstate[MAX_DISKS];
dev_t devid[MAX_DISKS];
int percent;
- char parent_devnm[32]; /* For subarray, devnm of parent.
- * For others, ""
- */
+ char parent_devnm[MD_NAME_MAX]; /* For subarray, devnm of parent.
+ * For others, ""
+ */
struct supertype *metadata;
struct state *subarray;/* for a container it is a link to first subarray
* for a subarray it is a link to next subarray
@@ -187,15 +187,8 @@ int Monitor(struct mddev_dev *devlist,
continue;
st = xcalloc(1, sizeof *st);
- if (mdlist->devname[0] == '/')
- st->devname = xstrdup(mdlist->devname);
- else {
- /* length of "/dev/md/" + device name + terminating byte */
- size_t _len = sizeof("/dev/md/") + strnlen(mdlist->devname, PATH_MAX);
-
- st->devname = xcalloc(_len, sizeof(char));
- snprintf(st->devname, _len, "/dev/md/%s", mdlist->devname);
- }
+ snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"),
+ "/dev/md/%s", basename(mdlist->devname));
if (!is_mddev(mdlist->devname))
return 1;
st->next = statelist;
@@ -218,7 +211,7 @@ int Monitor(struct mddev_dev *devlist,
st = xcalloc(1, sizeof *st);
mdlist = conf_get_ident(dv->devname);
- st->devname = xstrdup(dv->devname);
+ snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", dv->devname);
st->next = statelist;
st->devnm[0] = 0;
st->percent = RESYNC_UNKNOWN;
@@ -301,7 +294,6 @@ int Monitor(struct mddev_dev *devlist,
for (stp = &statelist; (st = *stp) != NULL; ) {
if (st->from_auto && st->err > 5) {
*stp = st->next;
- free(st->devname);
free(st->spare_group);
free(st);
} else
@@ -554,7 +546,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
goto disappeared;
if (st->devnm[0] == 0)
- strcpy(st->devnm, fd2devnm(fd));
+ snprintf(st->devnm, MD_NAME_MAX, "%s", fd2devnm(fd));
for (mse2 = mdstat; mse2; mse2 = mse2->next)
if (strcmp(mse2->devnm, st->devnm) == 0) {
@@ -684,7 +676,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
strncmp(mse->metadata_version, "external:", 9) == 0 &&
is_subarray(mse->metadata_version+9)) {
char *sl;
- strcpy(st->parent_devnm, mse->metadata_version + 10);
+ snprintf(st->parent_devnm, MD_NAME_MAX, "%s", mse->metadata_version + 10);
sl = strchr(st->parent_devnm, '/');
if (sl)
*sl = 0;
@@ -772,14 +764,13 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
continue;
}
- st->devname = xstrdup(name);
+ snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", name);
if ((fd = open(st->devname, O_RDONLY)) < 0 ||
md_get_array_info(fd, &array) < 0) {
/* no such array */
if (fd >= 0)
close(fd);
put_md_name(st->devname);
- free(st->devname);
if (st->metadata) {
st->metadata->ss->free_super(st->metadata);
free(st->metadata);
@@ -791,7 +782,7 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
st->next = *statelist;
st->err = 1;
st->from_auto = 1;
- strcpy(st->devnm, mse->devnm);
+ snprintf(st->devnm, MD_NAME_MAX, "%s", mse->devnm);
st->percent = RESYNC_UNKNOWN;
st->expected_spares = -1;
if (mse->metadata_version &&
@@ -799,8 +790,8 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
"external:", 9) == 0 &&
is_subarray(mse->metadata_version+9)) {
char *sl;
- strcpy(st->parent_devnm,
- mse->metadata_version+10);
+ snprintf(st->parent_devnm, MD_NAME_MAX,
+ "%s", mse->metadata_version + 10);
sl = strchr(st->parent_devnm, '/');
*sl = 0;
} else
--
2.38.1

@ -1,42 +0,0 @@
From 14ae4c37bce9a53da08d59d6c2d7e0946e9c9f47 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:06 -0600
Subject: [PATCH 31/83] Makefile: Don't build static build with everything and
everything-test
Running the test suite requires building everything, but it seems to be
difficult to build the static version of mdadm now seeing there
is no readily available static udev library.
The test suite doesn't need the static binary so just don't build it
with the everything or everything-test targets.
Leave the mdadm.static and install-static targets in place in case
someone still has a use case for the static binary.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index bf126033..ec1f99ed 100644
--- a/Makefile
+++ b/Makefile
@@ -182,9 +182,9 @@ check_rundir:
echo "***** or set CHECK_RUN_DIR=0"; exit 1; \
fi
-everything: all mdadm.static swap_super test_stripe raid6check \
+everything: all swap_super test_stripe raid6check \
mdadm.Os mdadm.O2 man
-everything-test: all mdadm.static swap_super test_stripe \
+everything-test: all swap_super test_stripe \
mdadm.Os mdadm.O2 man
# mdadm.uclibc doesn't work on x86-64
# mdadm.tcc doesn't work..
--
2.38.1

@ -0,0 +1,187 @@
From e21aea08eb706939a38f7dc5cf9509a9afd45f8a Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:15 +0100
Subject: [PATCH 31/41] imsm: test_and_add_device_policies() implementation
This patch removes imsm_get_disk_controller_domain() in favour of
test_and_add_drive_policies_imsm(). It is used by
create, add and mdmonitor.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
platform-intel.h | 1 -
super-intel.c | 123 ++++++++++++++++++++++++++++++++++-------------
2 files changed, 90 insertions(+), 34 deletions(-)
diff --git a/platform-intel.h b/platform-intel.h
index ce29d3da..3c2bc595 100644
--- a/platform-intel.h
+++ b/platform-intel.h
@@ -262,7 +262,6 @@ int disk_attached_to_hba(int fd, const char *hba_path);
int devt_attached_to_hba(dev_t dev, const char *hba_path);
char *devt_to_devpath(dev_t dev, int dev_level, char *buf);
int path_attached_to_hba(const char *disk_path, const char *hba_path);
-const char *get_sys_dev_type(enum sys_dev_type);
const struct orom_entry *get_orom_entry_by_device_id(__u16 dev_id);
const struct imsm_orom *get_orom_by_device_id(__u16 device_id);
struct sys_dev *device_by_id(__u16 device_id);
diff --git a/super-intel.c b/super-intel.c
index 7ad391ac..77140455 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -11220,39 +11220,90 @@ abort:
return retval;
}
-static char disk_by_path[] = "/dev/disk/by-path/";
-
-static const char *imsm_get_disk_controller_domain(const char *path)
-{
- char disk_path[PATH_MAX];
- char *drv=NULL;
- struct stat st;
-
- strncpy(disk_path, disk_by_path, PATH_MAX);
- strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
- if (stat(disk_path, &st) == 0) {
- struct sys_dev* hba;
- char *path;
-
- path = devt_to_devpath(st.st_rdev, 1, NULL);
- if (path == NULL)
- return "unknown";
- hba = find_disk_attached_hba(-1, path);
- if (hba && hba->type == SYS_DEV_SAS)
- drv = "isci";
- else if (hba && (hba->type == SYS_DEV_SATA || hba->type == SYS_DEV_SATA_VMD))
- drv = "ahci";
- else if (hba && hba->type == SYS_DEV_VMD)
- drv = "vmd";
- else if (hba && hba->type == SYS_DEV_NVME)
- drv = "nvme";
- else
- drv = "unknown";
- dprintf("path: %s hba: %s attached: %s\n",
- path, (hba) ? hba->path : "NULL", drv);
- free(path);
+/**
+ * test_and_add_drive_controller_policy_imsm() - add disk controller to policies list.
+ * @type: Policy type to search on list.
+ * @pols: List of currently recorded policies.
+ * @disk_fd: File descriptor of the device to check.
+ * @hba: The hba disk is attached, could be NULL if verification is disabled.
+ * @verbose: verbose flag.
+ *
+ * IMSM cares about drive physical placement. If @hba is not set, it adds unknown policy.
+ * If there is no controller policy on pols we are free to add first one. If there is a policy then,
+ * new must be the same - no controller mixing allowed.
+ */
+static mdadm_status_t
+test_and_add_drive_controller_policy_imsm(const char * const type, dev_policy_t **pols, int disk_fd,
+ struct sys_dev *hba, const int verbose)
+{
+ const char *controller_policy = get_sys_dev_type(SYS_DEV_UNKNOWN);
+ struct dev_policy *pol = pol_find(*pols, (char *)type);
+ char devname[MAX_RAID_SERIAL_LEN];
+
+ if (hba)
+ controller_policy = get_sys_dev_type(hba->type);
+
+ if (!pol) {
+ pol_add(pols, (char *)type, (char *)controller_policy, "imsm");
+ return MDADM_STATUS_SUCCESS;
}
- return drv;
+
+ if (strcmp(pol->value, controller_policy) == 0)
+ return MDADM_STATUS_SUCCESS;
+
+ fd2devname(disk_fd, devname);
+ pr_vrb("Intel(R) raid controller \"%s\" found for %s, but \"%s\" was detected earlier\n",
+ controller_policy, devname, pol->value);
+ pr_vrb("Disks under different controllers cannot be used, aborting\n");
+
+ return MDADM_STATUS_ERROR;
+}
+
+struct imsm_drive_policy {
+ char *type;
+ mdadm_status_t (*test_and_add_drive_policy)(const char * const type,
+ struct dev_policy **pols, int disk_fd,
+ struct sys_dev *hba, const int verbose);
+};
+
+struct imsm_drive_policy imsm_policies[] = {
+ {"controller", test_and_add_drive_controller_policy_imsm},
+};
+
+mdadm_status_t test_and_add_drive_policies_imsm(struct dev_policy **pols, int disk_fd,
+ const int verbose)
+{
+ struct imsm_drive_policy *imsm_pol;
+ struct sys_dev *hba = NULL;
+ char path[PATH_MAX];
+ mdadm_status_t ret;
+ unsigned int i;
+
+ /* If imsm platform verification is disabled, do not search for hba. */
+ if (check_no_platform() != 1) {
+ if (!diskfd_to_devpath(disk_fd, 1, path)) {
+ pr_vrb("IMSM: Failed to retrieve device path by file descriptor.\n");
+ return MDADM_STATUS_ERROR;
+ }
+
+ hba = find_disk_attached_hba(disk_fd, path);
+ if (!hba) {
+ pr_vrb("IMSM: Failed to find hba for %s\n", path);
+ return MDADM_STATUS_ERROR;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(imsm_policies); i++) {
+ imsm_pol = &imsm_policies[i];
+
+ ret = imsm_pol->test_and_add_drive_policy(imsm_pol->type, pols, disk_fd, hba,
+ verbose);
+ if (ret != MDADM_STATUS_SUCCESS)
+ /* Inherit error code */
+ return ret;
+ }
+
+ return MDADM_STATUS_SUCCESS;
}
/**
@@ -11280,6 +11331,7 @@ mdadm_status_t get_spare_criteria_imsm(struct supertype *st, char *mddev_path,
if (mddev_path) {
int fd = open(mddev_path, O_RDONLY);
+ mdadm_status_t rv;
if (!is_fd_valid(fd))
return MDADM_STATUS_ERROR;
@@ -11291,7 +11343,12 @@ mdadm_status_t get_spare_criteria_imsm(struct supertype *st, char *mddev_path,
}
free_superblock = true;
}
+
+ rv = mddev_test_and_add_drive_policies(st, &c->pols, fd, 0);
close(fd);
+
+ if (rv != MDADM_STATUS_SUCCESS)
+ goto out;
}
super = st->sb;
@@ -13026,7 +13083,7 @@ struct superswitch super_imsm = {
.update_subarray = update_subarray_imsm,
.load_container = load_container_imsm,
.default_geometry = default_geometry_imsm,
- .get_disk_controller_domain = imsm_get_disk_controller_domain,
+ .test_and_add_drive_policies = test_and_add_drive_policies_imsm,
.reshape_super = imsm_reshape_super,
.manage_reshape = imsm_manage_reshape,
.recover_backup = recover_backup_imsm,
--
2.40.1

@ -1,141 +0,0 @@
From 679bd9508a30b2a0a1baecc9a21dd6c7d8d8d7dc Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:07 -0600
Subject: [PATCH 32/83] DDF: Cleanup validate_geometry_ddf_container()
Move the function up so that the function declaration is not necessary
and remove the unused arguments to the function.
No functional changes are intended but will help with a bug fix in the
next patch.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
super-ddf.c | 88 ++++++++++++++++++++++++-----------------------------
1 file changed, 39 insertions(+), 49 deletions(-)
diff --git a/super-ddf.c b/super-ddf.c
index abbc8b09..9d867f69 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -503,13 +503,6 @@ struct ddf_super {
static int load_super_ddf_all(struct supertype *st, int fd,
void **sbp, char *devname);
static int get_svd_state(const struct ddf_super *, const struct vcl *);
-static int
-validate_geometry_ddf_container(struct supertype *st,
- int level, int layout, int raiddisks,
- int chunk, unsigned long long size,
- unsigned long long data_offset,
- char *dev, unsigned long long *freesize,
- int verbose);
static int validate_geometry_ddf_bvd(struct supertype *st,
int level, int layout, int raiddisks,
@@ -3322,6 +3315,42 @@ static int reserve_space(struct supertype *st, int raiddisks,
return 1;
}
+static int
+validate_geometry_ddf_container(struct supertype *st,
+ int level, int raiddisks,
+ unsigned long long data_offset,
+ char *dev, unsigned long long *freesize,
+ int verbose)
+{
+ int fd;
+ unsigned long long ldsize;
+
+ if (level != LEVEL_CONTAINER)
+ return 0;
+ if (!dev)
+ return 1;
+
+ fd = dev_open(dev, O_RDONLY|O_EXCL);
+ if (fd < 0) {
+ if (verbose)
+ pr_err("ddf: Cannot open %s: %s\n",
+ dev, strerror(errno));
+ return 0;
+ }
+ if (!get_dev_size(fd, dev, &ldsize)) {
+ close(fd);
+ return 0;
+ }
+ close(fd);
+ if (freesize) {
+ *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
+ if (*freesize == 0)
+ return 0;
+ }
+
+ return 1;
+}
+
static int validate_geometry_ddf(struct supertype *st,
int level, int layout, int raiddisks,
int *chunk, unsigned long long size,
@@ -3347,11 +3376,9 @@ static int validate_geometry_ddf(struct supertype *st,
level = LEVEL_CONTAINER;
if (level == LEVEL_CONTAINER) {
/* Must be a fresh device to add to a container */
- return validate_geometry_ddf_container(st, level, layout,
- raiddisks, *chunk,
- size, data_offset, dev,
- freesize,
- verbose);
+ return validate_geometry_ddf_container(st, level, raiddisks,
+ data_offset, dev,
+ freesize, verbose);
}
if (!dev) {
@@ -3449,43 +3476,6 @@ static int validate_geometry_ddf(struct supertype *st,
return 1;
}
-static int
-validate_geometry_ddf_container(struct supertype *st,
- int level, int layout, int raiddisks,
- int chunk, unsigned long long size,
- unsigned long long data_offset,
- char *dev, unsigned long long *freesize,
- int verbose)
-{
- int fd;
- unsigned long long ldsize;
-
- if (level != LEVEL_CONTAINER)
- return 0;
- if (!dev)
- return 1;
-
- fd = dev_open(dev, O_RDONLY|O_EXCL);
- if (fd < 0) {
- if (verbose)
- pr_err("ddf: Cannot open %s: %s\n",
- dev, strerror(errno));
- return 0;
- }
- if (!get_dev_size(fd, dev, &ldsize)) {
- close(fd);
- return 0;
- }
- close(fd);
- if (freesize) {
- *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
- if (*freesize == 0)
- return 0;
- }
-
- return 1;
-}
-
static int validate_geometry_ddf_bvd(struct supertype *st,
int level, int layout, int raiddisks,
int *chunk, unsigned long long size,
--
2.38.1

@ -0,0 +1,75 @@
From e492d2ac143e7f02d6c262130d42a4422e8295d5 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:16 +0100
Subject: [PATCH 32/41] mdadm: drop get_disk_controller_domain()
This function is unused now. Drop it.
Controller for IMSM is a device policy and is separated from user defined
domains.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdadm.h | 15 ---------------
policy.c | 13 -------------
2 files changed, 28 deletions(-)
diff --git a/mdadm.h b/mdadm.h
index cfa11391..3fedca48 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1286,21 +1286,6 @@ extern struct superswitch {
*/
struct mdinfo *(*activate_spare)(struct active_array *a,
struct metadata_update **updates);
- /*
- * Return statically allocated string that represents metadata specific
- * controller domain of the disk. The domain is used in disk domain
- * matching functions. Disks belong to the same domain if the they have
- * the same domain from mdadm.conf and belong the same metadata domain.
- * Returning NULL or not providing this handler means that metadata
- * does not distinguish the differences between disks that belong to
- * different controllers. They are in the domain specified by
- * configuration file (mdadm.conf).
- * In case when the metadata has the notion of domains based on disk
- * it shall return NULL for disks that do not belong to the controller
- * the supported domains. Such disks will form another domain and won't
- * be mixed with supported ones.
- */
- const char *(*get_disk_controller_domain)(const char *path);
/* for external backup area */
int (*recover_backup)(struct supertype *st, struct mdinfo *info);
diff --git a/policy.c b/policy.c
index 4b85f62d..404f9b5d 100644
--- a/policy.c
+++ b/policy.c
@@ -365,7 +365,6 @@ struct dev_policy *path_policy(char **paths, char *type)
{
struct pol_rule *rules;
struct dev_policy *pol = NULL;
- int i;
rules = config_rules;
@@ -380,18 +379,6 @@ struct dev_policy *path_policy(char **paths, char *type)
rules = rules->next;
}
- /* Now add any metadata-specific internal knowledge
- * about this path
- */
- for (i=0; paths && paths[0] && superlist[i]; i++)
- if (superlist[i]->get_disk_controller_domain) {
- const char *d =
- superlist[i]->get_disk_controller_domain(
- paths[0]);
- if (d)
- pol_new(&pol, pol_domain, d, superlist[i]->name);
- }
-
pol_sort(&pol);
pol_dedup(pol);
return pol;
--
2.40.1

@ -1,49 +0,0 @@
From 2b93288a5650bb811932836f67f30d63c5ddcfbd Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:08 -0600
Subject: [PATCH 33/83] DDF: Fix NULL pointer dereference in
validate_geometry_ddf()
A relatively recent patch added a call to validate_geometry() in
Manage_add() that has level=LEVEL_CONTAINER and chunk=NULL.
This causes some ddf tests to segfault which aborts the test suite.
To fix this, avoid dereferencing chunk when the level is
LEVEL_CONTAINER or LEVEL_NONE.
Fixes: 1f5d54a06df0 ("Manage: Call validate_geometry when adding drive to external container")
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
super-ddf.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/super-ddf.c b/super-ddf.c
index 9d867f69..949e7d15 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -3369,9 +3369,6 @@ static int validate_geometry_ddf(struct supertype *st,
* If given BVDs, we make an SVD, changing all the GUIDs in the process.
*/
- if (*chunk == UnSet)
- *chunk = DEFAULT_CHUNK;
-
if (level == LEVEL_NONE)
level = LEVEL_CONTAINER;
if (level == LEVEL_CONTAINER) {
@@ -3381,6 +3378,9 @@ static int validate_geometry_ddf(struct supertype *st,
freesize, verbose);
}
+ if (*chunk == UnSet)
+ *chunk = DEFAULT_CHUNK;
+
if (!dev) {
mdu_array_info_t array = {
.level = level,
--
2.38.1

@ -0,0 +1,42 @@
From 933bb500b80cca6f4e9237382f7d8ac852978471 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Thu, 29 Feb 2024 12:52:17 +0100
Subject: [PATCH 33/41] Revert "policy.c: Avoid to take spare without defined
domain by imsm"
This reverts commit 3bf9495270d7 ("policy.c: Avoid to take spare without
defined domain by imsm").
IMSM no longer needs special handling because it doesn't create a disk
controller domain.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
policy.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/policy.c b/policy.c
index 404f9b5d..dfaafdc0 100644
--- a/policy.c
+++ b/policy.c
@@ -759,7 +759,6 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol,
* 1: has domains, all match
*/
int found_any = -1;
- int has_one_domain = 1;
struct dev_policy *p;
pol = pol_find(pol, pol_domain);
@@ -769,9 +768,6 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol,
dom = dom->next;
if (!dom || strcmp(dom->dom, p->value) != 0)
return 0;
- if (has_one_domain && metadata && strcmp(metadata, "imsm") == 0)
- found_any = -1;
- has_one_domain = 0;
}
return found_any;
}
--
2.40.1

@ -1,85 +0,0 @@
From 548e9b916f86c06e2cdb50d8f49633f9bec66c7e Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:09 -0600
Subject: [PATCH 34/83] mdadm/Grow: Fix use after close bug by closing after
fork
The test 07revert-grow fails most of the time. But it succeeds around
1 in 5 times. When it does succeed, it causes the tests to die because
mdadm has segfaulted.
The segfault was caused by mdadm attempting to repoen a file
descriptor that was already closed. The backtrace of the segfault
was:
#0 __strncmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:101
#1 0x000056146e31d44b in devnm2devid (devnm=0x0) at util.c:956
#2 0x000056146e31dab4 in open_dev_flags (devnm=0x0, flags=0)
at util.c:1072
#3 0x000056146e31db22 in open_dev (devnm=0x0) at util.c:1079
#4 0x000056146e3202e8 in reopen_mddev (mdfd=4) at util.c:2244
#5 0x000056146e329f36 in start_array (mdfd=4,
mddev=0x7ffc55342450 "/dev/md0", content=0x7ffc55342860,
st=0x56146fc78660, ident=0x7ffc55342f70, best=0x56146fc6f5d0,
bestcnt=10, chosen_drive=0, devices=0x56146fc706b0, okcnt=5,
sparecnt=0, rebuilding_cnt=0, journalcnt=0, c=0x7ffc55342e90,
clean=1, avail=0x56146fc78720 "\001\001\001\001\001",
start_partial_ok=0, err_ok=0, was_forced=0)
at Assemble.c:1206
#6 0x000056146e32c36e in Assemble (st=0x56146fc78660,
mddev=0x7ffc55342450 "/dev/md0", ident=0x7ffc55342f70,
devlist=0x56146fc6e2d0, c=0x7ffc55342e90)
at Assemble.c:1914
#7 0x000056146e312ac9 in main (argc=11, argv=0x7ffc55343238)
at mdadm.c:1510
The file descriptor was closed early in Grow_continue(). The noted commit
moved the close() call to close the fd above the fork which caused the
parent process to return with a closed fd.
This meant reshape_array() and Grow_continue() would return in the parent
with the fd closed. The fd would eventually be passed to reopen_mddev()
which returned an unhandled NULL from fd2devnm() which would then be
dereferenced in devnm2devid.
Fix this by moving the close() call below the fork. This appears to
fix the 07revert-grow test. While we're at it, switch to using
close_fd() to invalidate the file descriptor.
Fixes: 77b72fa82813 ("mdadm/Grow: prevent md's fd from being occupied during delayed time")
Cc: Alex Wu <alexwu@synology.com>
Cc: BingJing Chang <bingjingc@synology.com>
Cc: Danny Shih <dannyshih@synology.com>
Cc: ChangSyun Peng <allenpeng@synology.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
Grow.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Grow.c b/Grow.c
index 8c520d42..97f22c75 100644
--- a/Grow.c
+++ b/Grow.c
@@ -3514,7 +3514,6 @@ started:
return 0;
}
- close(fd);
/* Now we just need to kick off the reshape and watch, while
* handling backups of the data...
* This is all done by a forked background process.
@@ -3535,6 +3534,9 @@ started:
break;
}
+ /* Close unused file descriptor in the forked process */
+ close_fd(&fd);
+
/* If another array on the same devices is busy, the
* reshape will wait for them. This would mean that
* the first section that we suspend will stay suspended
--
2.38.1

@ -0,0 +1,307 @@
From 9c63130e8974033969569fb9d0b373d1d1478cf7 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Wed, 6 Mar 2024 13:45:53 +0100
Subject: [PATCH 34/41] mdadm: remove inventory file
It is a file with repo content list. It is outdated already.
Remove it.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
inventory | 284 ------------------------------------------------------
1 file changed, 284 deletions(-)
delete mode 100755 inventory
diff --git a/inventory b/inventory
deleted file mode 100755
index c4801b49..00000000
--- a/inventory
+++ /dev/null
@@ -1,284 +0,0 @@
-
-.gitignore
-ANNOUNCE-3.0
-ANNOUNCE-3.0.1
-ANNOUNCE-3.0.2
-ANNOUNCE-3.0.3
-ANNOUNCE-3.1
-ANNOUNCE-3.1.1
-ANNOUNCE-3.1.2
-ANNOUNCE-3.1.3
-ANNOUNCE-3.1.4
-ANNOUNCE-3.1.5
-ANNOUNCE-3.2
-ANNOUNCE-3.2.1
-ANNOUNCE-3.2.2
-ANNOUNCE-3.2.3
-ANNOUNCE-3.2.4
-ANNOUNCE-3.2.5
-ANNOUNCE-3.2.6
-ANNOUNCE-3.3
-ANNOUNCE-3.3.1
-ANNOUNCE-3.3.2
-ANNOUNCE-3.3.3
-ANNOUNCE-3.3.4
-ANNOUNCE-3.4
-ANNOUNCE-4.0
-ANNOUNCE-4.1
-ANNOUNCE-4.2
-Assemble.c
-Build.c
-COPYING
-ChangeLog
-Create.c
-Detail.c
-Dump.c
-Examine.c
-Grow.c
-INSTALL
-Incremental.c
-Kill.c
-Makefile
-Manage.c
-Monitor.c
-Query.c
-README.initramfs
-ReadMe.c
-TODO
-bitmap.c
-bitmap.h
-clustermd_tests/
-clustermd_tests/00r10_Create
-clustermd_tests/00r1_Create
-clustermd_tests/01r10_Grow_bitmap-switch
-clustermd_tests/01r10_Grow_resize
-clustermd_tests/01r1_Grow_add
-clustermd_tests/01r1_Grow_bitmap-switch
-clustermd_tests/01r1_Grow_resize
-clustermd_tests/02r10_Manage_add
-clustermd_tests/02r10_Manage_add-spare
-clustermd_tests/02r10_Manage_re-add
-clustermd_tests/02r1_Manage_add
-clustermd_tests/02r1_Manage_add-spare
-clustermd_tests/02r1_Manage_re-add
-clustermd_tests/03r10_switch-recovery
-clustermd_tests/03r10_switch-resync
-clustermd_tests/03r1_switch-recovery
-clustermd_tests/03r1_switch-resync
-clustermd_tests/cluster_conf
-clustermd_tests/func.sh
-config.c
-coverity-gcc-hack.h
-crc32.c
-crc32.h
-crc32c.c
-dlink.c
-dlink.h
-external-reshape-design.txt
-inventory
-lib.c
-makedist
-managemon.c
-mapfile.c
-maps.c
-md.4
-md5.h
-md_p.h
-md_u.h
-mdadm.8.in
-mdadm.c
-mdadm.conf-example
-mdadm.conf.5
-mdadm.h
-mdadm.spec
-mdmon-design.txt
-mdmon.8
-mdmon.c
-mdmon.h
-mdopen.c
-mdstat.c
-misc/
-misc/mdcheck
-misc/syslog-events
-mkinitramfs
-monitor.c
-msg.c
-msg.h
-part.h
-platform-intel.c
-platform-intel.h
-policy.c
-probe_roms.c
-probe_roms.h
-pwgr.c
-raid5extend.c
-raid6check.8
-raid6check.c
-restripe.c
-sg_io.c
-sha1.c
-sha1.h
-super-ddf.c
-super-gpt.c
-super-intel.c
-super-mbr.c
-super0.c
-super1.c
-swap_super.c
-sysfs.c
-systemd/
-systemd/SUSE-mdadm_env.sh
-systemd/mdadm-grow-continue@.service
-systemd/mdadm-last-resort@.service
-systemd/mdadm-last-resort@.timer
-systemd/mdadm.shutdown
-systemd/mdcheck_continue.service
-systemd/mdcheck_continue.timer
-systemd/mdcheck_start.service
-systemd/mdcheck_start.timer
-systemd/mdmon@.service
-systemd/mdmonitor-oneshot.service
-systemd/mdmonitor-oneshot.timer
-systemd/mdmonitor.service
-test
-tests/
-tests/00linear
-tests/00multipath
-tests/00names
-tests/00raid0
-tests/00raid1
-tests/00raid10
-tests/00raid4
-tests/00raid5
-tests/00raid6
-tests/00readonly
-tests/01r1fail
-tests/01r5fail
-tests/01r5integ
-tests/01raid6integ
-tests/01replace
-tests/02lineargrow
-tests/02r1add
-tests/02r1grow
-tests/02r5grow
-tests/02r6grow
-tests/03assem-incr
-tests/03r0assem
-tests/03r5assem
-tests/03r5assem-failed
-tests/03r5assemV1
-tests/04r0update
-tests/04r1update
-tests/04r5swap
-tests/04update-metadata
-tests/04update-uuid
-tests/05r1-add-internalbitmap
-tests/05r1-add-internalbitmap-v1a
-tests/05r1-add-internalbitmap-v1b
-tests/05r1-add-internalbitmap-v1c
-tests/05r1-bitmapfile
-tests/05r1-failfast
-tests/05r1-grow-external
-tests/05r1-grow-internal
-tests/05r1-grow-internal-1
-tests/05r1-internalbitmap
-tests/05r1-internalbitmap-v1a
-tests/05r1-internalbitmap-v1b
-tests/05r1-internalbitmap-v1c
-tests/05r1-n3-bitmapfile
-tests/05r1-re-add
-tests/05r1-re-add-nosuper
-tests/05r1-remove-internalbitmap
-tests/05r1-remove-internalbitmap-v1a
-tests/05r1-remove-internalbitmap-v1b
-tests/05r1-remove-internalbitmap-v1c
-tests/05r5-bitmapfile
-tests/05r5-internalbitmap
-tests/05r6-bitmapfile
-tests/05r6tor0
-tests/06name
-tests/06sysfs
-tests/06wrmostly
-tests/07autoassemble
-tests/07autodetect
-tests/07changelevelintr
-tests/07changelevels
-tests/07layouts
-tests/07reshape5intr
-tests/07revert-grow
-tests/07revert-inplace
-tests/07revert-shrink
-tests/07testreshape5
-tests/09imsm-assemble
-tests/09imsm-create-fail-rebuild
-tests/09imsm-overlap
-tests/10ddf-assemble-missing
-tests/10ddf-create
-tests/10ddf-create-fail-rebuild
-tests/10ddf-fail-create-race
-tests/10ddf-fail-readd
-tests/10ddf-fail-readd-readonly
-tests/10ddf-fail-spare
-tests/10ddf-fail-stop-readd
-tests/10ddf-fail-twice
-tests/10ddf-fail-two-spares
-tests/10ddf-geometry
-tests/10ddf-incremental-wrong-order
-tests/10ddf-sudden-degraded
-tests/11spare-migration
-tests/12imsm-r0_2d-grow-r0_3d
-tests/12imsm-r0_2d-grow-r0_4d
-tests/12imsm-r0_2d-grow-r0_5d
-tests/12imsm-r0_3d-grow-r0_4d
-tests/12imsm-r5_3d-grow-r5_4d
-tests/12imsm-r5_3d-grow-r5_5d
-tests/13imsm-r0_r0_2d-grow-r0_r0_4d
-tests/13imsm-r0_r0_2d-grow-r0_r0_5d
-tests/13imsm-r0_r0_3d-grow-r0_r0_4d
-tests/13imsm-r0_r5_3d-grow-r0_r5_4d
-tests/13imsm-r0_r5_3d-grow-r0_r5_5d
-tests/13imsm-r5_r0_3d-grow-r5_r0_4d
-tests/13imsm-r5_r0_3d-grow-r5_r0_5d
-tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d
-tests/14imsm-r0_3d_no_spares-migrate-r5_3d
-tests/14imsm-r0_r0_2d-takeover-r10_4d
-tests/14imsm-r10_4d-grow-r10_5d
-tests/14imsm-r10_r5_4d-takeover-r0_2d
-tests/14imsm-r1_2d-grow-r1_3d
-tests/14imsm-r1_2d-takeover-r0_2d
-tests/14imsm-r5_3d-grow-r5_5d-no-spares
-tests/14imsm-r5_3d-migrate-r4_3d
-tests/15imsm-r0_3d_64k-migrate-r0_3d_256k
-tests/15imsm-r5_3d_4k-migrate-r5_3d_256k
-tests/15imsm-r5_3d_64k-migrate-r5_3d_256k
-tests/15imsm-r5_6d_4k-migrate-r5_6d_256k
-tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k
-tests/16imsm-r0_3d-migrate-r5_4d
-tests/16imsm-r0_5d-migrate-r5_6d
-tests/16imsm-r5_3d-migrate-r0_3d
-tests/16imsm-r5_5d-migrate-r0_5d
-tests/18imsm-1d-takeover-r0_1d
-tests/18imsm-1d-takeover-r1_2d
-tests/18imsm-r0_2d-takeover-r10_4d
-tests/18imsm-r10_4d-takeover-r0_2d
-tests/18imsm-r1_2d-takeover-r0_1d
-tests/19raid6auto-repair
-tests/19raid6check
-tests/19raid6repair
-tests/19repair-does-not-destroy
-tests/20raid5journal
-tests/21raid5cache
-tests/ToTest
-tests/env-ddf-template
-tests/env-imsm-template
-tests/func.sh
-tests/imsm-grow-template
-tests/utils
-udev-md-clustered-confirm-device.rules
-udev-md-raid-arrays.rules
-udev-md-raid-assembly.rules
-udev-md-raid-creating.rules
-udev-md-raid-safe-timeouts.rules
-util.c
-uuid.c
-xmalloc.c
--
2.40.1

@ -1,36 +0,0 @@
From 9ae62977b51dab0f4bb46b1c8ea5ebd1705b2f4d Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:10 -0600
Subject: [PATCH 35/83] monitor: Avoid segfault when calling NULL
get_bad_blocks
Not all struct superswitch implement a get_bad_blocks() function,
yet mdmon seems to call it without checking for NULL and thus
occasionally segfaults in the test 10ddf-geometry.
Fix this by checking for NULL before calling it.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
monitor.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/monitor.c b/monitor.c
index b877e595..820a93d0 100644
--- a/monitor.c
+++ b/monitor.c
@@ -311,6 +311,9 @@ static int check_for_cleared_bb(struct active_array *a, struct mdinfo *mdi)
struct md_bb *bb;
int i;
+ if (!ss->get_bad_blocks)
+ return -1;
+
/*
* Get a list of bad blocks for an array, then read list of
* acknowledged bad blocks from kernel and compare it against metadata
--
2.38.1

@ -0,0 +1,31 @@
From 1750758c7ff526e3560433f6235e5cfa35cf646a Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Wed, 6 Mar 2024 15:50:55 +0100
Subject: [PATCH 35/41] udev.c: Do not require libudev.h if DNO_LIBUDEV
libudev may not be present at all, so do not require it.
Reported-by: Boian Bonev <bbonev@ipacct.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
udev.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/udev.c b/udev.c
index bc4722b0..066e6ab1 100644
--- a/udev.c
+++ b/udev.c
@@ -26,7 +26,10 @@
#include <signal.h>
#include <limits.h>
#include <syslog.h>
+
+#ifndef NO_LIBUDEV
#include <libudev.h>
+#endif
static char *unblock_path;
--
2.40.1

@ -1,78 +0,0 @@
From 6c9d9260633f2c8491985b0782cf0fbd7e51651b Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:11 -0600
Subject: [PATCH 36/83] mdadm: Fix mdadm -r remove option regression
The commit noted below globally adds a parameter to the -r option but missed
the fact that -r is used for another purpose: --remove.
After that commit, a command such as:
mdadm /dev/md0 -r /dev/loop0
will do nothing seeing the device parameter will be consumed as an
argument to the -r option; thus, there will only be one device
seen on the command line, devs_found will only be 1 and nothing will
happen.
This caused the 01r5integ and 01raid6integ tests to hang indefinitely
as mdadm did not remove the failed device. With the device not removed,
it would not be readded. Then the loop waiting for the array status to
change would loop forever.
This commit was recently reverted, but the legitimate fix for the
monitor operations was still not fixed. So add specific monitor
short ops to re-fix the --monitor -r option.
Fixes: 546047688e1c ("mdadm: fix coredump of mdadm --monitor -r")
Fixes: 190dc029b141 ("Revert "mdadm: fix coredump of mdadm --monitor -r"")
Cc: Wu Guanghao <wuguanghao3@huawei.com>
Cc: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
ReadMe.c | 1 +
mdadm.c | 1 +
mdadm.h | 1 +
3 files changed, 3 insertions(+)
diff --git a/ReadMe.c b/ReadMe.c
index bec1be9a..7518a32a 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -82,6 +82,7 @@ char Version[] = "mdadm - v" VERSION " - " VERS_DATE EXTRAVERSION "\n";
*/
char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
+char short_monitor_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:r:n:x:u:c:d:z:U:N:safRSow1tye:k:";
char short_bitmap_options[]=
"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
char short_bitmap_auto_options[]=
diff --git a/mdadm.c b/mdadm.c
index be40686c..d0c5e6de 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -227,6 +227,7 @@ int main(int argc, char *argv[])
shortopt = short_bitmap_auto_options;
break;
case 'F': newmode = MONITOR;
+ shortopt = short_monitor_options;
break;
case 'G': newmode = GROW;
shortopt = short_bitmap_options;
diff --git a/mdadm.h b/mdadm.h
index 974415b9..163f4a49 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -419,6 +419,7 @@ enum mode {
};
extern char short_options[];
+extern char short_monitor_options[];
extern char short_bitmap_options[];
extern char short_bitmap_auto_options[];
extern struct option long_options[];
--
2.38.1

@ -0,0 +1,29 @@
From 8bda86099089b44129ef6206764f9de47a45f0db Mon Sep 17 00:00:00 2001
From: Alexander Kanavin <alex@linutronix.de>
Date: Tue, 12 Mar 2024 11:01:50 +0100
Subject: [PATCH 36/41] util.c: add limits.h include for NAME_MAX definition
Add limits.h include for NAME_MAX definition.
Signed-off-by: Alexander Kanavin <alex@linutronix.de>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
util.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/util.c b/util.c
index 05ad3343..49a9c6e2 100644
--- a/util.c
+++ b/util.c
@@ -36,7 +36,7 @@
#include <ctype.h>
#include <dirent.h>
#include <dlfcn.h>
-
+#include <limits.h>
/*
* following taken from linux/blkpg.h because they aren't
--
2.40.1

@ -1,42 +0,0 @@
From 41edf6f45895193f4a523cb0a08d639c9ff9ccc9 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:12 -0600
Subject: [PATCH 37/83] mdadm: Fix optional --write-behind parameter
The commit noted below changed the behaviour of --write-behind to
require an argument. This broke the 06wrmostly test with the error:
mdadm: Invalid value for maximum outstanding write-behind writes: (null).
Must be between 0 and 16383.
To fix this, check if optarg is NULL before parsing it, as the original
code did.
Fixes: 60815698c0ac ("Refactor parse_num and use it to parse optarg.")
Cc: Mateusz Grzonka <mateusz.grzonka@intel.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
mdadm.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mdadm.c b/mdadm.c
index d0c5e6de..56722ed9 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1201,8 +1201,9 @@ int main(int argc, char *argv[])
case O(BUILD, WriteBehind):
case O(CREATE, WriteBehind):
s.write_behind = DEFAULT_MAX_WRITE_BEHIND;
- if (parse_num(&s.write_behind, optarg) != 0 ||
- s.write_behind < 0 || s.write_behind > 16383) {
+ if (optarg &&
+ (parse_num(&s.write_behind, optarg) != 0 ||
+ s.write_behind < 0 || s.write_behind > 16383)) {
pr_err("Invalid value for maximum outstanding write-behind writes: %s.\n\tMust be between 0 and 16383.\n",
optarg);
exit(2);
--
2.38.1

@ -0,0 +1,59 @@
From 1c8327950566449e206e613c11c8232032f26787 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Mon, 18 Mar 2024 16:19:29 +0100
Subject: [PATCH 37/41] mdadm: set swapuuid in all handlers
It is not set, so it should be 0 but it may vary on compilation
settings. Set it always to 0.
metadata should care to set UUID and read in proper endianness so it
doesn't follow super1 concept of swapuuid to depend on endianness.
It is not an attempt to fix endianness issues.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
super-ddf.c | 1 +
super-intel.c | 1 +
super0.c | 2 ++
3 files changed, 4 insertions(+)
diff --git a/super-ddf.c b/super-ddf.c
index 7571e3b7..94ac5ff3 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -5162,6 +5162,7 @@ struct superswitch super_ddf = {
.default_geometry = default_geometry_ddf,
.external = 1,
+ .swapuuid = 0,
/* for mdmon */
.open_new = ddf_open_new,
diff --git a/super-intel.c b/super-intel.c
index 77140455..e1754f29 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -13116,6 +13116,7 @@ struct superswitch super_imsm = {
.validate_ppl = validate_ppl_imsm,
.external = 1,
+ .swapuuid = 0,
.name = "imsm",
/* for mdmon */
diff --git a/super0.c b/super0.c
index a7c5f813..9b8a1bd6 100644
--- a/super0.c
+++ b/super0.c
@@ -1369,5 +1369,7 @@ struct superswitch super0 = {
.locate_bitmap = locate_bitmap0,
.write_bitmap = write_bitmap0,
.free_super = free_super0,
+
+ .swapuuid = 0,
.name = "0.90",
};
--
2.40.1

@ -0,0 +1,247 @@
From ba65d917d121dfb9876053e6f62dbd4ebf2e028c Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Mon, 18 Mar 2024 16:19:30 +0100
Subject: [PATCH 38/41] mdadm: Fix native --detail --export
The mentioned commit (see Fixes) causes the UUID to not be swapped as expected
for native superblock. Fix this problem.
For detail, we should avoid superblock calls, we can have information
about supertype from map, use that.
Simplify fname_from_uuid() by removing dependencies to metadata
handler, it is not needed. Decision is taken at compile time, except
super1 but this function is not used by super1. Add warning about that.
Remove separator, it is always ':'.
Fixes: 60c19530dd7c ("Detail: remove duplicated code")
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Detail.c | 26 +++++++++++++++++++++++++-
mdadm.h | 3 +--
super-ddf.c | 10 +++++-----
super-intel.c | 16 ++++++++--------
util.c | 24 +++++++++++++-----------
5 files changed, 52 insertions(+), 27 deletions(-)
diff --git a/Detail.c b/Detail.c
index f23ec16f..55a086d3 100644
--- a/Detail.c
+++ b/Detail.c
@@ -49,6 +49,30 @@ static int add_device(const char *dev, char ***p_devices,
return n_devices + 1;
}
+/**
+ * detail_fname_from_uuid() - generate uuid string with special super1 handling.
+ * @mp: map entry to parse.
+ * @buf: buf to write.
+ *
+ * Hack to work around an issue with super1 superblocks. They need swapuuid set in order for assembly
+ * to work, but can't have it set if we want this printout to match all the other uuid printouts
+ * in super1.c, so we force swapuuid to 1 to make our printout match the rest of super1.
+ *
+ * Always convert uuid if host is big endian.
+ */
+char *detail_fname_from_uuid(struct map_ent *mp, char *buf)
+{
+#if __BYTE_ORDER == BIG_ENDIAN
+ bool swap = true;
+#else
+ bool swap = false;
+#endif
+ if (strncmp(mp->metadata, "1.", 2) == 0)
+ swap = true;
+
+ return __fname_from_uuid(mp->uuid, swap, buf, ':');
+}
+
int Detail(char *dev, struct context *c)
{
/*
@@ -256,7 +280,7 @@ int Detail(char *dev, struct context *c)
mp = map_by_devnm(&map, fd2devnm(fd));
if (mp) {
- __fname_from_uuid(mp->uuid, 0, nbuf, ':');
+ detail_fname_from_uuid(mp, nbuf);
printf("MD_UUID=%s\n", nbuf + 5);
if (mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN);
diff --git a/mdadm.h b/mdadm.h
index 3fedca48..a363708a 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1696,8 +1696,7 @@ extern const int uuid_zero[4];
extern int same_uuid(int a[4], int b[4], int swapuuid);
extern void copy_uuid(void *a, int b[4], int swapuuid);
extern char *__fname_from_uuid(int id[4], int swap, char *buf, char sep);
-extern char *fname_from_uuid(struct supertype *st,
- struct mdinfo *info, char *buf, char sep);
+extern char *fname_from_uuid(struct mdinfo *info, char *buf);
extern unsigned long calc_csum(void *super, int bytes);
extern int enough(int level, int raid_disks, int layout, int clean,
char *avail);
diff --git a/super-ddf.c b/super-ddf.c
index 94ac5ff3..21426c75 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1617,7 +1617,7 @@ static void brief_examine_super_ddf(struct supertype *st, int verbose)
struct mdinfo info;
char nbuf[64];
getinfo_super_ddf(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
}
@@ -1632,7 +1632,7 @@ static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
unsigned int i;
char nbuf[64];
getinfo_super_ddf(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
struct virtual_entry *ve = &ddf->virt->entries[i];
@@ -1645,7 +1645,7 @@ static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
ddf->currentconf =&vcl;
vcl.vcnum = i;
uuid_from_super_ddf(st, info.uuid);
- fname_from_uuid(st, &info, nbuf1, ':');
+ fname_from_uuid(&info, nbuf1);
_ddf_array_name(namebuf, ddf, i);
printf("ARRAY%s%s container=%s member=%d UUID=%s\n",
namebuf[0] == '\0' ? "" : " " DEV_MD_DIR, namebuf,
@@ -1658,7 +1658,7 @@ static void export_examine_super_ddf(struct supertype *st)
struct mdinfo info;
char nbuf[64];
getinfo_super_ddf(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
printf("MD_METADATA=ddf\n");
printf("MD_LEVEL=container\n");
printf("MD_UUID=%s\n", nbuf+5);
@@ -1798,7 +1798,7 @@ static void brief_detail_super_ddf(struct supertype *st, char *subarray)
return;
else
uuid_of_ddf_subarray(ddf, vcnum, info.uuid);
- fname_from_uuid(st, &info, nbuf,':');
+ fname_from_uuid(&info, nbuf);
printf(" UUID=%s", nbuf + 5);
}
diff --git a/super-intel.c b/super-intel.c
index e1754f29..ff2590fe 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2217,7 +2217,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
else
printf("not supported\n");
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
printf(" UUID : %s\n", nbuf + 5);
sum = __le32_to_cpu(mpb->check_sum);
printf(" Checksum : %08x %s\n", sum,
@@ -2242,7 +2242,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
super->current_vol = i;
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
}
for (i = 0; i < mpb->num_disks; i++) {
@@ -2267,7 +2267,7 @@ static void brief_examine_super_imsm(struct supertype *st, int verbose)
char nbuf[64];
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
}
@@ -2284,13 +2284,13 @@ static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
return;
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
for (i = 0; i < super->anchor->num_raid_devs; i++) {
struct imsm_dev *dev = get_imsm_dev(super, i);
super->current_vol = i;
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf1, ':');
+ fname_from_uuid(&info, nbuf1);
printf("ARRAY " DEV_MD_DIR "%.16s container=%s member=%d UUID=%s\n",
dev->volume, nbuf + 5, i, nbuf1 + 5);
}
@@ -2304,7 +2304,7 @@ static void export_examine_super_imsm(struct supertype *st)
char nbuf[64];
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
printf("MD_METADATA=imsm\n");
printf("MD_LEVEL=container\n");
printf("MD_UUID=%s\n", nbuf+5);
@@ -2324,7 +2324,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost,
super->current_vol = strtoul(subarray, NULL, 10);
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
printf("\n UUID : %s\n", nbuf + 5);
super->current_vol = temp_vol;
@@ -2341,7 +2341,7 @@ static void brief_detail_super_imsm(struct supertype *st, char *subarray)
super->current_vol = strtoul(subarray, NULL, 10);
getinfo_super_imsm(st, &info, NULL);
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(&info, nbuf);
printf(" UUID=%s", nbuf + 5);
super->current_vol = temp_vol;
diff --git a/util.c b/util.c
index 49a9c6e2..03336d6f 100644
--- a/util.c
+++ b/util.c
@@ -589,19 +589,21 @@ char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
}
-char *fname_from_uuid(struct supertype *st, struct mdinfo *info,
- char *buf, char sep)
-{
- // dirty hack to work around an issue with super1 superblocks...
- // super1 superblocks need swapuuid set in order for assembly to
- // work, but can't have it set if we want this printout to match
- // all the other uuid printouts in super1.c, so we force swapuuid
- // to 1 to make our printout match the rest of super1
+/**
+ * fname_from_uuid() - generate uuid string. Should not be used with super1.
+ * @info: info with uuid
+ * @buf: buf to fill.
+ *
+ * This routine should not be used with super1. See detail_fname_from_uuid() for details. It does
+ * not use superswitch swapuuid as it should be 0 but it has to do UUID conversion if host is big
+ * endian- left for backward compatibility.
+ */
+char *fname_from_uuid(struct mdinfo *info, char *buf)
+{
#if __BYTE_ORDER == BIG_ENDIAN
- return __fname_from_uuid(info->uuid, 1, buf, sep);
+ return __fname_from_uuid(info->uuid, true, buf, ':');
#else
- return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 :
- st->ss->swapuuid, buf, sep);
+ return __fname_from_uuid(info->uuid, false, buf, ':');
#endif
}
--
2.40.1

@ -1,38 +0,0 @@
From 7539254342bc591717b0051734cc6c09c1b88640 Mon Sep 17 00:00:00 2001
From: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Date: Wed, 22 Jun 2022 14:25:13 -0600
Subject: [PATCH 38/83] tests/00raid0: add a test that validates raid0 with
layout fails for 0.9
329dfc28debb disallows the creation of raid0 with layouts for 0.9
metadata. This test confirms the new behavior.
Signed-off-by: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
tests/00raid0 | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/tests/00raid0 b/tests/00raid0
index 8bc18985..e6b21cc4 100644
--- a/tests/00raid0
+++ b/tests/00raid0
@@ -6,11 +6,9 @@ check raid0
testdev $md0 3 $mdsize2_l 512
mdadm -S $md0
-# now with version-0.90 superblock
+# verify raid0 with layouts fail for 0.90
mdadm -CR $md0 -e0.90 -l0 -n4 $dev0 $dev1 $dev2 $dev3
-check raid0
-testdev $md0 4 $mdsize0 512
-mdadm -S $md0
+check opposite_result
# now with no superblock
mdadm -B $md0 -l0 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
--
2.38.1

@ -0,0 +1,87 @@
From de23e12a39cfc94575e1173293fe9e15337ee999 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Mon, 18 Mar 2024 16:53:31 +0100
Subject: [PATCH 39/41] sysfs: remove vers parameter from sysfs_set_array
9003 was passed directly to sysfs_set_array() since md_get_version()
always returned this value. md_get_version() was removed long ago.
Remove dead version check from sysfs_set_array().
Remove "vers" argument and fix function calls.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Assemble.c | 2 +-
mdadm.h | 2 +-
sysfs.c | 6 ++----
util.c | 3 +--
4 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 9d042055..f6c5b99e 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1988,7 +1988,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
* and ignoring special character on the first place.
*/
if (strcmp(sra->text_version + 1, content->text_version + 1) != 0) {
- if (sysfs_set_array(content, 9003) != 0) {
+ if (sysfs_set_array(content) != 0) {
sysfs_free(sra);
return 1;
}
diff --git a/mdadm.h b/mdadm.h
index a363708a..ae2106a2 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -811,7 +811,7 @@ extern int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev,
extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
-extern int sysfs_set_array(struct mdinfo *info, int vers);
+extern int sysfs_set_array(struct mdinfo *info);
extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(char *devnm, long rdev);
diff --git a/sysfs.c b/sysfs.c
index 230b842e..4ded1672 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -678,7 +678,7 @@ int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms)
return sysfs_set_str(sra, NULL, "safe_mode_delay", delay);
}
-int sysfs_set_array(struct mdinfo *info, int vers)
+int sysfs_set_array(struct mdinfo *info)
{
int rv = 0;
char ver[100];
@@ -702,9 +702,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
if (strlen(buf) >= 9 && buf[9] == '-')
ver[9] = '-';
- if ((vers % 100) < 2 ||
- sysfs_set_str(info, NULL, "metadata_version",
- ver) < 0) {
+ if (sysfs_set_str(info, NULL, "metadata_version", ver) < 0) {
pr_err("This kernel does not support external metadata.\n");
return 1;
}
diff --git a/util.c b/util.c
index 03336d6f..9e837045 100644
--- a/util.c
+++ b/util.c
@@ -1867,8 +1867,7 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
int rv;
if (st->ss->external)
- return sysfs_set_array(info, 9003);
-
+ return sysfs_set_array(info);
memset(&inf, 0, sizeof(inf));
inf.major_version = info->array.major_version;
inf.minor_version = info->array.minor_version;
--
2.40.1

@ -1,99 +0,0 @@
From 14c2161edb77d7294199e8aa7daa9f9d1d0ad5d7 Mon Sep 17 00:00:00 2001
From: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Date: Wed, 22 Jun 2022 14:25:14 -0600
Subject: [PATCH 39/83] tests: fix raid0 tests for 0.90 metadata
Some of the test cases fail because raid0 creation fails with the error,
"0.90 metadata does not support layouts for RAID0" added by commit,
329dfc28debb. Fix some of the test cases by switching from raid0 to
linear level for 0.9 metadata where possible.
Signed-off-by: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
tests/00raid0 | 4 ++--
tests/00readonly | 4 ++++
tests/03r0assem | 6 +++---
tests/04r0update | 4 ++--
tests/04update-metadata | 2 +-
5 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/tests/00raid0 b/tests/00raid0
index e6b21cc4..9b8896cb 100644
--- a/tests/00raid0
+++ b/tests/00raid0
@@ -20,8 +20,8 @@ mdadm -S $md0
# now same again with different chunk size
for chunk in 4 32 256
do
- mdadm -CR $md0 -e0.90 -l raid0 --chunk $chunk -n3 $dev0 $dev1 $dev2
- check raid0
+ mdadm -CR $md0 -e0.90 -l linear --chunk $chunk -n3 $dev0 $dev1 $dev2
+ check linear
testdev $md0 3 $mdsize0 $chunk
mdadm -S $md0
diff --git a/tests/00readonly b/tests/00readonly
index 28b0fa13..39202487 100644
--- a/tests/00readonly
+++ b/tests/00readonly
@@ -4,6 +4,10 @@ for metadata in 0.9 1.0 1.1 1.2
do
for level in linear raid0 raid1 raid4 raid5 raid6 raid10
do
+ if [[ $metadata == "0.9" && $level == "raid0" ]];
+ then
+ continue
+ fi
mdadm -CR $md0 -l $level -n 4 --metadata=$metadata \
$dev1 $dev2 $dev3 $dev4 --assume-clean
check nosync
diff --git a/tests/03r0assem b/tests/03r0assem
index 6744e322..44df0645 100644
--- a/tests/03r0assem
+++ b/tests/03r0assem
@@ -68,9 +68,9 @@ mdadm -S $md2
### Now for version 0...
mdadm --zero-superblock $dev0 $dev1 $dev2
-mdadm -CR $md2 -l0 --metadata=0.90 -n3 $dev0 $dev1 $dev2
-check raid0
-tst="testdev $md2 3 $mdsize0 512"
+mdadm -CR $md2 -llinear --metadata=0.90 -n3 $dev0 $dev1 $dev2
+check linear
+tst="testdev $md2 3 $mdsize0 1"
$tst
uuid=`mdadm -Db $md2 | sed 's/.*UUID=//'`
diff --git a/tests/04r0update b/tests/04r0update
index 73ee3b9f..b95efb06 100644
--- a/tests/04r0update
+++ b/tests/04r0update
@@ -1,7 +1,7 @@
# create a raid0, re-assemble with a different super-minor
-mdadm -CR -e 0.90 $md0 -l0 -n3 $dev0 $dev1 $dev2
-testdev $md0 3 $mdsize0 512
+mdadm -CR -e 0.90 $md0 -llinear -n3 $dev0 $dev1 $dev2
+testdev $md0 3 $mdsize0 1
minor1=`mdadm -E $dev0 | sed -n -e 's/.*Preferred Minor : //p'`
mdadm -S /dev/md0
diff --git a/tests/04update-metadata b/tests/04update-metadata
index 232fc1ff..08c14af7 100644
--- a/tests/04update-metadata
+++ b/tests/04update-metadata
@@ -8,7 +8,7 @@ set -xe
dlist="$dev0 $dev1 $dev2 $dev3"
-for ls in raid0/4 linear/4 raid1/1 raid5/3 raid6/2
+for ls in linear/4 raid1/1 raid5/3 raid6/2
do
s=${ls#*/} l=${ls%/*}
mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 -c 64 $dlist
--
2.38.1

@ -0,0 +1,55 @@
From ae996e81232b8ba991e763dfa15577a0af358358 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Mon, 18 Mar 2024 17:28:42 +0100
Subject: [PATCH 40/41] mdadm: fix grow segfault for IMSM
If sc is not initialized, there is possibility that sc.pols is not zeroed
and it causes segfault.
Add missing initialization.
Add missing dev_policy_free() in two places.
Fixes: f656201188d7 ("mdadm: drop get_required_spare_criteria()")
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Incremental.c | 1 +
super-intel.c | 9 +++++++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/Incremental.c b/Incremental.c
index 958ba9ba..83db0712 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -874,6 +874,7 @@ mdadm_status_t incremental_external_test_spare_criteria(struct supertype *st, ch
rv = MDADM_STATUS_SUCCESS;
out:
+ dev_policy_free(sc.pols);
dup->ss->free_super(dup);
free(dup);
diff --git a/super-intel.c b/super-intel.c
index ff2590fe..70f3c4ef 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -11518,10 +11518,15 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st,
*/
static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
- struct spare_criteria sc;
+ struct spare_criteria sc = {0};
+ struct mdinfo *spares;
get_spare_criteria_imsm(st, NULL, &sc);
- return container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
+ spares = container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
+
+ dev_policy_free(sc.pols);
+
+ return spares;
}
/******************************************************************************
--
2.40.1

@ -1,39 +0,0 @@
From de045db607b1ac4b70fc2a8878463e029c2ab1dc Mon Sep 17 00:00:00 2001
From: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Date: Wed, 22 Jun 2022 14:25:15 -0600
Subject: [PATCH 40/83] tests/04update-metadata: avoid passing chunk size to
raid1
'04update-metadata' test fails with error, "specifying chunk size is
forbidden for this level" added by commit, 5b30a34aa4b5e. Hence,
correcting the test to ignore passing chunk size to raid1.
Signed-off-by: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@oracle.com>
[logang@deltatee.com: fix if/then style and dropped unrelated hunk]
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
tests/04update-metadata | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tests/04update-metadata b/tests/04update-metadata
index 08c14af7..2b72a303 100644
--- a/tests/04update-metadata
+++ b/tests/04update-metadata
@@ -11,7 +11,11 @@ dlist="$dev0 $dev1 $dev2 $dev3"
for ls in linear/4 raid1/1 raid5/3 raid6/2
do
s=${ls#*/} l=${ls%/*}
- mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 -c 64 $dlist
+ if [[ $l == 'raid1' ]]; then
+ mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 $dlist
+ else
+ mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 -c 64 $dlist
+ fi
testdev $md0 $s 19904 64
mdadm -S $md0
mdadm -A $md0 --update=metadata $dlist
--
2.38.1

@ -0,0 +1,85 @@
From da4d58b6d01ed8b0149b777eba7818861fde8c80 Mon Sep 17 00:00:00 2001
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
Date: Tue, 19 Mar 2024 11:15:29 +0100
Subject: [PATCH 41/41] Remove all "if zeros" pt.2
Commit e15e8b00cbce ("Remove all "if zeros"") did not remove all "if 0"
code blocks.
This commit is cleanup for that commit.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Build.c | 6 ------
Grow.c | 13 +------------
super1.c | 11 -----------
3 files changed, 1 insertion(+), 29 deletions(-)
diff --git a/Build.c b/Build.c
index 1fbf8596..1be90e41 100644
--- a/Build.c
+++ b/Build.c
@@ -156,12 +156,6 @@ int Build(struct mddev_ident *ident, struct mddev_dev *devlist, struct shape *s,
bitmap_fd = open(s->bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
int major = BITMAP_MAJOR_HI;
-#if 0
- if (s->bitmap_chunk == UnSet) {
- pr_err("%s cannot be opened.\n", s->bitmap_file);
- goto abort;
- }
-#endif
bitmapsize = s->size >> 9; /* FIXME wrong for RAID10 */
if (CreateBitmap(s->bitmap_file, 1, NULL,
s->bitmap_chunk, c->delay,
diff --git a/Grow.c b/Grow.c
index c69a342d..074f1995 100644
--- a/Grow.c
+++ b/Grow.c
@@ -4413,19 +4413,8 @@ static void validate(int afd, int bfd, unsigned long long offset)
lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0);
if ((unsigned long long)read(afd, abuf, len) != len)
fail("read first from array failed");
- if (memcmp(bbuf, abuf, len) != 0) {
-#if 0
- int i;
- printf("offset=%llu len=%llu\n",
- (unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len);
- for (i=0; i<len; i++)
- if (bbuf[i] != abuf[i]) {
- printf("first diff byte %d\n", i);
- break;
- }
-#endif
+ if (memcmp(bbuf, abuf, len) != 0)
fail("data1 compare failed");
- }
}
if (bsb2.length2) {
unsigned long long len = __le64_to_cpu(bsb2.length2)*512;
diff --git a/super1.c b/super1.c
index 5439b7bb..81d29a65 100644
--- a/super1.c
+++ b/super1.c
@@ -575,17 +575,6 @@ static void examine_super1(struct supertype *st, char *homehost)
inconsistent = 1;
}
}
-#if 0
- /* This is confusing too */
- faulty = 0;
- for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) {
- int role = __le16_to_cpu(sb->dev_roles[i]);
- if (role == MD_DISK_ROLE_FAULTY)
- faulty++;
- }
- if (faulty)
- printf(" %d failed", faulty);
-#endif
printf(" ('A' == active, '.' == missing, 'R' == replacing)");
printf("\n");
for (d = 0; d < __le32_to_cpu(sb->max_dev); d++) {
--
2.40.1

@ -1,31 +0,0 @@
From a2c832465fc75202e244327b2081231dfa974617 Mon Sep 17 00:00:00 2001
From: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Date: Wed, 22 Jun 2022 14:25:16 -0600
Subject: [PATCH 41/83] tests/02lineargrow: clear the superblock at every
iteration
This fixes 02lineargrow test as prior metadata causes --add operation
to misbehave.
Signed-off-by: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
tests/02lineargrow | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/02lineargrow b/tests/02lineargrow
index e05c219d..595bf9f2 100644
--- a/tests/02lineargrow
+++ b/tests/02lineargrow
@@ -20,4 +20,6 @@ do
testdev $md0 3 $sz 1
mdadm -S $md0
+ mdadm --zero /dev/loop2
+ mdadm --zero /dev/loop3
done
--
2.38.1

@ -0,0 +1,44 @@
From cc75b0faaa016e54d569486c9a7abe6c39cb883a Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 22 Mar 2024 12:51:15 +0100
Subject: [PATCH 42/66] mdadm: Move pr_vrb define to mdadm.h
Move pr_vrb define from super-intel.c to mdadm.h to make it widely
available. This change will be used in the next patches.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
mdadm.h | 2 ++
super-intel.c | 2 --
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/mdadm.h b/mdadm.h
index ae2106a2..fbb161ba 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1911,6 +1911,8 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
#define pr_info(fmt, args...) printf("%s: "fmt, Name, ##args)
+#define pr_vrb(fmt, arg...) ((void)(verbose && pr_err(fmt, ##arg)))
+
void *xmalloc(size_t len);
void *xrealloc(void *ptr, size_t len);
void *xcalloc(size_t num, size_t size);
diff --git a/super-intel.c b/super-intel.c
index 70f3c4ef..212387ec 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -393,8 +393,6 @@ struct md_list {
struct md_list *next;
};
-#define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))
-
static __u8 migr_type(struct imsm_dev *dev)
{
if (dev->vol.migr_type == MIGR_VERIFY &&
--
2.41.0

@ -1,88 +0,0 @@
From a7bfcc716e235664dfb3b6c5a9590273e611ac72 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:17 -0600
Subject: [PATCH 42/83] mdadm/test: Add a mode to repeat specified tests
Many tests fail infrequently or rarely. To help find these, add
an option to run the tests multiple times by specifying --loop=N.
If --loop=0 is specified, the test will be looped forever.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
test | 36 ++++++++++++++++++++++++------------
1 file changed, 24 insertions(+), 12 deletions(-)
diff --git a/test b/test
index 711a3c7a..da6db5e0 100755
--- a/test
+++ b/test
@@ -10,6 +10,7 @@ devlist=
savelogs=0
exitonerror=1
+loop=1
prefix='[0-9][0-9]'
# use loop devices by default if doesn't specify --dev
@@ -117,6 +118,7 @@ do_help() {
--logdir=directory Directory to save all logfiles in
--save-logs Usually use with --logdir together
--keep-going | --no-error Don't stop on error, ie. run all tests
+ --loop=N Run tests N times (0 to run forever)
--dev=loop|lvm|ram|disk Use loop devices (default), LVM, RAM or disk
--disks= Provide a bunch of physical devices for test
--volgroup=name LVM volume group for LVM test
@@ -211,6 +213,9 @@ parse_args() {
--keep-going | --no-error )
exitonerror=0
;;
+ --loop=* )
+ loop="${i##*=}"
+ ;;
--disable-multipath )
unset MULTIPATH
;;
@@ -263,19 +268,26 @@ main() {
echo "Testing on linux-$(uname -r) kernel"
[ "$savelogs" == "1" ] &&
echo "Saving logs to $logdir"
- if [ "x$TESTLIST" != "x" ]
- then
- for script in ${TESTLIST[@]}
- do
- do_test $testdir/$script
- done
- else
- for script in $testdir/$prefix $testdir/$prefix*[^~]
- do
- do_test $script
- done
- fi
+ while true; do
+ if [ "x$TESTLIST" != "x" ]
+ then
+ for script in ${TESTLIST[@]}
+ do
+ do_test $testdir/$script
+ done
+ else
+ for script in $testdir/$prefix $testdir/$prefix*[^~]
+ do
+ do_test $script
+ done
+ fi
+
+ let loop=$loop-1
+ if [ "$loop" == "0" ]; then
+ break
+ fi
+ done
exit 0
}
--
2.38.1

@ -0,0 +1,463 @@
From cc48406887b3bc439e3462e8e4d20f992e81b87e Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 22 Mar 2024 12:51:16 +0100
Subject: [PATCH 43/66] Add reading Opal NVMe encryption information
For NVMe devices with Opal support, encryption information, status and
ability are determined based on Opal Level 0 discovery response. Technical
documentation used is given in the implementation.
Ability in general describes what type of encryption is supported, Status
describes in what state the disk with encryption support is. The current
patch includes only the implementation of reading encryption information,
functions will be used in one of the next patches.
Motivation for adding this functionality is to block mixing of disks in
IMSM arrays with encryption enabled and disabled. The main goal is to not
allow stealing data by rebuilding array to not encrypted drive which can be
read elsewhere.
Value ENA_OTHER from enum encryption_ability will be used in the next
patch.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
Makefile | 4 +-
drive_encryption.c | 362 +++++++++++++++++++++++++++++++++++++++++++++
drive_encryption.h | 32 ++++
3 files changed, 396 insertions(+), 2 deletions(-)
create mode 100644 drive_encryption.c
create mode 100644 drive_encryption.h
diff --git a/Makefile b/Makefile
index cbdba49a..7c221a89 100644
--- a/Makefile
+++ b/Makefile
@@ -170,7 +170,7 @@ OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o uuid.o util.o maps.o lib.o u
mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
super-mbr.o super-gpt.o \
restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \
- platform-intel.o probe_roms.o crc32c.o
+ platform-intel.o probe_roms.o crc32c.o drive_encryption.o
CHECK_OBJS = restripe.o uuid.o sysfs.o maps.o lib.o xmalloc.o dlink.o
@@ -183,7 +183,7 @@ MON_OBJS = mdmon.o monitor.o managemon.o uuid.o util.o maps.o mdstat.o sysfs.o c
Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \
super-mbr.o super-gpt.o \
super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \
- platform-intel.o probe_roms.o crc32c.o
+ platform-intel.o probe_roms.o crc32c.o drive_encryption.o
MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
diff --git a/drive_encryption.c b/drive_encryption.c
new file mode 100644
index 00000000..b44585a7
--- /dev/null
+++ b/drive_encryption.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Read encryption information for Opal and ATA devices.
+ *
+ * Copyright (C) 2024 Intel Corporation
+ * Author: Blazej Kucman <blazej.kucman@intel.com>
+ */
+
+#include "mdadm.h"
+
+#include <asm/types.h>
+#include <linux/nvme_ioctl.h>
+#include "drive_encryption.h"
+
+/*
+ * Opal defines
+ * TCG Storage Opal SSC 2.01 chapter 3.3.3
+ * NVM ExpressTM Revision 1.4c, chapter 5
+ */
+#define TCG_SECP_01 (0x01)
+#define TCG_SECP_00 (0x00)
+#define OPAL_DISCOVERY_COMID (0x0001)
+#define OPAL_LOCKING_FEATURE (0x0002)
+#define OPAL_IO_BUFFER_LEN 2048
+#define OPAL_DISCOVERY_FEATURE_HEADER_LEN (4)
+
+/*
+ * NVMe defines
+ * NVM ExpressTM Revision 1.4c, chapter 5
+ */
+#define NVME_SECURITY_RECV (0x82)
+#define NVME_IDENTIFY (0x06)
+#define NVME_IDENTIFY_RESPONSE_LEN 4096
+#define NVME_OACS_BYTE_POSITION (256)
+#define NVME_IDENTIFY_CONTROLLER_DATA (1)
+
+typedef enum drive_feature_support_status {
+ /* Drive feature is supported. */
+ DRIVE_FEAT_SUP_ST = 0,
+ /* Drive feature is not supported. */
+ DRIVE_FEAT_NOT_SUP_ST,
+ /* Drive feature support check failed. */
+ DRIVE_FEAT_CHECK_FAILED_ST
+} drive_feat_sup_st;
+
+/* TCG Storage Opal SSC 2.01 chapter 3.1.1.3 */
+typedef struct opal_locking_feature {
+ /* feature header */
+ __u16 feature_code;
+ __u8 reserved : 4;
+ __u8 version : 4;
+ __u8 description_length;
+ /* feature description */
+ __u8 locking_supported : 1;
+ __u8 locking_enabled : 1;
+ __u8 locked : 1;
+ __u8 media_encryption : 1;
+ __u8 mbr_enabled : 1;
+ __u8 mbr_done : 1;
+ __u8 mbr_shadowing_not_supported : 1;
+ __u8 hw_reset_for_dor_supported : 1;
+ __u8 reserved1[11];
+} __attribute__((__packed__)) opal_locking_feature_t;
+
+/* TCG Storage Opal SSC 2.01 chapter 3.1.1.1 */
+typedef struct opal_level0_header {
+ __u32 length;
+ __u32 version;
+ __u64 reserved;
+ __u8 vendor_specific[32];
+} opal_level0_header_t;
+
+/**
+ * NVM ExpressTM Revision 1.4c, Figure 249
+ * Structure specifies only OACS filed, which is needed in the current use case.
+ */
+typedef struct nvme_identify_ctrl {
+ __u8 reserved[255];
+ __u16 oacs;
+ __u8 reserved2[3839];
+} nvme_identify_ctrl_t;
+
+/* SCSI Primary Commands - 4 (SPC-4), Table 512 */
+typedef struct supported_security_protocols {
+ __u8 reserved[6];
+ __u16 list_length;
+ __u8 list[504];
+} supported_security_protocols_t;
+
+/**
+ * get_opal_locking_feature_description() - get opal locking feature description.
+ * @response: response from Opal Discovery Level 0.
+ *
+ * Based on the documentation TCG Storage Opal SSC 2.01 chapter 3.1.1,
+ * a Locking feature is searched for in Opal Level 0 Discovery response.
+ *
+ * Return: if locking feature is found, pointer to struct %opal_locking_feature_t, NULL otherwise.
+ */
+static opal_locking_feature_t *get_opal_locking_feature_description(__u8 *response)
+{
+ opal_level0_header_t *response_header = (opal_level0_header_t *)response;
+ int features_length = __be32_to_cpu(response_header->length);
+ int current_position = sizeof(*response_header);
+
+ while (current_position < features_length) {
+ opal_locking_feature_t *feature;
+
+ feature = (opal_locking_feature_t *)(response + current_position);
+
+ if (__be16_to_cpu(feature->feature_code) == OPAL_LOCKING_FEATURE)
+ return feature;
+
+ current_position += feature->description_length + OPAL_DISCOVERY_FEATURE_HEADER_LEN;
+ }
+
+ return NULL;
+}
+
+/**
+ * nvme_security_recv_ioctl() - nvme security receive ioctl.
+ * @disk_fd: a disk file descriptor.
+ * @sec_protocol: security protocol.
+ * @comm_id: command id.
+ * @response_buffer: response buffer to fill out.
+ * @buf_size: response buffer size.
+ * @verbose: verbose flag.
+ *
+ * Based on the documentations TCG Storage Opal SSC 2.01 chapter 3.3.3 and
+ * NVM ExpressTM Revision 1.4c, chapter 5.25,
+ * read security receive command via ioctl().
+ * On success, @response_buffer is completed.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+static mdadm_status_t
+nvme_security_recv_ioctl(int disk_fd, __u8 sec_protocol, __u16 comm_id, void *response_buffer,
+ size_t buf_size, const int verbose)
+{
+ struct nvme_admin_cmd nvme_cmd = {0};
+ int status;
+
+ nvme_cmd.opcode = NVME_SECURITY_RECV;
+ nvme_cmd.cdw10 = sec_protocol << 24 | comm_id << 8;
+ nvme_cmd.cdw11 = buf_size;
+ nvme_cmd.data_len = buf_size;
+ nvme_cmd.addr = (__u64)response_buffer;
+
+ status = ioctl(disk_fd, NVME_IOCTL_ADMIN_CMD, &nvme_cmd);
+ if (status != 0) {
+ pr_vrb("Failed to read NVMe security receive ioctl() for device /dev/%s, status: %d\n",
+ fd2kname(disk_fd), status);
+ return MDADM_STATUS_ERROR;
+ }
+
+ return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * nvme_identify_ioctl() - NVMe identify ioctl.
+ * @disk_fd: a disk file descriptor.
+ * @response_buffer: response buffer to fill out.
+ * @buf_size: response buffer size.
+ * @verbose: verbose flag.
+ *
+ * Based on the documentations TCG Storage Opal SSC 2.01 chapter 3.3.3 and
+ * NVM ExpressTM Revision 1.4c, chapter 5.25,
+ * read NVMe identify via ioctl().
+ * On success, @response_buffer will be completed.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+static mdadm_status_t
+nvme_identify_ioctl(int disk_fd, void *response_buffer, size_t buf_size, const int verbose)
+{
+ struct nvme_admin_cmd nvme_cmd = {0};
+ int status;
+
+ nvme_cmd.opcode = NVME_IDENTIFY;
+ nvme_cmd.cdw10 = NVME_IDENTIFY_CONTROLLER_DATA;
+ nvme_cmd.data_len = buf_size;
+ nvme_cmd.addr = (__u64)response_buffer;
+
+ status = ioctl(disk_fd, NVME_IOCTL_ADMIN_CMD, &nvme_cmd);
+ if (status != 0) {
+ pr_vrb("Failed to read NVMe identify ioctl() for device /dev/%s, status: %d\n",
+ fd2kname(disk_fd), status);
+ return MDADM_STATUS_ERROR;
+ }
+
+ return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * is_sec_prot_01h_supported() - check if security protocol 01h supported.
+ * @security_protocols: struct with response from disk (NVMe, SATA) describing supported
+ * security protocols.
+ *
+ * Return: true if TCG_SECP_01 found, false otherwise.
+ */
+static bool is_sec_prot_01h_supported(supported_security_protocols_t *security_protocols)
+{
+ int list_length = be16toh(security_protocols->list_length);
+ int index;
+
+ for (index = 0 ; index < list_length; index++) {
+ if (security_protocols->list[index] == TCG_SECP_01)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * is_sec_prot_01h_supported_nvme() - check if security protocol 01h supported for given NVMe disk.
+ * @disk_fd: a disk file descriptor.
+ * @verbose: verbose flag.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported,
+ * %DRIVE_FEAT_CHECK_FAILED_ST if failed to check.
+ */
+static drive_feat_sup_st is_sec_prot_01h_supported_nvme(int disk_fd, const int verbose)
+{
+ supported_security_protocols_t security_protocols = {0};
+
+ /* security_protocol: TCG_SECP_00, comm_id: not applicable */
+ if (nvme_security_recv_ioctl(disk_fd, TCG_SECP_00, 0x0, &security_protocols,
+ sizeof(security_protocols), verbose))
+ return DRIVE_FEAT_CHECK_FAILED_ST;
+
+ if (is_sec_prot_01h_supported(&security_protocols))
+ return DRIVE_FEAT_SUP_ST;
+
+ return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * is_nvme_sec_send_recv_supported() - check if Security Send and Security Receive is supported.
+ * @disk_fd: a disk file descriptor.
+ * @verbose: verbose flag.
+ *
+ * Check if "Optional Admin Command Support" bit 0 is set in NVMe identify.
+ * Bit 0 set to 1 means controller supports the Security Send and Security Receive commands.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if security send/receive supported,
+ * %DRIVE_FEAT_NOT_SUP_ST if not supported, %DRIVE_FEAT_CHECK_FAILED_ST if check failed.
+ */
+static drive_feat_sup_st is_nvme_sec_send_recv_supported(int disk_fd, const int verbose)
+{
+ nvme_identify_ctrl_t nvme_identify = {0};
+ int status = 0;
+
+ status = nvme_identify_ioctl(disk_fd, &nvme_identify, sizeof(nvme_identify), verbose);
+ if (status)
+ return DRIVE_FEAT_CHECK_FAILED_ST;
+
+ if ((__le16_to_cpu(nvme_identify.oacs) & 0x1) == 0x1)
+ return DRIVE_FEAT_SUP_ST;
+
+ return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * get_opal_encryption_information() - get Opal encryption information.
+ * @buffer: buffer with Opal Level 0 Discovery response.
+ * @information: struct to fill out, describing encryption status of disk.
+ *
+ * If Locking feature frame is in response from Opal Level 0 discovery, &encryption_information_t
+ * structure is completed with status and ability otherwise the status is set to &None.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+static mdadm_status_t get_opal_encryption_information(__u8 *buffer,
+ encryption_information_t *information)
+{
+ opal_locking_feature_t *opal_locking_feature =
+ get_opal_locking_feature_description(buffer);
+
+ if (!opal_locking_feature)
+ return MDADM_STATUS_ERROR;
+
+ if (opal_locking_feature->locking_supported == 1) {
+ information->ability = ENC_ABILITY_SED;
+
+ if (opal_locking_feature->locking_enabled == 0)
+ information->status = ENC_STATUS_UNENCRYPTED;
+ else if (opal_locking_feature->locked == 1)
+ information->status = ENC_STATUS_LOCKED;
+ else
+ information->status = ENC_STATUS_UNLOCKED;
+ } else {
+ information->ability = ENC_ABILITY_NONE;
+ information->status = ENC_STATUS_UNENCRYPTED;
+ }
+
+ return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * get_nvme_opal_encryption_information() - get NVMe Opal encryption information.
+ * @disk_fd: a disk file descriptor.
+ * @information: struct to fill out, describing encryption status of disk.
+ * @verbose: verbose flag.
+ *
+ * In case the disk supports Opal Level 0 discovery, &encryption_information_t structure
+ * is completed with status and ability based on ioctl response,
+ * otherwise the ability is set to %ENC_ABILITY_NONE and &status to %ENC_STATUS_UNENCRYPTED.
+ * As the current use case does not need the knowledge of Opal support, if there is no support,
+ * %MDADM_STATUS_SUCCESS will be returned, with the values described above.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+mdadm_status_t
+get_nvme_opal_encryption_information(int disk_fd, encryption_information_t *information,
+ const int verbose)
+{
+ __u8 buffer[OPAL_IO_BUFFER_LEN];
+ int sec_send_recv_supported = 0;
+ int protocol_01h_supported = 0;
+ mdadm_status_t status;
+
+ information->ability = ENC_ABILITY_NONE;
+ information->status = ENC_STATUS_UNENCRYPTED;
+
+ sec_send_recv_supported = is_nvme_sec_send_recv_supported(disk_fd, verbose);
+ if (sec_send_recv_supported == DRIVE_FEAT_CHECK_FAILED_ST)
+ return MDADM_STATUS_ERROR;
+
+ /* Opal not supported */
+ if (sec_send_recv_supported == DRIVE_FEAT_NOT_SUP_ST)
+ return MDADM_STATUS_SUCCESS;
+
+ /**
+ * sec_send_recv_supported determine that it should be possible to read
+ * supported sec protocols
+ */
+ protocol_01h_supported = is_sec_prot_01h_supported_nvme(disk_fd, verbose);
+ if (protocol_01h_supported == DRIVE_FEAT_CHECK_FAILED_ST)
+ return MDADM_STATUS_ERROR;
+
+ /* Opal not supported */
+ if (sec_send_recv_supported == DRIVE_FEAT_SUP_ST &&
+ protocol_01h_supported == DRIVE_FEAT_NOT_SUP_ST)
+ return MDADM_STATUS_SUCCESS;
+
+ if (nvme_security_recv_ioctl(disk_fd, TCG_SECP_01, OPAL_DISCOVERY_COMID, (void *)&buffer,
+ OPAL_IO_BUFFER_LEN, verbose))
+ return MDADM_STATUS_ERROR;
+
+ status = get_opal_encryption_information((__u8 *)&buffer, information);
+ if (status)
+ pr_vrb("Locking feature description not found in Level 0 discovery response. Device /dev/%s.\n",
+ fd2kname(disk_fd));
+
+ if (information->ability == ENC_ABILITY_NONE)
+ assert(information->status == ENC_STATUS_UNENCRYPTED);
+
+ return status;
+}
diff --git a/drive_encryption.h b/drive_encryption.h
new file mode 100644
index 00000000..82c2c624
--- /dev/null
+++ b/drive_encryption.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Read encryption information for Opal and ATA devices.
+ *
+ * Copyright (C) 2024 Intel Corporation
+ * Author: Blazej Kucman <blazej.kucman@intel.com>
+ */
+
+typedef enum encryption_status {
+ /* The drive is not currently encrypted. */
+ ENC_STATUS_UNENCRYPTED = 0,
+ /* The drive is encrypted and the data is not accessible. */
+ ENC_STATUS_LOCKED,
+ /* The drive is encrypted but the data is accessible in unencrypted form. */
+ ENC_STATUS_UNLOCKED
+} encryption_status_t;
+
+typedef enum encryption_ability {
+ ENC_ABILITY_NONE = 0,
+ ENC_ABILITY_OTHER,
+ /* Self encrypted drive */
+ ENC_ABILITY_SED
+} encryption_ability_t;
+
+typedef struct encryption_information {
+ encryption_ability_t ability;
+ encryption_status_t status;
+} encryption_information_t;
+
+mdadm_status_t
+get_nvme_opal_encryption_information(int disk_fd, struct encryption_information *information,
+ const int verbose);
--
2.41.0

@ -1,120 +0,0 @@
From 28520bf114b3b0515a48ff44fff4ecbe9ed6dfad Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:18 -0600
Subject: [PATCH 43/83] mdadm/test: Mark and ignore broken test failures
Add functionality to continue if a test marked as broken fails.
To mark a test as broken, a file with the same name but with the suffix
'.broken' should exist. The first line in the file will be printed with
a KNOWN BROKEN message; the rest of the file can describe how the
test is broken.
Also adds --skip-broken and --skip-always-broken to skip all the tests
that have a .broken file or to skip all tests whose .broken file's first
line contains the keyword always.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
test | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/test b/test
index da6db5e0..61d9ee83 100755
--- a/test
+++ b/test
@@ -10,6 +10,8 @@ devlist=
savelogs=0
exitonerror=1
+ctrl_c_error=0
+skipbroken=0
loop=1
prefix='[0-9][0-9]'
@@ -36,6 +38,7 @@ die() {
ctrl_c() {
exitonerror=1
+ ctrl_c_error=1
}
# mdadm always adds --quiet, and we want to see any unexpected messages
@@ -80,8 +83,21 @@ mdadm() {
do_test() {
_script=$1
_basename=`basename $_script`
+ _broken=0
+
if [ -f "$_script" ]
then
+ if [ -f "${_script}.broken" ]; then
+ _broken=1
+ _broken_msg=$(head -n1 "${_script}.broken" | tr -d '\n')
+ if [ "$skipbroken" == "all" ]; then
+ return
+ elif [ "$skipbroken" == "always" ] &&
+ [[ "$_broken_msg" == *always* ]]; then
+ return
+ fi
+ fi
+
rm -f $targetdir/stderr
# this might have been reset: restore the default.
echo 2000 > /proc/sys/dev/raid/speed_limit_max
@@ -98,10 +114,15 @@ do_test() {
else
save_log fail
_fail=1
+ if [ "$_broken" == "1" ]; then
+ echo " (KNOWN BROKEN TEST: $_broken_msg)"
+ fi
fi
[ "$savelogs" == "1" ] &&
mv -f $targetdir/log $logdir/$_basename.log
- [ "$_fail" == "1" -a "$exitonerror" == "1" ] && exit 1
+ [ "$ctrl_c_error" == "1" ] && exit 1
+ [ "$_fail" == "1" -a "$exitonerror" == "1" \
+ -a "$_broken" == "0" ] && exit 1
fi
}
@@ -119,6 +140,8 @@ do_help() {
--save-logs Usually use with --logdir together
--keep-going | --no-error Don't stop on error, ie. run all tests
--loop=N Run tests N times (0 to run forever)
+ --skip-broken Skip tests that are known to be broken
+ --skip-always-broken Skip tests that are known to always fail
--dev=loop|lvm|ram|disk Use loop devices (default), LVM, RAM or disk
--disks= Provide a bunch of physical devices for test
--volgroup=name LVM volume group for LVM test
@@ -216,6 +239,12 @@ parse_args() {
--loop=* )
loop="${i##*=}"
;;
+ --skip-broken )
+ skipbroken=all
+ ;;
+ --skip-always-broken )
+ skipbroken=always
+ ;;
--disable-multipath )
unset MULTIPATH
;;
@@ -279,7 +308,11 @@ main() {
else
for script in $testdir/$prefix $testdir/$prefix*[^~]
do
- do_test $script
+ case $script in
+ *.broken) ;;
+ *)
+ do_test $script
+ esac
done
fi
--
2.38.1

@ -0,0 +1,459 @@
From df38df3052c3386c0fd076e0d534b4f688b5c8a4 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 22 Mar 2024 12:51:17 +0100
Subject: [PATCH 44/66] Add reading SATA encryption information
Functionality reads information about SATA disk encryption. Technical
documentation used is given in the implementation.
The implementation is able to recognize two encryption standards for SATA
drives: OPAL and ATA security.
If the SATA drive supports OPAL, encryption status and ability are
determined based on Opal Level 0 discovery response, for ATA security,
based on ATA identify response. If SATA supports OPAL, ability is set to
"SED", for ATA security to "Other".
SED (Self-Encrypting Drive) is commonly used to describe a drive that uses
the OPAL or Enterprise standards developed by the Trusted Computing Group.
Ability "Other" is used for ATA security because we rely only on information
from ATA identify, which describes the overall state of encryption.
Mixing disks with different encryption abilities, such as "SED" and
"Other", is allowed and is not a security gap.
The motivation for adding this functionality is to block mixing of disks
with encryption enabled and disabled in IMSM arrays. The main goal is to
prevent data theft by rebuilding an array onto an unencrypted drive, which
can then be read elsewhere.
For SATA Opal drives, the libata allow_tpm parameter must be enabled, because
it is necessary for Opal Security commands to work. Therefore, if the
parameter is not enabled, a SATA Opal disk cannot be used when
encryption is checked by the metadata.
The implemented functions will be used in one of the next patches. In one of
the next patches, a flag will be added to allow disabling SATA Opal
encryption checking because of the dependency on the allow_tpm kernel setting.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
drive_encryption.c | 318 +++++++++++++++++++++++++++++++++++++++++++++
drive_encryption.h | 3 +
mdadm.h | 1 +
sysfs.c | 29 +++++
4 files changed, 351 insertions(+)
diff --git a/drive_encryption.c b/drive_encryption.c
index b44585a7..d520f0c7 100644
--- a/drive_encryption.c
+++ b/drive_encryption.c
@@ -10,8 +10,12 @@
#include <asm/types.h>
#include <linux/nvme_ioctl.h>
+#include <scsi/sg.h>
+#include <scsi/scsi.h>
#include "drive_encryption.h"
+#define DEFAULT_SECTOR_SIZE (512)
+
/*
* Opal defines
* TCG Storage Opal SSC 2.01 chapter 3.3.3
@@ -34,6 +38,35 @@
#define NVME_OACS_BYTE_POSITION (256)
#define NVME_IDENTIFY_CONTROLLER_DATA (1)
+/*
+ * ATA defines
+ * ATA/ATAPI Command Set ATA8-ACS
+ * SCSI / ATA Translation - 3 (SAT-3)
+ * SCSI Primary Commands - 4 (SPC-4)
+ * AT Attachment-8 - ATA Serial Transport (ATA8-AST)
+ * ATA Command Pass-Through
+ */
+#define ATA_IDENTIFY (0xec)
+#define ATA_TRUSTED_RECEIVE (0x5c)
+#define ATA_SECURITY_WORD_POSITION (128)
+#define HDIO_DRIVE_CMD (0x031f)
+#define ATA_TRUSTED_COMPUTING_POS (48)
+#define ATA_PASS_THROUGH_12 (0xa1)
+#define ATA_IDENTIFY_RESPONSE_LEN (512)
+#define ATA_PIO_DATA_IN (4)
+#define SG_CHECK_CONDITION (0x02)
+#define ATA_STATUS_RETURN_DESCRIPTOR (0x09)
+#define ATA_PT_INFORMATION_AVAILABLE_ASCQ (0x1d)
+#define ATA_PT_INFORMATION_AVAILABLE_ASC (0x00)
+#define ATA_INQUIRY_LENGTH (0x0c)
+#define SG_INTERFACE_ID 'S'
+#define SG_IO_TIMEOUT (60000)
+#define SG_SENSE_SIZE (32)
+#define SENSE_DATA_CURRENT_FIXED (0x70)
+#define SENSE_DATA_CURRENT_DESC (0x72)
+#define SENSE_CURRENT_RES_DESC_POS (8)
+#define SG_DRIVER_SENSE (0x08)
+
typedef enum drive_feature_support_status {
/* Drive feature is supported. */
DRIVE_FEAT_SUP_ST = 0,
@@ -87,6 +120,27 @@ typedef struct supported_security_protocols {
__u8 list[504];
} supported_security_protocols_t;
+/* ATA/ATAPI Command Set - 3 (ACS-3), Table 45 */
+typedef struct ata_security_status {
+ __u16 security_supported : 1;
+ __u16 security_enabled : 1;
+ __u16 security_locked : 1;
+ __u16 security_frozen : 1;
+ __u16 security_count_expired : 1;
+ __u16 enhanced_security_erase_supported : 1;
+ __u16 reserved1 : 2;
+ __u16 security_level : 1;
+ __u16 reserved2 : 7;
+} __attribute__((__packed__)) ata_security_status_t;
+
+/* ATA/ATAPI Command Set - 3 (ACS-3), Table 45 */
+typedef struct ata_trusted_computing {
+ __u16 tc_feature :1;
+ __u16 reserved : 13;
+ __u16 var1 : 1;
+ __u16 var2 : 1;
+} __attribute__((__packed__)) ata_trusted_computing_t;
+
/**
* get_opal_locking_feature_description() - get opal locking feature description.
* @response: response from Opal Discovery Level 0.
@@ -360,3 +414,267 @@ get_nvme_opal_encryption_information(int disk_fd, encryption_information_t *info
return status;
}
+
+/**
+ * ata_pass_through12_ioctl() - ata pass through12 ioctl.
+ * @disk_fd: a disk file descriptor.
+ * @ata_command: ata command.
+ * @sec_protocol: security protocol.
+ * @comm_id: additional command id.
+ * @response_buffer: response buffer to fill out.
+ * @buf_size: response buffer size.
+ * @verbose: verbose flag.
+ *
+ * Based on the documentations ATA Command Pass-Through, chapter 13.2.2 and
+ * ATA Translation - 3 (SAT-3), send read ata pass through 12 command via ioctl().
+ * On success, @response_buffer will be completed.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail.
+ */
+static mdadm_status_t
+ata_pass_through12_ioctl(int disk_fd, __u8 ata_command, __u8 sec_protocol, __u16 comm_id,
+ void *response_buffer, size_t buf_size, const int verbose)
+{
+ __u8 cdb[ATA_INQUIRY_LENGTH] = {0};
+ __u8 sense[SG_SENSE_SIZE] = {0};
+ __u8 *sense_desc = NULL;
+ sg_io_hdr_t sg = {0};
+
+ /*
+ * ATA Command Pass-Through, chapter 13.2.2
+ * SCSI Primary Commands - 4 (SPC-4)
+ * ATA Translation - 3 (SAT-3)
+ */
+ cdb[0] = ATA_PASS_THROUGH_12;
+ /* protocol, bits 1-4 */
+ cdb[1] = ATA_PIO_DATA_IN << 1;
+ /* Bytes: CK_COND=1, T_DIR = 1, BYTE_BLOCK = 1, Length in Sector Count = 2 */
+ cdb[2] = 0x2E;
+ cdb[3] = sec_protocol;
+ /* Sector count */
+ cdb[4] = buf_size / DEFAULT_SECTOR_SIZE;
+ cdb[6] = (comm_id) & 0xFF;
+ cdb[7] = (comm_id >> 8) & 0xFF;
+ cdb[9] = ata_command;
+
+ sg.interface_id = SG_INTERFACE_ID;
+ sg.cmd_len = sizeof(cdb);
+ sg.mx_sb_len = sizeof(sense);
+ sg.dxfer_direction = SG_DXFER_FROM_DEV;
+ sg.dxfer_len = buf_size;
+ sg.dxferp = response_buffer;
+ sg.cmdp = cdb;
+ sg.sbp = sense;
+ sg.timeout = SG_IO_TIMEOUT;
+ sg.usr_ptr = NULL;
+
+ if (ioctl(disk_fd, SG_IO, &sg) < 0) {
+ pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+ return MDADM_STATUS_ERROR;
+ }
+
+ if ((sg.status && sg.status != SG_CHECK_CONDITION) || sg.host_status ||
+ (sg.driver_status && sg.driver_status != SG_DRIVER_SENSE)) {
+ pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+ pr_vrb("SG_IO error: ATA_12 Status: %d Host Status: %d, Driver Status: %d\n",
+ sg.status, sg.host_status, sg.driver_status);
+ return MDADM_STATUS_ERROR;
+ }
+
+ /* verify expected sense response code */
+ if (!(sense[0] == SENSE_DATA_CURRENT_DESC || sense[0] == SENSE_DATA_CURRENT_FIXED)) {
+ pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+ return MDADM_STATUS_ERROR;
+ }
+
+ sense_desc = sense + SENSE_CURRENT_RES_DESC_POS;
+ /* verify sense data current response with descriptor format */
+ if (sense[0] == SENSE_DATA_CURRENT_DESC &&
+ !(sense_desc[0] == ATA_STATUS_RETURN_DESCRIPTOR &&
+ sense_desc[1] == ATA_INQUIRY_LENGTH)) {
+ pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s. Sense data ASC: %d, ASCQ: %d.\n",
+ fd2kname(disk_fd), sense[2], sense[3]);
+ return MDADM_STATUS_ERROR;
+ }
+
+ /* verify sense data current response with fixed format */
+ if (sense[0] == SENSE_DATA_CURRENT_FIXED &&
+ !(sense[12] == ATA_PT_INFORMATION_AVAILABLE_ASC &&
+ sense[13] == ATA_PT_INFORMATION_AVAILABLE_ASCQ)) {
+ pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s. Sense data ASC: %d, ASCQ: %d.\n",
+ fd2kname(disk_fd), sense[12], sense[13]);
+ return MDADM_STATUS_ERROR;
+ }
+
+ return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * is_sec_prot_01h_supported_ata() - check if security protocol 01h supported for given SATA disk.
+ * @disk_fd: a disk file descriptor.
+ * @verbose: verbose flag.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported,
+ * %DRIVE_FEAT_CHECK_FAILED_ST if failed.
+ */
+static drive_feat_sup_st is_sec_prot_01h_supported_ata(int disk_fd, const int verbose)
+{
+ supported_security_protocols_t security_protocols;
+
+ mdadm_status_t result = ata_pass_through12_ioctl(disk_fd, ATA_TRUSTED_RECEIVE, TCG_SECP_00,
+ 0x0, &security_protocols,
+ sizeof(security_protocols), verbose);
+ if (result)
+ return DRIVE_FEAT_CHECK_FAILED_ST;
+
+ if (is_sec_prot_01h_supported(&security_protocols))
+ return DRIVE_FEAT_SUP_ST;
+
+ return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * is_ata_trusted_computing_supported() - check if ata trusted computing supported.
+ * @buffer: buffer with ATA identify response, not NULL.
+ *
+ * Return: true if trusted computing bit set, false otherwise.
+ */
+bool is_ata_trusted_computing_supported(__u16 *buffer)
+{
+ /* Added due to warnings from the compiler about a possible uninitialized variable below. */
+ assert(buffer);
+
+ __u16 security_tc_frame = __le16_to_cpu(buffer[ATA_TRUSTED_COMPUTING_POS]);
+ ata_trusted_computing_t *security_tc = (ata_trusted_computing_t *)&security_tc_frame;
+
+ if (security_tc->tc_feature == 1)
+ return true;
+
+ return false;
+}
+
+/**
+ * get_ata_standard_security_status() - get ATA disk encryption information from ATA identify.
+ * @buffer: buffer with response from ATA identify, not NULL.
+ * @information: struct to fill out, describing encryption status of disk.
+ *
+ * The function based on the Security status frame from ATA identify,
+ * completed encryption information.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail.
+ */
+static mdadm_status_t get_ata_standard_security_status(__u16 *buffer,
+ struct encryption_information *information)
+{
+ /* Added due to warnings from the compiler about a possible uninitialized variable below. */
+ assert(buffer);
+
+ __u16 security_status_frame = __le16_to_cpu(buffer[ATA_SECURITY_WORD_POSITION]);
+ ata_security_status_t *security_status = (ata_security_status_t *)&security_status_frame;
+
+ if (!security_status->security_supported) {
+ information->ability = ENC_ABILITY_NONE;
+ information->status = ENC_STATUS_UNENCRYPTED;
+
+ return MDADM_STATUS_SUCCESS;
+ }
+
+ information->ability = ENC_ABILITY_OTHER;
+
+ if (security_status->security_enabled == 0)
+ information->status = ENC_STATUS_UNENCRYPTED;
+ else if (security_status->security_locked == 1)
+ information->status = ENC_STATUS_LOCKED;
+ else
+ information->status = ENC_STATUS_UNLOCKED;
+
+ return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * is_ata_opal() - check if SATA disk support Opal.
+ * @disk_fd: a disk file descriptor.
+ * @buffer: buffer with ATA identify response.
+ * @verbose: verbose flag.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported,
+ * %DRIVE_FEAT_CHECK_FAILED_ST if failed to check.
+ */
+static drive_feat_sup_st is_ata_opal(int disk_fd, __u16 *buffer_identify, const int verbose)
+{
+ bool tc_status = is_ata_trusted_computing_supported(buffer_identify);
+ drive_feat_sup_st tcg_sec_prot_status;
+
+ if (!tc_status)
+ return DRIVE_FEAT_NOT_SUP_ST;
+
+ tcg_sec_prot_status = is_sec_prot_01h_supported_ata(disk_fd, verbose);
+
+ if (tcg_sec_prot_status == DRIVE_FEAT_CHECK_FAILED_ST) {
+ pr_vrb("Failed to verify if security protocol 01h supported. Device /dev/%s.\n",
+ fd2kname(disk_fd));
+ return DRIVE_FEAT_CHECK_FAILED_ST;
+ }
+
+ if (tc_status && tcg_sec_prot_status == DRIVE_FEAT_SUP_ST)
+ return DRIVE_FEAT_SUP_ST;
+
+ return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * get_ata_encryption_information() - get ATA disk encryption information.
+ * @disk_fd: a disk file descriptor.
+ * @information: struct to fill out, describing encryption status of disk.
+ * @verbose: verbose flag.
+ *
+ * The function reads information about encryption, if the disk supports Opal,
+ * the information is completed based on Opal Level 0 discovery, otherwise,
+ * based on ATA security status frame from ATA identification response.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * Based on the documentations ATA/ATAPI Command Set ATA8-ACS and
+ * AT Attachment-8 - ATA Serial Transport (ATA8-AST).
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail.
+ */
+mdadm_status_t
+get_ata_encryption_information(int disk_fd, struct encryption_information *information,
+ const int verbose)
+{
+ __u8 buffer_opal_level0_discovery[OPAL_IO_BUFFER_LEN] = {0};
+ __u16 buffer_identify[ATA_IDENTIFY_RESPONSE_LEN] = {0};
+ drive_feat_sup_st ata_opal_status;
+ mdadm_status_t status;
+
+ /* Get disk ATA identification */
+ status = ata_pass_through12_ioctl(disk_fd, ATA_IDENTIFY, 0x0, 0x0, buffer_identify,
+ sizeof(buffer_identify), verbose);
+ if (status == MDADM_STATUS_ERROR)
+ return MDADM_STATUS_ERROR;
+
+ if (is_ata_trusted_computing_supported(buffer_identify) &&
+ !sysfs_is_libata_allow_tpm_enabled(verbose)) {
+ pr_vrb("For SATA with Trusted Computing support, required libata.tpm_enabled=1.\n");
+ return MDADM_STATUS_ERROR;
+ }
+
+ ata_opal_status = is_ata_opal(disk_fd, buffer_identify, verbose);
+ if (ata_opal_status == DRIVE_FEAT_CHECK_FAILED_ST)
+ return MDADM_STATUS_ERROR;
+
+ if (ata_opal_status == DRIVE_FEAT_NOT_SUP_ST)
+ return get_ata_standard_security_status(buffer_identify, information);
+
+ /* SATA Opal */
+ status = ata_pass_through12_ioctl(disk_fd, ATA_TRUSTED_RECEIVE, TCG_SECP_01,
+ OPAL_DISCOVERY_COMID, buffer_opal_level0_discovery,
+ OPAL_IO_BUFFER_LEN, verbose);
+ if (status != MDADM_STATUS_SUCCESS)
+ return MDADM_STATUS_ERROR;
+
+ return get_opal_encryption_information(buffer_opal_level0_discovery, information);
+}
diff --git a/drive_encryption.h b/drive_encryption.h
index 82c2c624..77c7f10f 100644
--- a/drive_encryption.h
+++ b/drive_encryption.h
@@ -30,3 +30,6 @@ typedef struct encryption_information {
mdadm_status_t
get_nvme_opal_encryption_information(int disk_fd, struct encryption_information *information,
const int verbose);
+mdadm_status_t
+get_ata_encryption_information(int disk_fd, struct encryption_information *information,
+ const int verbose);
diff --git a/mdadm.h b/mdadm.h
index fbb161ba..52a66b9a 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -853,6 +853,7 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
int source, unsigned long long read_offset,
unsigned long long start, unsigned long long length,
char *src_buf);
+extern bool sysfs_is_libata_allow_tpm_enabled(const int verbose);
#ifndef Sendmail
#define Sendmail "/usr/lib/sendmail -t"
diff --git a/sysfs.c b/sysfs.c
index 4ded1672..20fe1e9e 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -1121,3 +1121,32 @@ void sysfsline(char *line)
sr->next = sysfs_rules;
sysfs_rules = sr;
}
+
+/**
+ * sysfs_is_libata_allow_tpm_enabled() - check if libata allow_tmp is enabled.
+ * @verbose: verbose flag.
+ *
+ * Check if libata allow_tmp flag is set, this is required for SATA Opal Security commands to work.
+ *
+ * Return: true if allow_tpm enable, false otherwise.
+ */
+bool sysfs_is_libata_allow_tpm_enabled(const int verbose)
+{
+ const char *path = "/sys/module/libata/parameters/allow_tpm";
+ const char *expected_value = "1";
+ int fd = open(path, O_RDONLY);
+ char buf[3];
+
+ if (!is_fd_valid(fd)) {
+ pr_vrb("Failed open file descriptor to %s. Cannot check libata allow_tpm param.\n",
+ path);
+ return false;
+ }
+
+ sysfs_fd_get_str(fd, buf, sizeof(buf));
+ close(fd);
+
+ if (strncmp(buf, expected_value, 1) == 0)
+ return true;
+ return false;
+}
--
2.41.0

@ -1,447 +0,0 @@
From daa86d6634761796ada1f535c13e47fdd3cc95eb Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 22 Jun 2022 14:25:19 -0600
Subject: [PATCH 44/83] tests: Add broken files for all broken tests
Each broken file contains the rough frequency of brokenness as well
as a brief explanation of what happens when it breaks. Estimates
of failure rates are not statistically significant and can vary
run to run.
This is really just a view from my window. Tests were done on a
small VM with the default loop devices, not real hardware. We've
seen different kernel configurations can cause bugs to appear as well
(ie. different block schedulers). It may also be that different race
conditions will be seen on machines with different performance
characteristics.
These annotations were done with the kernel currently in md/md-next:
facef3b96c5b ("md: Notify sysfs sync_completed in md_reap_sync_thread()")
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jes@trained-monkey.org>
---
tests/01r5integ.broken | 7 ++++
tests/01raid6integ.broken | 7 ++++
tests/04r5swap.broken | 7 ++++
tests/07autoassemble.broken | 8 ++++
tests/07autodetect.broken | 5 +++
tests/07changelevelintr.broken | 9 +++++
tests/07changelevels.broken | 9 +++++
tests/07reshape5intr.broken | 45 ++++++++++++++++++++++
tests/07revert-grow.broken | 31 +++++++++++++++
tests/07revert-shrink.broken | 9 +++++
tests/07testreshape5.broken | 12 ++++++
tests/09imsm-assemble.broken | 6 +++
tests/09imsm-create-fail-rebuild.broken | 5 +++
tests/09imsm-overlap.broken | 7 ++++
tests/10ddf-assemble-missing.broken | 6 +++
tests/10ddf-fail-create-race.broken | 7 ++++
tests/10ddf-fail-two-spares.broken | 5 +++
tests/10ddf-incremental-wrong-order.broken | 9 +++++
tests/14imsm-r1_2d-grow-r1_3d.broken | 5 +++
tests/14imsm-r1_2d-takeover-r0_2d.broken | 6 +++
tests/18imsm-r10_4d-takeover-r0_2d.broken | 5 +++
tests/18imsm-r1_2d-takeover-r0_1d.broken | 6 +++
tests/19raid6auto-repair.broken | 5 +++
tests/19raid6repair.broken | 5 +++
24 files changed, 226 insertions(+)
create mode 100644 tests/01r5integ.broken
create mode 100644 tests/01raid6integ.broken
create mode 100644 tests/04r5swap.broken
create mode 100644 tests/07autoassemble.broken
create mode 100644 tests/07autodetect.broken
create mode 100644 tests/07changelevelintr.broken
create mode 100644 tests/07changelevels.broken
create mode 100644 tests/07reshape5intr.broken
create mode 100644 tests/07revert-grow.broken
create mode 100644 tests/07revert-shrink.broken
create mode 100644 tests/07testreshape5.broken
create mode 100644 tests/09imsm-assemble.broken
create mode 100644 tests/09imsm-create-fail-rebuild.broken
create mode 100644 tests/09imsm-overlap.broken
create mode 100644 tests/10ddf-assemble-missing.broken
create mode 100644 tests/10ddf-fail-create-race.broken
create mode 100644 tests/10ddf-fail-two-spares.broken
create mode 100644 tests/10ddf-incremental-wrong-order.broken
create mode 100644 tests/14imsm-r1_2d-grow-r1_3d.broken
create mode 100644 tests/14imsm-r1_2d-takeover-r0_2d.broken
create mode 100644 tests/18imsm-r10_4d-takeover-r0_2d.broken
create mode 100644 tests/18imsm-r1_2d-takeover-r0_1d.broken
create mode 100644 tests/19raid6auto-repair.broken
create mode 100644 tests/19raid6repair.broken
diff --git a/tests/01r5integ.broken b/tests/01r5integ.broken
new file mode 100644
index 00000000..20737637
--- /dev/null
+++ b/tests/01r5integ.broken
@@ -0,0 +1,7 @@
+fails rarely
+
+Fails about 1 in every 30 runs with a sha mismatch error:
+
+ c49ab26e1b01def7874af9b8a6d6d0c29fdfafe6 /dev/md0 does not match
+ 15dc2f73262f811ada53c65e505ceec9cf025cb9 /dev/md0 with /dev/loop3
+ missing
diff --git a/tests/01raid6integ.broken b/tests/01raid6integ.broken
new file mode 100644
index 00000000..1df735f0
--- /dev/null
+++ b/tests/01raid6integ.broken
@@ -0,0 +1,7 @@
+fails infrequently
+
+Fails about 1 in 5 with a sha mismatch:
+
+ 8286c2bc045ae2cfe9f8b7ae3a898fa25db6926f /dev/md0 does not match
+ a083a0738b58caab37fd568b91b177035ded37df /dev/md0 with /dev/loop2 and
+ /dev/loop3 missing
diff --git a/tests/04r5swap.broken b/tests/04r5swap.broken
new file mode 100644
index 00000000..e38987db
--- /dev/null
+++ b/tests/04r5swap.broken
@@ -0,0 +1,7 @@
+always fails
+
+Fails with errors:
+
+ mdadm: /dev/loop0 has no superblock - assembly aborted
+
+ ERROR: no recovery happening
diff --git a/tests/07autoassemble.broken b/tests/07autoassemble.broken
new file mode 100644
index 00000000..8be09407
--- /dev/null
+++ b/tests/07autoassemble.broken
@@ -0,0 +1,8 @@
+always fails
+
+Prints lots of messages, but the array doesn't assemble. Error
+possibly related to:
+
+ mdadm: /dev/md/1 is busy - skipping
+ mdadm: no recogniseable superblock on /dev/md/testing:0
+ mdadm: /dev/md/2 is busy - skipping
diff --git a/tests/07autodetect.broken b/tests/07autodetect.broken
new file mode 100644
index 00000000..294954a1
--- /dev/null
+++ b/tests/07autodetect.broken
@@ -0,0 +1,5 @@
+always fails
+
+Fails with error:
+
+ ERROR: no resync happening
diff --git a/tests/07changelevelintr.broken b/tests/07changelevelintr.broken
new file mode 100644
index 00000000..284b4906
--- /dev/null
+++ b/tests/07changelevelintr.broken
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+
+ mdadm: this change will reduce the size of the array.
+ use --grow --array-size first to truncate array.
+ e.g. mdadm --grow /dev/md0 --array-size 56832
+
+ ERROR: no reshape happening
diff --git a/tests/07changelevels.broken b/tests/07changelevels.broken
new file mode 100644
index 00000000..9b930d93
--- /dev/null
+++ b/tests/07changelevels.broken
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+
+ mdadm: /dev/loop0 is smaller than given size. 18976K < 19968K + metadata
+ mdadm: /dev/loop1 is smaller than given size. 18976K < 19968K + metadata
+ mdadm: /dev/loop2 is smaller than given size. 18976K < 19968K + metadata
+
+ ERROR: /dev/md0 isn't a block device.
diff --git a/tests/07reshape5intr.broken b/tests/07reshape5intr.broken
new file mode 100644
index 00000000..efe52a66
--- /dev/null
+++ b/tests/07reshape5intr.broken
@@ -0,0 +1,45 @@
+always fails
+
+This patch, recently added to md-next causes the test to always fail:
+
+7e6ba434cc60 ("md: don't unregister sync_thread with reconfig_mutex
+held")
+
+The new error is simply:
+
+ ERROR: no reshape happening
+
+Before the patch, the error seen is below.
+
+--
+
+fails infrequently
+
+Fails roughly 1 in 4 runs with errors:
+
+ mdadm: Merging with already-assembled /dev/md/0
+ mdadm: cannot re-read metadata from /dev/loop6 - aborting
+
+ ERROR: no reshape happening
+
+Also have seen a random deadlock:
+
+ INFO: task mdadm:109702 blocked for more than 30 seconds.
+ Not tainted 5.18.0-rc3-eid-vmlocalyes-dbg-00095-g3c2b5427979d #2040
+ "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ task:mdadm state:D stack: 0 pid:109702 ppid: 1 flags:0x00004000
+ Call Trace:
+ <TASK>
+ __schedule+0x67e/0x13b0
+ schedule+0x82/0x110
+ mddev_suspend+0x2e1/0x330
+ suspend_lo_store+0xbd/0x140
+ md_attr_store+0xcb/0x130
+ sysfs_kf_write+0x89/0xb0
+ kernfs_fop_write_iter+0x202/0x2c0
+ new_sync_write+0x222/0x330
+ vfs_write+0x3bc/0x4d0
+ ksys_write+0xd9/0x180
+ __x64_sys_write+0x43/0x50
+ do_syscall_64+0x3b/0x90
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
diff --git a/tests/07revert-grow.broken b/tests/07revert-grow.broken
new file mode 100644
index 00000000..9b6db86f
--- /dev/null
+++ b/tests/07revert-grow.broken
@@ -0,0 +1,31 @@
+always fails
+
+This patch, recently added to md-next causes the test to always fail:
+
+7e6ba434cc60 ("md: don't unregister sync_thread with reconfig_mutex held")
+
+The errors are:
+
+ mdadm: No active reshape to revert on /dev/loop0
+ ERROR: active raid5 not found
+
+Before the patch, the error seen is below.
+
+--
+
+fails rarely
+
+Fails about 1 in every 30 runs with errors:
+
+ mdadm: Merging with already-assembled /dev/md/0
+ mdadm: backup file /tmp/md-backup inaccessible: No such file or directory
+ mdadm: failed to add /dev/loop1 to /dev/md/0: Invalid argument
+ mdadm: failed to add /dev/loop2 to /dev/md/0: Invalid argument
+ mdadm: failed to add /dev/loop3 to /dev/md/0: Invalid argument
+ mdadm: failed to add /dev/loop0 to /dev/md/0: Invalid argument
+ mdadm: /dev/md/0 assembled from 1 drive - need all 5 to start it
+ (use --run to insist).
+
+ grep: /sys/block/md*/md/sync_action: No such file or directory
+
+ ERROR: active raid5 not found
diff --git a/tests/07revert-shrink.broken b/tests/07revert-shrink.broken
new file mode 100644
index 00000000..c33c39ec
--- /dev/null
+++ b/tests/07revert-shrink.broken
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+
+ mdadm: this change will reduce the size of the array.
+ use --grow --array-size first to truncate array.
+ e.g. mdadm --grow /dev/md0 --array-size 53760
+
+ ERROR: active raid5 not found
diff --git a/tests/07testreshape5.broken b/tests/07testreshape5.broken
new file mode 100644
index 00000000..a8ce03e4
--- /dev/null
+++ b/tests/07testreshape5.broken
@@ -0,0 +1,12 @@
+always fails
+
+Test seems to run 'test_stripe' at $dir directory, but $dir is never
+set. If $dir is adjusted to $PWD, the test still fails with:
+
+ mdadm: /dev/loop2 is not suitable for this array.
+ mdadm: create aborted
+ ++ return 1
+ ++ cmp -s -n 8192 /dev/md0 /tmp/RandFile
+ ++ echo cmp failed
+ cmp failed
+ ++ exit 2
diff --git a/tests/09imsm-assemble.broken b/tests/09imsm-assemble.broken
new file mode 100644
index 00000000..a6d4d5cf
--- /dev/null
+++ b/tests/09imsm-assemble.broken
@@ -0,0 +1,6 @@
+fails infrequently
+
+Fails roughly 1 in 10 runs with errors:
+
+ mdadm: /dev/loop2 is still in use, cannot remove.
+ /dev/loop2 removal from /dev/md/container should have succeeded
diff --git a/tests/09imsm-create-fail-rebuild.broken b/tests/09imsm-create-fail-rebuild.broken
new file mode 100644
index 00000000..40c4b294
--- /dev/null
+++ b/tests/09imsm-create-fail-rebuild.broken
@@ -0,0 +1,5 @@
+always fails
+
+Fails with error:
+
+ **Error**: Array size mismatch - expected 3072, actual 16384
diff --git a/tests/09imsm-overlap.broken b/tests/09imsm-overlap.broken
new file mode 100644
index 00000000..e7ccab76
--- /dev/null
+++ b/tests/09imsm-overlap.broken
@@ -0,0 +1,7 @@
+always fails
+
+Fails with errors:
+
+ **Error**: Offset mismatch - expected 15360, actual 0
+ **Error**: Offset mismatch - expected 15360, actual 0
+ /dev/md/vol3 failed check
diff --git a/tests/10ddf-assemble-missing.broken b/tests/10ddf-assemble-missing.broken
new file mode 100644
index 00000000..bfd8d103
--- /dev/null
+++ b/tests/10ddf-assemble-missing.broken
@@ -0,0 +1,6 @@
+always fails
+
+Fails with errors:
+
+ ERROR: /dev/md/vol0 has unexpected state on /dev/loop10
+ ERROR: unexpected number of online disks on /dev/loop10
diff --git a/tests/10ddf-fail-create-race.broken b/tests/10ddf-fail-create-race.broken
new file mode 100644
index 00000000..6c0df023
--- /dev/null
+++ b/tests/10ddf-fail-create-race.broken
@@ -0,0 +1,7 @@
+usually fails
+
+Fails about 9 out of 10 times with many errors:
+
+ mdadm: cannot open MISSING: No such file or directory
+ ERROR: non-degraded array found
+ ERROR: disk 0 not marked as failed in meta data
diff --git a/tests/10ddf-fail-two-spares.broken b/tests/10ddf-fail-two-spares.broken
new file mode 100644
index 00000000..eeea56d9
--- /dev/null
+++ b/tests/10ddf-fail-two-spares.broken
@@ -0,0 +1,5 @@
+fails infrequently
+
+Fails roughly 1 in 3 with error:
+
+ ERROR: /dev/md/vol1 should be optimal in meta data
diff --git a/tests/10ddf-incremental-wrong-order.broken b/tests/10ddf-incremental-wrong-order.broken
new file mode 100644
index 00000000..a5af3bab
--- /dev/null
+++ b/tests/10ddf-incremental-wrong-order.broken
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+ ERROR: sha1sum of /dev/md/vol0 has changed
+ ERROR: /dev/md/vol0 has unexpected state on /dev/loop10
+ ERROR: unexpected number of online disks on /dev/loop10
+ ERROR: /dev/md/vol0 has unexpected state on /dev/loop8
+ ERROR: unexpected number of online disks on /dev/loop8
+ ERROR: sha1sum of /dev/md/vol0 has changed
diff --git a/tests/14imsm-r1_2d-grow-r1_3d.broken b/tests/14imsm-r1_2d-grow-r1_3d.broken
new file mode 100644
index 00000000..4ef1d406
--- /dev/null
+++ b/tests/14imsm-r1_2d-grow-r1_3d.broken
@@ -0,0 +1,5 @@
+always fails
+
+Fails with error:
+
+ mdadm/tests/func.sh: line 325: dvsize/chunk: division by 0 (error token is "chunk")
diff --git a/tests/14imsm-r1_2d-takeover-r0_2d.broken b/tests/14imsm-r1_2d-takeover-r0_2d.broken
new file mode 100644
index 00000000..89cd4e57
--- /dev/null
+++ b/tests/14imsm-r1_2d-takeover-r0_2d.broken
@@ -0,0 +1,6 @@
+always fails
+
+Fails with error:
+
+ tests/func.sh: line 325: dvsize/chunk: division by 0 (error token
+ is "chunk")
diff --git a/tests/18imsm-r10_4d-takeover-r0_2d.broken b/tests/18imsm-r10_4d-takeover-r0_2d.broken
new file mode 100644
index 00000000..a27399f5
--- /dev/null
+++ b/tests/18imsm-r10_4d-takeover-r0_2d.broken
@@ -0,0 +1,5 @@
+fails rarely
+
+Fails about 1 run in 100 with message:
+
+ ERROR: size is wrong for /dev/md/vol0: 2 * 5120 (chunk=128) = 20480, not 0
diff --git a/tests/18imsm-r1_2d-takeover-r0_1d.broken b/tests/18imsm-r1_2d-takeover-r0_1d.broken
new file mode 100644
index 00000000..aa1982e6
--- /dev/null
+++ b/tests/18imsm-r1_2d-takeover-r0_1d.broken
@@ -0,0 +1,6 @@
+always fails
+
+Fails with error:
+
+ tests/func.sh: line 325: dvsize/chunk: division by 0 (error token
+ is "chunk")
diff --git a/tests/19raid6auto-repair.broken b/tests/19raid6auto-repair.broken
new file mode 100644
index 00000000..e91a1425
--- /dev/null
+++ b/tests/19raid6auto-repair.broken
@@ -0,0 +1,5 @@
+always fails
+
+Fails with:
+
+ "should detect errors"
diff --git a/tests/19raid6repair.broken b/tests/19raid6repair.broken
new file mode 100644
index 00000000..e91a1425
--- /dev/null
+++ b/tests/19raid6repair.broken
@@ -0,0 +1,5 @@
+always fails
+
+Fails with:
+
+ "should detect errors"
--
2.38.1

@ -0,0 +1,163 @@
From 336e13fc5ef43bc5b4633a9dadac5f7208e6c241 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 22 Mar 2024 12:51:18 +0100
Subject: [PATCH 45/66] Add key ENCRYPTION_NO_VERIFY to conf
Add ENCRYPTION_NO_VERIFY config key and allow to disable checking
encryption status for given type of drives.
The key is introduced because of SATA Opal disks for which TPM commands
must be enabled in libata kernel module, (libata.allow_tpm=1), otherwise
it is impossible to verify encryption status. TPM commands are disabled by
default.
Currently the key only supports the "sata_opal" value, if necessary,
the functionality is ready to support more types of disks. This
functionality will be used in the next patches.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
config.c | 25 ++++++++++++++++++++++++-
drive_encryption.c | 16 ++++++++++++----
mdadm.conf.5.in | 13 +++++++++++++
mdadm.h | 1 +
4 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/config.c b/config.c
index 44f7dd2f..b46d71cb 100644
--- a/config.c
+++ b/config.c
@@ -81,7 +81,7 @@ char DefaultAltConfDir[] = CONFFILE2 ".d";
enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
Homehost, HomeCluster, AutoMode, Policy, PartPolicy, Sysfs,
- MonitorDelay, LTEnd };
+ MonitorDelay, EncryptionNoVerify, LTEnd };
char *keywords[] = {
[Devices] = "devices",
[Array] = "array",
@@ -96,6 +96,7 @@ char *keywords[] = {
[PartPolicy]="part-policy",
[Sysfs] = "sysfs",
[MonitorDelay] = "monitordelay",
+ [EncryptionNoVerify] = "ENCRYPTION_NO_VERIFY",
[LTEnd] = NULL
};
@@ -729,6 +730,19 @@ void monitordelayline(char *line)
}
}
+static bool sata_opal_encryption_no_verify;
+void encryption_no_verify_line(char *line)
+{
+ char *word;
+
+ for (word = dl_next(line); word != line; word = dl_next(word)) {
+ if (strcasecmp(word, "sata_opal") == 0)
+ sata_opal_encryption_no_verify = true;
+ else
+ pr_err("unrecognised word on ENCRYPTION_NO_VERIFY line: %s\n", word);
+ }
+}
+
char auto_yes[] = "yes";
char auto_no[] = "no";
char auto_homehost[] = "homehost";
@@ -913,6 +927,9 @@ void conf_file(FILE *f)
case MonitorDelay:
monitordelayline(line);
break;
+ case EncryptionNoVerify:
+ encryption_no_verify_line(line);
+ break;
default:
pr_err("Unknown keyword %s\n", line);
}
@@ -1075,6 +1092,12 @@ int conf_get_monitor_delay(void)
return monitor_delay;
}
+bool conf_get_sata_opal_encryption_no_verify(void)
+{
+ load_conffile();
+ return sata_opal_encryption_no_verify;
+}
+
struct createinfo *conf_get_create_info(void)
{
load_conffile();
diff --git a/drive_encryption.c b/drive_encryption.c
index d520f0c7..6b2bd358 100644
--- a/drive_encryption.c
+++ b/drive_encryption.c
@@ -656,10 +656,18 @@ get_ata_encryption_information(int disk_fd, struct encryption_information *infor
if (status == MDADM_STATUS_ERROR)
return MDADM_STATUS_ERROR;
- if (is_ata_trusted_computing_supported(buffer_identify) &&
- !sysfs_is_libata_allow_tpm_enabled(verbose)) {
- pr_vrb("For SATA with Trusted Computing support, required libata.tpm_enabled=1.\n");
- return MDADM_STATUS_ERROR;
+ /* Possible OPAL support, further checks require tpm_enabled.*/
+ if (is_ata_trusted_computing_supported(buffer_identify)) {
+ /* OPAL SATA encryption checking disabled. */
+ if (conf_get_sata_opal_encryption_no_verify())
+ return MDADM_STATUS_SUCCESS;
+
+ if (!sysfs_is_libata_allow_tpm_enabled(verbose)) {
+ pr_vrb("Detected SATA drive /dev/%s with Trusted Computing support.\n",
+ fd2kname(disk_fd));
+ pr_vrb("Cannot verify encryption state. Requires libata.tpm_enabled=1.\n");
+ return MDADM_STATUS_ERROR;
+ }
}
ata_opal_status = is_ata_opal(disk_fd, buffer_identify, verbose);
diff --git a/mdadm.conf.5.in b/mdadm.conf.5.in
index 787e51e9..afb0a296 100644
--- a/mdadm.conf.5.in
+++ b/mdadm.conf.5.in
@@ -636,6 +636,17 @@ If multiple
.B MINITORDELAY
lines are provided, only first non-zero value is considered.
+.TP
+.B ENCRYPTION_NO_VERIFY
+The
+.B ENCRYPTION_NO_VERIFY
+disables encryption verification for devices with particular encryption support detected.
+Currently, only verification of SATA OPAL encryption can be disabled.
+It does not disable ATA security encryption verification.
+Available parameter
+.I "sata_opal".
+
+
.SH FILES
.SS {CONFFILE}
@@ -744,6 +755,8 @@ SYSFS uuid=bead5eb6:31c17a27:da120ba2:7dfda40d group_thread_cnt=4
sync_speed_max=1000000
.br
MONITORDELAY 60
+.br
+ENCRYPTION_NO_VERIFY sata_opal
.SH SEE ALSO
.BR mdadm (8),
diff --git a/mdadm.h b/mdadm.h
index 52a66b9a..2640b396 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1673,6 +1673,7 @@ extern char *conf_get_program(void);
extern char *conf_get_homehost(int *require_homehostp);
extern char *conf_get_homecluster(void);
extern int conf_get_monitor_delay(void);
+extern bool conf_get_sata_opal_encryption_no_verify(void);
extern char *conf_line(FILE *file);
extern char *conf_word(FILE *file, int allow_key);
extern void print_quoted(char *str);
--
2.41.0

@ -1,316 +0,0 @@
From 239b3cc0b5da87e966746533b1873c439db54b16 Mon Sep 17 00:00:00 2001
From: Mateusz Grzonka <mateusz.grzonka@intel.com>
Date: Fri, 12 Aug 2022 16:36:02 +0200
Subject: [PATCH 45/83] mdadm: Replace obsolete usleep with nanosleep
According to POSIX.1-2001, usleep is considered obsolete.
Replace it with a wrapper that uses nanosleep, as recommended in man.
Add handy macros for conversions between msec, usec and nsec.
Signed-off-by: Mateusz Grzonka <mateusz.grzonka@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 2 +-
Grow.c | 4 ++--
Manage.c | 10 +++++-----
managemon.c | 8 ++++----
mdadm.h | 4 ++++
mdmon.c | 4 ++--
super-intel.c | 6 +++---
util.c | 42 +++++++++++++++++++++++++++++++++---------
8 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 6df6bfbc..be2160b4 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1947,7 +1947,7 @@ out:
break;
close(mdfd);
}
- usleep(usecs);
+ sleep_for(0, USEC_TO_NSEC(usecs), true);
usecs <<= 1;
}
}
diff --git a/Grow.c b/Grow.c
index 97f22c75..5780635a 100644
--- a/Grow.c
+++ b/Grow.c
@@ -954,7 +954,7 @@ int start_reshape(struct mdinfo *sra, int already_running,
err = sysfs_set_str(sra, NULL, "sync_action",
"reshape");
if (err)
- sleep(1);
+ sleep_for(1, 0, true);
} while (err && errno == EBUSY && cnt-- > 0);
}
return err;
@@ -5058,7 +5058,7 @@ int Grow_continue_command(char *devname, int fd,
}
st->ss->getinfo_super(st, content, NULL);
if (!content->reshape_active)
- sleep(3);
+ sleep_for(3, 0, true);
else
break;
} while (cnt-- > 0);
diff --git a/Manage.c b/Manage.c
index e5e6abe4..a142f8bd 100644
--- a/Manage.c
+++ b/Manage.c
@@ -244,7 +244,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
"array_state",
"inactive")) < 0 &&
errno == EBUSY) {
- usleep(200000);
+ sleep_for(0, MSEC_TO_NSEC(200), true);
count--;
}
if (err) {
@@ -328,7 +328,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
/* must be in the critical section - wait a bit */
delay -= 1;
- usleep(100000);
+ sleep_for(0, MSEC_TO_NSEC(100), true);
}
if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
@@ -405,7 +405,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
* quite started yet. Wait a bit and
* check 'sync_action' to see.
*/
- usleep(10000);
+ sleep_for(0, MSEC_TO_NSEC(10), true);
sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
if (strncmp(buf, "reshape", 7) != 0)
break;
@@ -447,7 +447,7 @@ done:
count = 25; err = 0;
while (count && fd >= 0 &&
(err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) {
- usleep(200000);
+ sleep_for(0, MSEC_TO_NSEC(200), true);
count --;
}
if (fd >= 0 && err) {
@@ -1105,7 +1105,7 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
ret = sysfs_unique_holder(devnm, rdev);
if (ret < 2)
break;
- usleep(100 * 1000); /* 100ms */
+ sleep_for(0, MSEC_TO_NSEC(100), true);
} while (--count > 0);
if (ret == 0) {
diff --git a/managemon.c b/managemon.c
index 0e9bdf00..a7bfa8f6 100644
--- a/managemon.c
+++ b/managemon.c
@@ -207,7 +207,7 @@ static void replace_array(struct supertype *container,
remove_old();
while (pending_discard) {
while (discard_this == NULL)
- sleep(1);
+ sleep_for(1, 0, true);
remove_old();
}
pending_discard = old;
@@ -568,7 +568,7 @@ static void manage_member(struct mdstat_ent *mdstat,
updates = NULL;
while (update_queue_pending || update_queue) {
check_update_queue(container);
- usleep(15*1000);
+ sleep_for(0, MSEC_TO_NSEC(15), true);
}
replace_array(container, a, newa);
if (sysfs_set_str(&a->info, NULL,
@@ -822,7 +822,7 @@ static void handle_message(struct supertype *container, struct metadata_update *
if (msg->len <= 0)
while (update_queue_pending || update_queue) {
check_update_queue(container);
- usleep(15*1000);
+ sleep_for(0, MSEC_TO_NSEC(15), true);
}
if (msg->len == 0) { /* ping_monitor */
@@ -836,7 +836,7 @@ static void handle_message(struct supertype *container, struct metadata_update *
wakeup_monitor();
while (monitor_loop_cnt - cnt < 0)
- usleep(10 * 1000);
+ sleep_for(0, MSEC_TO_NSEC(10), true);
} else if (msg->len == -1) { /* ping_manager */
struct mdstat_ent *mdstat = mdstat_read(1, 0);
diff --git a/mdadm.h b/mdadm.h
index 163f4a49..add9c0b6 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1720,6 +1720,10 @@ extern int cluster_get_dlmlock(void);
extern int cluster_release_dlmlock(void);
extern void set_dlm_hooks(void);
+#define MSEC_TO_NSEC(msec) ((msec) * 1000000)
+#define USEC_TO_NSEC(usec) ((usec) * 1000)
+extern void sleep_for(unsigned int sec, long nsec, bool wake_after_interrupt);
+
#define _ROUND_UP(val, base) (((val) + (base) - 1) & ~(base - 1))
#define ROUND_UP(val, base) _ROUND_UP(val, (typeof(val))(base))
#define ROUND_UP_PTR(ptr, base) ((typeof(ptr)) \
diff --git a/mdmon.c b/mdmon.c
index c057da63..e9d035eb 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -99,7 +99,7 @@ static int clone_monitor(struct supertype *container)
if (rc)
return rc;
while (mon_tid == -1)
- usleep(10);
+ sleep_for(0, USEC_TO_NSEC(10), true);
pthread_attr_destroy(&attr);
mgr_tid = syscall(SYS_gettid);
@@ -209,7 +209,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
rv = kill(pid, SIGUSR1);
if (rv < 0)
break;
- usleep(200000);
+ sleep_for(0, MSEC_TO_NSEC(200), true);
}
}
diff --git a/super-intel.c b/super-intel.c
index 4ddfcf94..4d82af3d 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -5275,7 +5275,7 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d
/* retry the load if we might have raced against mdmon */
if (err == 3 && devnm && mdmon_running(devnm))
for (retry = 0; retry < 3; retry++) {
- usleep(3000);
+ sleep_for(0, MSEC_TO_NSEC(3), true);
err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
if (err != 3)
break;
@@ -5377,7 +5377,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
if (mdstat && mdmon_running(mdstat->devnm) && getpid() != mdmon_pid(mdstat->devnm)) {
for (retry = 0; retry < 3; retry++) {
- usleep(3000);
+ sleep_for(0, MSEC_TO_NSEC(3), true);
rv = load_and_parse_mpb(fd, super, devname, 0);
if (rv != 3)
break;
@@ -12084,7 +12084,7 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
close(fd);
return 1;
}
- usleep(30000);
+ sleep_for(0, MSEC_TO_NSEC(30), true);
} else
break;
} while (retry--);
diff --git a/util.c b/util.c
index 38f0420e..ca48d976 100644
--- a/util.c
+++ b/util.c
@@ -166,7 +166,7 @@ retry:
pr_err("error %d when get PW mode on lock %s\n", errno, str);
/* let's try several times if EAGAIN happened */
if (dlm_lock_res->lksb.sb_status == EAGAIN && retry_count < 10) {
- sleep(10);
+ sleep_for(10, 0, true);
retry_count++;
goto retry;
}
@@ -1085,7 +1085,7 @@ int open_dev_excl(char *devnm)
int i;
int flags = O_RDWR;
dev_t devid = devnm2devid(devnm);
- long delay = 1000;
+ unsigned int delay = 1; // miliseconds
sprintf(buf, "%d:%d", major(devid), minor(devid));
for (i = 0; i < 25; i++) {
@@ -1098,8 +1098,8 @@ int open_dev_excl(char *devnm)
}
if (errno != EBUSY)
return fd;
- usleep(delay);
- if (delay < 200000)
+ sleep_for(0, MSEC_TO_NSEC(delay), true);
+ if (delay < 200)
delay *= 2;
}
return -1;
@@ -1123,7 +1123,7 @@ void wait_for(char *dev, int fd)
{
int i;
struct stat stb_want;
- long delay = 1000;
+ unsigned int delay = 1; // miliseconds
if (fstat(fd, &stb_want) != 0 ||
(stb_want.st_mode & S_IFMT) != S_IFBLK)
@@ -1135,8 +1135,8 @@ void wait_for(char *dev, int fd)
(stb.st_mode & S_IFMT) == S_IFBLK &&
(stb.st_rdev == stb_want.st_rdev))
return;
- usleep(delay);
- if (delay < 200000)
+ sleep_for(0, MSEC_TO_NSEC(delay), true);
+ if (delay < 200)
delay *= 2;
}
if (i == 25)
@@ -1821,7 +1821,7 @@ int hot_remove_disk(int mdfd, unsigned long dev, int force)
while ((ret = ioctl(mdfd, HOT_REMOVE_DISK, dev)) == -1 &&
errno == EBUSY &&
cnt-- > 0)
- usleep(10000);
+ sleep_for(0, MSEC_TO_NSEC(10), true);
return ret;
}
@@ -1834,7 +1834,7 @@ int sys_hot_remove_disk(int statefd, int force)
while ((ret = write(statefd, "remove", 6)) == -1 &&
errno == EBUSY &&
cnt-- > 0)
- usleep(10000);
+ sleep_for(0, MSEC_TO_NSEC(10), true);
return ret == 6 ? 0 : -1;
}
@@ -2375,3 +2375,27 @@ out:
close(fd_zero);
return ret;
}
+
+/**
+ * sleep_for() - Sleeps for specified time.
+ * @sec: Seconds to sleep for.
+ * @nsec: Nanoseconds to sleep for, has to be less than one second (checked with assert()).
+ * @wake_after_interrupt: If set, return after the first EINTR instead of sleeping again.
+ *
+ * Function prints an error and returns immediately if an error other than EINTR occurs.
+ */
+void sleep_for(unsigned int sec, long nsec, bool wake_after_interrupt)
+{
+ struct timespec delay = {.tv_sec = sec, .tv_nsec = nsec};
+
+ assert(nsec < MSEC_TO_NSEC(1000));
+
+ do {
+ errno = 0;
+ nanosleep(&delay, &delay);
+ if (errno != 0 && errno != EINTR) {
+ pr_err("Error sleeping for %us %ldns: %s\n", sec, nsec, strerror(errno));
+ return;
+ }
+ } while (!wake_after_interrupt && errno == EINTR);
+}
--
2.38.1

@ -0,0 +1,217 @@
From bf62ed5d9642aa60abf4ac2d1d89f173bd66ae48 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 22 Mar 2024 12:51:19 +0100
Subject: [PATCH 46/66] imsm: print disk encryption information
Print SATA/NVMe disk encryption information in --detail-platform.
Encryption Ability and Status will be printed for each disk.
There is one exception, Opal SATA drives encryption is not checked when
ENCRYPTION_NO_VERIFY key with "sata_opal" value is set in conf, for this
reason such drives are treated as without encryption support.
To test this feature, drives SATA/NVMe with Opal support or SATA drives
with encryption support have to be used.
Example outputs of --detail-platform:
Non Opal, encryption enabled, SATA drive:
Port0 : /dev/sdc (CVPR050600G3120LGN)
Encryption(Ability|Status): Other|Unlocked
NVMe drive without Opal support:
NVMe under VMD : /dev/nvme2n1 (PHLF737302GB1P0GGN)
Encryption(Ability|Status): None|Unencrypted
Unencrypted SATA drive with OPAL support:
- default allow_tpm, we will get an error from mdadm:
Port6 : /dev/sdi (CVTS4246015V180IGN)
mdadm: Detected SATA drive /dev/sdi with Trusted Computing support.
mdadm: Cannot verify encryption state. Requires libata.tpm_enabled=1.
mdadm: Failed to get drive encryption information.
- default "allow_tpm" and config entry "ENCRYPTION_NO_VERIFY sata_opal":
Port6 : /dev/sdi (CVTS4246015V180IGN)
Encryption(Ability|Status): None|Unencrypted
- added "libata.allow_tpm=1" to boot parameters(requires reboot),
the status will be read correctly:
Port6 : /dev/sdi (CVTS4246015V180IGN)
Encryption(Ability|Status): SED|Unencrypted
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
drive_encryption.c | 36 ++++++++++++++++++++++++++++++++++++
drive_encryption.h | 2 ++
mdadm.conf.5.in | 3 +++
super-intel.c | 42 ++++++++++++++++++++++++++++++++++++++----
4 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/drive_encryption.c b/drive_encryption.c
index 6b2bd358..27da9621 100644
--- a/drive_encryption.c
+++ b/drive_encryption.c
@@ -141,6 +141,42 @@ typedef struct ata_trusted_computing {
__u16 var2 : 1;
} __attribute__((__packed__)) ata_trusted_computing_t;
+mapping_t encryption_ability_map[] = {
+ { "None", ENC_ABILITY_NONE },
+ { "Other", ENC_ABILITY_OTHER },
+ { "SED", ENC_ABILITY_SED },
+ { NULL, UnSet }
+};
+
+mapping_t encryption_status_map[] = {
+ { "Unencrypted", ENC_STATUS_UNENCRYPTED },
+ { "Locked", ENC_STATUS_LOCKED },
+ { "Unlocked", ENC_STATUS_UNLOCKED },
+ { NULL, UnSet }
+};
+
+/**
+ * get_encryption_ability_string() - get encryption ability name string.
+ * @ability: encryption ability enum.
+ *
+ * Return: encryption ability string.
+ */
+const char *get_encryption_ability_string(enum encryption_ability ability)
+{
+ return map_num_s(encryption_ability_map, ability);
+}
+
+/**
+ * get_encryption_status_string() - get encryption status name string.
+ * @status: encryption status enum.
+ *
+ * Return: encryption status string.
+ */
+const char *get_encryption_status_string(enum encryption_status status)
+{
+ return map_num_s(encryption_status_map, status);
+}
+
/**
* get_opal_locking_feature_description() - get opal locking feature description.
* @response: response from Opal Discovery Level 0.
diff --git a/drive_encryption.h b/drive_encryption.h
index 77c7f10f..0cb8ff1b 100644
--- a/drive_encryption.h
+++ b/drive_encryption.h
@@ -33,3 +33,5 @@ get_nvme_opal_encryption_information(int disk_fd, struct encryption_information
mdadm_status_t
get_ata_encryption_information(int disk_fd, struct encryption_information *information,
const int verbose);
+const char *get_encryption_ability_string(enum encryption_ability ability);
+const char *get_encryption_status_string(enum encryption_status status);
diff --git a/mdadm.conf.5.in b/mdadm.conf.5.in
index afb0a296..14302a91 100644
--- a/mdadm.conf.5.in
+++ b/mdadm.conf.5.in
@@ -643,6 +643,9 @@ The
disables encryption verification for devices with particular encryption support detected.
Currently, only verification of SATA OPAL encryption can be disabled.
It does not disable ATA security encryption verification.
+Currently effective only for
+.I IMSM
+metadata.
Available parameter
.I "sata_opal".
diff --git a/super-intel.c b/super-intel.c
index 212387ec..fbd1c11f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -27,6 +27,7 @@
#include <scsi/sg.h>
#include <ctype.h>
#include <dirent.h>
+#include "drive_encryption.h"
/* MPB == Metadata Parameter Block */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
@@ -2349,12 +2350,41 @@ static int imsm_read_serial(int fd, char *devname, __u8 *serial,
size_t serial_buf_len);
static void fd2devname(int fd, char *name);
-static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
+void print_encryption_information(int disk_fd, enum sys_dev_type hba_type)
+{
+ struct encryption_information information = {0};
+ mdadm_status_t status = MDADM_STATUS_SUCCESS;
+ const char *indent = " ";
+
+ switch (hba_type) {
+ case SYS_DEV_VMD:
+ case SYS_DEV_NVME:
+ status = get_nvme_opal_encryption_information(disk_fd, &information, 1);
+ break;
+ case SYS_DEV_SATA:
+ case SYS_DEV_SATA_VMD:
+ status = get_ata_encryption_information(disk_fd, &information, 1);
+ break;
+ default:
+ return;
+ }
+
+ if (status) {
+ pr_err("Failed to get drive encryption information.\n");
+ return;
+ }
+
+ printf("%sEncryption(Ability|Status): %s|%s\n", indent,
+ get_encryption_ability_string(information.ability),
+ get_encryption_status_string(information.status));
+}
+
+static int ahci_enumerate_ports(struct sys_dev *hba, int port_count, int host_base, int verbose)
{
/* dump an unsorted list of devices attached to AHCI Intel storage
* controller, as well as non-connected ports
*/
- int hba_len = strlen(hba_path) + 1;
+ int hba_len = strlen(hba->path) + 1;
struct dirent *ent;
DIR *dir;
char *path = NULL;
@@ -2390,7 +2420,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
path = devt_to_devpath(makedev(major, minor), 1, NULL);
if (!path)
continue;
- if (!path_attached_to_hba(path, hba_path)) {
+ if (!path_attached_to_hba(path, hba->path)) {
free(path);
path = NULL;
continue;
@@ -2493,6 +2523,8 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
printf(" (%s)\n", buf);
else
printf(" ()\n");
+
+ print_encryption_information(fd, hba->type);
close(fd);
}
free(path);
@@ -2557,6 +2589,8 @@ static int print_nvme_info(struct sys_dev *hba)
else
printf("()\n");
+ print_encryption_information(fd, hba->type);
+
skip:
close_fd(&fd);
}
@@ -2812,7 +2846,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
hba->path, get_sys_dev_type(hba->type));
if (hba->type == SYS_DEV_SATA || hba->type == SYS_DEV_SATA_VMD) {
host_base = ahci_get_port_count(hba->path, &port_count);
- if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
+ if (ahci_enumerate_ports(hba, port_count, host_base, verbose)) {
if (verbose > 0)
pr_err("failed to enumerate ports on %s controller at %s.\n",
get_sys_dev_type(hba->type), hba->pci_id);
--
2.41.0

@ -1,36 +0,0 @@
From 39b381252c32275079344d30de18b76fda4bba26 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 27 Jul 2022 15:52:45 -0600
Subject: [PATCH 46/83] tests/00readonly: Run udevadm settle before setting ro
In some recent kernel versions, 00readonly fails with:
mdadm: failed to set readonly for /dev/md0: Device or resource busy
ERROR: array is not read-only!
This was traced down to a race condition with udev holding a reference
to the block device at the same time as trying to set it read only.
To fix this, call udevadm settle before setting the array read only.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
tests/00readonly | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/00readonly b/tests/00readonly
index 39202487..afe243b3 100644
--- a/tests/00readonly
+++ b/tests/00readonly
@@ -12,6 +12,7 @@ do
$dev1 $dev2 $dev3 $dev4 --assume-clean
check nosync
check $level
+ udevadm settle
mdadm -ro $md0
check readonly
state=$(cat /sys/block/md0/md/array_state)
--
2.38.1

@ -0,0 +1,114 @@
From acb8f13be88c224eb1e01f72c1e1fda955bc80ba Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 22 Mar 2024 12:51:20 +0100
Subject: [PATCH 47/66] imsm: drive encryption policy implementation
IMSM cares about drive encryption state. It is not allowed to mix disks
with different encryption state within one md device. This policy will
verify that attempt to use disks with different encryption states will
fail. Verification is performed for devices NVMe/SATA Opal and SATA.
There is one exception, Opal SATA drives encryption is not checked when
ENCRYPTION_NO_VERIFY key with "sata_opal" value is set in conf, for this
reason such drives are treated as without encryption support.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
super-intel.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index fbd1c11f..1faab607 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -11291,6 +11291,78 @@ test_and_add_drive_controller_policy_imsm(const char * const type, dev_policy_t
return MDADM_STATUS_ERROR;
}
+/**
+ * test_and_add_drive_encryption_policy_imsm() - add disk encryption to policies list.
+ * @type: policy type to search in the list.
+ * @pols: list of currently recorded policies.
+ * @disk_fd: file descriptor of the device to check.
+ * @hba: The hba to which the drive is attached, could be NULL if verification is disabled.
+ * @verbose: verbose flag.
+ *
+ * IMSM cares about drive encryption state. It is not allowed to mix disks with different
+ * encryption state within one md device.
+ * If there is no encryption policy on pols we are free to add first one.
+ * If there is already a policy, the new one must be the same.
+ */
+static mdadm_status_t
+test_and_add_drive_encryption_policy_imsm(const char * const type, dev_policy_t **pols, int disk_fd,
+ struct sys_dev *hba, const int verbose)
+{
+ struct dev_policy *expected_policy = pol_find(*pols, (char *)type);
+ struct encryption_information information = {0};
+ char *encryption_state = "Unknown";
+ int status = MDADM_STATUS_SUCCESS;
+ bool encryption_checked = true;
+ char devname[PATH_MAX];
+
+ if (!hba)
+ goto check_policy;
+
+ switch (hba->type) {
+ case SYS_DEV_NVME:
+ case SYS_DEV_VMD:
+ status = get_nvme_opal_encryption_information(disk_fd, &information, verbose);
+ break;
+ case SYS_DEV_SATA:
+ case SYS_DEV_SATA_VMD:
+ status = get_ata_encryption_information(disk_fd, &information, verbose);
+ break;
+ default:
+ encryption_checked = false;
+ }
+
+ if (status) {
+ fd2devname(disk_fd, devname);
+ pr_vrb("Failed to read encryption information of device %s\n", devname);
+ return MDADM_STATUS_ERROR;
+ }
+
+ if (encryption_checked) {
+ if (information.status == ENC_STATUS_LOCKED) {
+ fd2devname(disk_fd, devname);
+ pr_vrb("Device %s is in Locked state, cannot use. Aborting.\n", devname);
+ return MDADM_STATUS_ERROR;
+ }
+ encryption_state = (char *)get_encryption_status_string(information.status);
+ }
+
+check_policy:
+ if (expected_policy) {
+ if (strcmp(expected_policy->value, encryption_state) == 0)
+ return MDADM_STATUS_SUCCESS;
+
+ fd2devname(disk_fd, devname);
+ pr_vrb("Encryption status \"%s\" detected for disk %s, but \"%s\" status was detected eariler.\n",
+ encryption_state, devname, expected_policy->value);
+ pr_vrb("Disks with different encryption status cannot be used.\n");
+ return MDADM_STATUS_ERROR;
+ }
+
+ pol_add(pols, (char *)type, encryption_state, "imsm");
+
+ return MDADM_STATUS_SUCCESS;
+}
+
struct imsm_drive_policy {
char *type;
mdadm_status_t (*test_and_add_drive_policy)(const char * const type,
@@ -11300,6 +11372,7 @@ struct imsm_drive_policy {
struct imsm_drive_policy imsm_policies[] = {
{"controller", test_and_add_drive_controller_policy_imsm},
+ {"encryption", test_and_add_drive_encryption_policy_imsm}
};
mdadm_status_t test_and_add_drive_policies_imsm(struct dev_policy **pols, int disk_fd,
--
2.41.0

@ -1,119 +0,0 @@
From b7671c82010ffc04dfaecff2dd19ef8b2283e2b6 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Tue, 19 Jul 2022 14:48:21 +0200
Subject: [PATCH 47/83] tests: add test for names
Current behavior is not documented and tested. This test is a base for
future improvements. It is enough to test it only with native metadata,
because it is generic code. Generated properties are passed to metadata
handler.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
tests/00createnames | 93 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 93 insertions(+)
create mode 100644 tests/00createnames
diff --git a/tests/00createnames b/tests/00createnames
new file mode 100644
index 00000000..64b81b92
--- /dev/null
+++ b/tests/00createnames
@@ -0,0 +1,93 @@
+set -x -e
+
+# Test how <devname> and --name= are handled for create mode.
+# We need to check three properties, generated from those parameters:
+# - devnode name
+# - link in /dev/md/ (MD_DEVNAME property from --detail --export)
+# - name in metadata (MD_NAME property from --examine --export)
+
+function _verify() {
+ local DEVNODE_NAME="$1"
+ local WANTED_LINK="$2"
+ local WANTED_NAME="$3"
+
+ local RES="$(mdadm -D --export $DEVNODE_NAME | grep MD_DEVNAME)"
+ if [[ "$?" != "0" ]]; then
+ echo "Cannot get details for $DEVNODE_NAME - unexpected devnode."
+ exit 1
+ fi
+
+ if [[ "$WANTED_LINK" != "empty" ]]; then
+ local EXPECTED="MD_DEVNAME=$WANTED_LINK"
+ if [[ "$RES" != "$EXPECTED" ]]; then
+ echo "$RES doesn't match $EXPECTED."
+ exit 1
+ fi
+ fi
+
+
+ local RES="$(mdadm -E --export $dev0 | grep MD_NAME)"
+ if [[ "$?" != "0" ]]; then
+ echo "Cannot get metadata from $dev0."
+ exit 1
+ fi
+
+ local EXPECTED="MD_NAME=$(hostname):$WANTED_NAME"
+ if [[ "$RES" != "$EXPECTED" ]]; then
+ echo "$RES doesn't match $EXPECTED."
+ exit 1
+ fi
+}
+
+function _create() {
+ local DEVNAME=$1
+ local NAME=$2
+
+ if [[ -z "$NAME" ]]; then
+ mdadm -CR "$DEVNAME" -l0 -n 1 $dev0 --force
+ else
+ mdadm -CR "$DEVNAME" --name="$NAME" -l0 -n 1 $dev0 --force
+ fi
+
+ if [[ "$?" != "0" ]]; then
+ echo "Cannot create device."
+ exit 1
+ fi
+}
+
+# The most trivial case.
+_create "/dev/md/name"
+_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
+
+_create "name"
+_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
+
+# Use 'mdX' as name.
+_create "/dev/md/md0"
+_verify "/dev/md127" "md0" "md0"
+mdadm -S "/dev/md127"
+
+_create "md0"
+_verify "/dev/md127" "md0" "md0"
+mdadm -S "/dev/md127"
+
+# <devnode> is used to create MD_DEVNAME but, name is used to create MD_NAME.
+_create "/dev/md/devnode" "name"
+_verify "/dev/md127" "devnode" "name"
+mdadm -S "/dev/md127"
+
+_create "devnode" "name"
+_verify "/dev/md127" "devnode" "name"
+mdadm -S "/dev/md127"
+
+# Devnode points to /dev/ directory. MD_DEVNAME doesn't exist.
+_create "/dev/md0"
+_verify "/dev/md0" "empty" "0"
+mdadm -S "/dev/md0"
+
+# Devnode points to /dev/ directory and name is set.
+_create "/dev/md0" "name"
+_verify "/dev/md0" "empty" "name"
+mdadm -S "/dev/md0"
--
2.38.1

File diff suppressed because it is too large Load Diff

@ -1,176 +0,0 @@
From e4a030a0d3a953b8e74c118200e58dc83c2fc608 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Tue, 19 Jul 2022 14:48:22 +0200
Subject: [PATCH 48/83] mdadm: remove symlink option
The option is not used. Remove it from code.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
ReadMe.c | 1 -
config.c | 7 +------
mdadm.8.in | 9 ---------
mdadm.c | 20 --------------------
mdadm.conf.5.in | 15 ---------------
mdadm.h | 2 --
6 files changed, 1 insertion(+), 53 deletions(-)
diff --git a/ReadMe.c b/ReadMe.c
index 7518a32a..7f94847e 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -147,7 +147,6 @@ struct option long_options[] = {
{"nofailfast",0, 0, NoFailFast},
{"re-add", 0, 0, ReAdd},
{"homehost", 1, 0, HomeHost},
- {"symlinks", 1, 0, Symlinks},
{"data-offset",1, 0, DataOffset},
{"nodes",1, 0, Nodes}, /* also for --assemble */
{"home-cluster",1, 0, ClusterName},
diff --git a/config.c b/config.c
index 9c725457..dc1620c1 100644
--- a/config.c
+++ b/config.c
@@ -194,7 +194,6 @@ struct mddev_dev *load_containers(void)
struct createinfo createinfo = {
.autof = 2, /* by default, create devices with standard names */
- .symlinks = 1,
.names = 0, /* By default, stick with numbered md devices. */
.bblist = 1, /* Use a bad block list by default */
#ifdef DEBIAN
@@ -310,11 +309,7 @@ static void createline(char *line)
if (!createinfo.supertype)
pr_err("metadata format %s unknown, ignoring\n",
w+9);
- } else if (strncasecmp(w, "symlinks=yes", 12) == 0)
- createinfo.symlinks = 1;
- else if (strncasecmp(w, "symlinks=no", 11) == 0)
- createinfo.symlinks = 0;
- else if (strncasecmp(w, "names=yes", 12) == 0)
+ } else if (strncasecmp(w, "names=yes", 12) == 0)
createinfo.names = 1;
else if (strncasecmp(w, "names=no", 11) == 0)
createinfo.names = 0;
diff --git a/mdadm.8.in b/mdadm.8.in
index 0be02e4a..f2736226 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1048,11 +1048,6 @@ simultaneously. If not specified, this defaults to 4.
Specify journal device for the RAID-4/5/6 array. The journal device
should be a SSD with reasonable lifetime.
-.TP
-.BR \-\-symlinks
-Auto creation of symlinks in /dev to /dev/md, option --symlinks must
-be 'no' or 'yes' and work with --create and --build.
-
.TP
.BR \-k ", " \-\-consistency\-policy=
Specify how the array maintains consistency in case of unexpected shutdown.
@@ -1405,10 +1400,6 @@ Reshape can be continued later using the
.B \-\-continue
option for the grow command.
-.TP
-.BR \-\-symlinks
-See this option under Create and Build options.
-
.SH For Manage mode:
.TP
diff --git a/mdadm.c b/mdadm.c
index 56722ed9..180f7a9c 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -59,7 +59,6 @@ int main(int argc, char *argv[])
struct mddev_dev *dv;
mdu_array_info_t array;
int devs_found = 0;
- char *symlinks = NULL;
int grow_continue = 0;
/* autof indicates whether and how to create device node.
* bottom 3 bits are style. Rest (when shifted) are number of parts
@@ -663,13 +662,6 @@ int main(int argc, char *argv[])
case O(ASSEMBLE,Auto): /* auto-creation of device node */
c.autof = parse_auto(optarg, "--auto flag", 0);
continue;
-
- case O(CREATE,Symlinks):
- case O(BUILD,Symlinks):
- case O(ASSEMBLE,Symlinks): /* auto creation of symlinks in /dev to /dev/md */
- symlinks = optarg;
- continue;
-
case O(BUILD,'f'): /* force honouring '-n 1' */
case O(BUILD,Force): /* force honouring '-n 1' */
case O(GROW,'f'): /* ditto */
@@ -1325,18 +1317,6 @@ int main(int argc, char *argv[])
exit(2);
}
- if (symlinks) {
- struct createinfo *ci = conf_get_create_info();
-
- if (strcasecmp(symlinks, "yes") == 0)
- ci->symlinks = 1;
- else if (strcasecmp(symlinks, "no") == 0)
- ci->symlinks = 0;
- else {
- pr_err("option --symlinks must be 'no' or 'yes'\n");
- exit(2);
- }
- }
/* Ok, got the option parsing out of the way
* hopefully it's mostly right but there might be some stuff
* missing
diff --git a/mdadm.conf.5.in b/mdadm.conf.5.in
index cd4e6a9d..bc2295c2 100644
--- a/mdadm.conf.5.in
+++ b/mdadm.conf.5.in
@@ -338,21 +338,6 @@ missing device entries should be created.
The name of the metadata format to use if none is explicitly given.
This can be useful to impose a system-wide default of version-1 superblocks.
-.TP
-.B symlinks=no
-Normally when creating devices in
-.B /dev/md/
-.I mdadm
-will create a matching symlink from
-.B /dev/
-with a name starting
-.B md
-or
-.BR md_ .
-Give
-.B symlinks=no
-to suppress this symlink creation.
-
.TP
.B names=yes
Since Linux 2.6.29 it has been possible to create
diff --git a/mdadm.h b/mdadm.h
index add9c0b6..93e72786 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -394,7 +394,6 @@ struct createinfo {
int gid;
int autof;
int mode;
- int symlinks;
int names;
int bblist;
struct supertype *supertype;
@@ -442,7 +441,6 @@ enum special_options {
BackupFile,
HomeHost,
AutoHomeHost,
- Symlinks,
AutoDetect,
Waitclean,
DetailPlatform,
--
2.38.1

@ -0,0 +1,66 @@
From 21d6c5d96a5a467b5877ba1d38106b3746005bcc Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Tue, 26 Mar 2024 13:21:11 +0100
Subject: [PATCH 49/66] mdadm: Add MAINTAINERS.md
Describe rules maintainer should follow.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
MAINTAINERS.md | 44 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
create mode 100644 MAINTAINERS.md
diff --git a/MAINTAINERS.md b/MAINTAINERS.md
new file mode 100644
index 00000000..9c79ba87
--- /dev/null
+++ b/MAINTAINERS.md
@@ -0,0 +1,44 @@
+# Maintainer tools
+
+Useful tools used in daily routines:
+- [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html)
+- [kup](https://korg.docs.kernel.org/kup.html)
+- [Auto-publishing](https://korg.docs.kernel.org/kup.html#auto-publishing-with-git-archive-signer)
+- [b4](https://b4.docs.kernel.org/en/latest/)
+
+# Checklist before applying patch
+
+We don't have CI testing yet, so all those steps must be performed manually:
+- Style check with [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html):
+
+ This is the code style the project currently follows. We are not strict about all rules. It must be run
+ as **checkpatch --no-tree**, see README.md.
+
+- [Commit style](https://www.kernel.org/doc/html/v4.10/process/submitting-patches.html):
+
+ It doesn't need to be followed as strictly as is in kernel but changes should be logically
+ separated. Submitter should care at least to mention "It is used in next patches" if unused
+ externs/files are added in patch. We love: *Reported-by:*, *Suggested-by:*, *Fixes:* tags.
+
+- Compilation, ideally on various gcc versions.
+- Mdadm test suite execution.
+- Consider requesting new tests from submitter, especially for new functionalities.
+- Ensure that maintainer *sign-off* is added, before pushing.
+
+# Making a release
+
+Assuming that maintainer is certain that release is safe, following steps must be done:
+
+- Update version strings in release commit, please refer to previous releases for examples.
+
+- Create GPG signed tag and push it to repo. Use same format as was used previously, prefixed by
+ **mdadm-**, e.g. **mdadm-3.1.2**, **mdadm-4.1**.
+
+- [Auto-publishing](https://korg.docs.kernel.org/kup.html#auto-publishing-with-git-archive-signer):
+
+ Adapt the script to our release tag model. When ready, push signed note to repository. If it is done
+ correctly, then *(sig)* is added to the package automatically generated by kernel.org automation.
+ There is no need to upload archive manually.
+
+- Update CHANGELOG.md.
+- Write "ANNOUNCE" mail to linux-raid@kernel.org to notify community.
--
2.41.0

@ -1,232 +0,0 @@
From ae5dfc56b7a96805d5a0b50eaf93b9fec8604298 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Tue, 19 Jul 2022 14:48:23 +0200
Subject: [PATCH 49/83] mdadm: move data_offset to struct shape
Data offset is a shape property so move it there to remove additional
parameter from some functions.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Create.c | 16 ++++++++--------
Grow.c | 7 +++----
mdadm.c | 20 +++++++++-----------
mdadm.h | 5 ++---
4 files changed, 22 insertions(+), 26 deletions(-)
diff --git a/Create.c b/Create.c
index c84c1ac8..e06ec2ae 100644
--- a/Create.c
+++ b/Create.c
@@ -95,7 +95,7 @@ int Create(struct supertype *st, char *mddev,
char *name, int *uuid,
int subdevs, struct mddev_dev *devlist,
struct shape *s,
- struct context *c, unsigned long long data_offset)
+ struct context *c)
{
/*
* Create a new raid array.
@@ -288,7 +288,7 @@ int Create(struct supertype *st, char *mddev,
newsize = s->size * 2;
if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
- data_offset, NULL,
+ s->data_offset, NULL,
&newsize, s->consistency_policy,
c->verbose >= 0))
return 1;
@@ -323,10 +323,10 @@ int Create(struct supertype *st, char *mddev,
info.array.working_disks = 0;
dnum = 0;
for (dv = devlist; dv; dv = dv->next)
- if (data_offset == VARIABLE_OFFSET)
+ if (s->data_offset == VARIABLE_OFFSET)
dv->data_offset = INVALID_SECTORS;
else
- dv->data_offset = data_offset;
+ dv->data_offset = s->data_offset;
for (dv=devlist; dv && !have_container; dv=dv->next, dnum++) {
char *dname = dv->devname;
@@ -342,7 +342,7 @@ int Create(struct supertype *st, char *mddev,
missing_disks ++;
continue;
}
- if (data_offset == VARIABLE_OFFSET) {
+ if (s->data_offset == VARIABLE_OFFSET) {
doff = strchr(dname, ':');
if (doff) {
*doff++ = 0;
@@ -350,7 +350,7 @@ int Create(struct supertype *st, char *mddev,
} else
dv->data_offset = INVALID_SECTORS;
} else
- dv->data_offset = data_offset;
+ dv->data_offset = s->data_offset;
dfd = open(dname, O_RDONLY);
if (dfd < 0) {
@@ -535,7 +535,7 @@ int Create(struct supertype *st, char *mddev,
if (!st->ss->validate_geometry(st, s->level, s->layout,
s->raiddisks,
&s->chunk, minsize*2,
- data_offset,
+ s->data_offset,
NULL, NULL,
s->consistency_policy, 0)) {
pr_err("devices too large for RAID level %d\n", s->level);
@@ -754,7 +754,7 @@ int Create(struct supertype *st, char *mddev,
}
}
if (!st->ss->init_super(st, &info.array, s, name, c->homehost, uuid,
- data_offset))
+ s->data_offset))
goto abort_locked;
total_slots = info.array.nr_disks;
diff --git a/Grow.c b/Grow.c
index 5780635a..868bdc3a 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1775,7 +1775,6 @@ static int reshape_container(char *container, char *devname,
int Grow_reshape(char *devname, int fd,
struct mddev_dev *devlist,
- unsigned long long data_offset,
struct context *c, struct shape *s)
{
/* Make some changes in the shape of an array.
@@ -1821,7 +1820,7 @@ int Grow_reshape(char *devname, int fd,
return 1;
}
- if (data_offset != INVALID_SECTORS && array.level != 10 &&
+ if (s->data_offset != INVALID_SECTORS && array.level != 10 &&
(array.level < 4 || array.level > 6)) {
pr_err("--grow --data-offset not yet supported\n");
return 1;
@@ -2179,7 +2178,7 @@ size_change_error:
if ((s->level == UnSet || s->level == array.level) &&
(s->layout_str == NULL) &&
(s->chunk == 0 || s->chunk == array.chunk_size) &&
- data_offset == INVALID_SECTORS &&
+ s->data_offset == INVALID_SECTORS &&
(s->raiddisks == 0 || s->raiddisks == array.raid_disks)) {
/* Nothing more to do */
if (!changed && c->verbose >= 0)
@@ -2379,7 +2378,7 @@ size_change_error:
}
sync_metadata(st);
rv = reshape_array(container, fd, devname, st, &info, c->force,
- devlist, data_offset, c->backup_file,
+ devlist, s->data_offset, c->backup_file,
c->verbose, 0, 0, 0);
frozen = 0;
}
diff --git a/mdadm.c b/mdadm.c
index 180f7a9c..845e4466 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -49,7 +49,6 @@ int main(int argc, char *argv[])
int i;
unsigned long long array_size = 0;
- unsigned long long data_offset = INVALID_SECTORS;
struct mddev_ident ident;
char *configfile = NULL;
int devmode = 0;
@@ -79,6 +78,7 @@ int main(int argc, char *argv[])
.layout = UnSet,
.bitmap_chunk = UnSet,
.consistency_policy = CONSISTENCY_POLICY_UNKNOWN,
+ .data_offset = INVALID_SECTORS,
};
char sys_hostname[256];
@@ -479,15 +479,15 @@ int main(int argc, char *argv[])
case O(CREATE,DataOffset):
case O(GROW,DataOffset):
- if (data_offset != INVALID_SECTORS) {
+ if (s.data_offset != INVALID_SECTORS) {
pr_err("data-offset may only be specified one. Second value is %s.\n", optarg);
exit(2);
}
if (mode == CREATE && strcmp(optarg, "variable") == 0)
- data_offset = VARIABLE_OFFSET;
+ s.data_offset = VARIABLE_OFFSET;
else
- data_offset = parse_size(optarg);
- if (data_offset == INVALID_SECTORS) {
+ s.data_offset = parse_size(optarg);
+ if (s.data_offset == INVALID_SECTORS) {
pr_err("invalid data-offset: %s\n",
optarg);
exit(2);
@@ -1416,7 +1416,7 @@ int main(int argc, char *argv[])
exit(1);
}
- if (c.backup_file && data_offset != INVALID_SECTORS) {
+ if (c.backup_file && s.data_offset != INVALID_SECTORS) {
pr_err("--backup-file and --data-offset are incompatible\n");
exit(2);
}
@@ -1587,8 +1587,7 @@ int main(int argc, char *argv[])
rv = Create(ss, devlist->devname,
ident.name, ident.uuid_set ? ident.uuid : NULL,
- devs_found-1, devlist->next,
- &s, &c, data_offset);
+ devs_found - 1, devlist->next, &s, &c);
break;
case MISC:
if (devmode == 'E') {
@@ -1706,10 +1705,9 @@ int main(int argc, char *argv[])
c.verbose);
else if (s.size > 0 || s.raiddisks || s.layout_str ||
s.chunk != 0 || s.level != UnSet ||
- data_offset != INVALID_SECTORS) {
+ s.data_offset != INVALID_SECTORS) {
rv = Grow_reshape(devlist->devname, mdfd,
- devlist->next,
- data_offset, &c, &s);
+ devlist->next, &c, &s);
} else if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s);
} else if (array_size == 0)
diff --git a/mdadm.h b/mdadm.h
index 93e72786..adb7cdaa 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -595,6 +595,7 @@ struct shape {
int assume_clean;
int write_behind;
unsigned long long size;
+ unsigned long long data_offset;
int consistency_policy;
};
@@ -1431,7 +1432,6 @@ extern int Grow_addbitmap(char *devname, int fd,
struct context *c, struct shape *s);
extern int Grow_reshape(char *devname, int fd,
struct mddev_dev *devlist,
- unsigned long long data_offset,
struct context *c, struct shape *s);
extern int Grow_restart(struct supertype *st, struct mdinfo *info,
int *fdlist, int cnt, char *backup_file, int verbose);
@@ -1462,8 +1462,7 @@ extern int Create(struct supertype *st, char *mddev,
char *name, int *uuid,
int subdevs, struct mddev_dev *devlist,
struct shape *s,
- struct context *c,
- unsigned long long data_offset);
+ struct context *c);
extern int Detail(char *dev, struct context *c);
extern int Detail_Platform(struct superswitch *ss, int scan, int verbose, int export, char *controller_path);
--
2.38.1

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save