Blame SOURCES/0050-mdadm-Don-t-open-md-device-for-CREATE-and-ASSEMBLE.patch

b33395
From 27ad4900501c615b7c6b266bf23948e5606dba53 Mon Sep 17 00:00:00 2001
b33395
From: Logan Gunthorpe <logang@deltatee.com>
b33395
Date: Wed, 27 Jul 2022 15:52:46 -0600
b33395
Subject: [PATCH 50/52] mdadm: Don't open md device for CREATE and ASSEMBLE
b33395
b33395
The mdadm command tries to open the md device for most modes, first
b33395
thing, no matter what. When running to create or assemble an array,
b33395
in most cases, the md device will not exist, the open call will fail
b33395
and everything will proceed correctly.
b33395
b33395
However, when running tests, a create or assembly command may be run
b33395
shortly after stopping an array and the old md device file may still
b33395
be around. Then, if create_on_open is set in the kernel, a new md
b33395
device will be created when mdadm does its initial open.
b33395
b33395
When mdadm gets around to creating the new device with the new_array
b33395
parameter it issues this error:
b33395
b33395
   mdadm: Fail to create md0 when using
b33395
   /sys/module/md_mod/parameters/new_array, fallback to creation via node
b33395
b33395
This is because an mddev was already created by the kernel with the
b33395
earlier open() call and thus the new one being created will fail with
b33395
EEXIST. The array will still be created successfully due to
b33395
falling back to the node creation method. However, the error message
b33395
itself will fail any test that's running it.
b33395
b33395
This issue is a race condition that is very rare, but a recent change
b33395
in the kernel caused this to happen more frequently: about 1 in 50
b33395
times.
b33395
b33395
To fix this, don't bother trying to open the md device for CREATE,
b33395
ASSEMBLE and BUILD commands, as the file descriptor will never be used
b33395
anyway even if it is successfully opened. The mdfd has not been used
b33395
for these commands since:
b33395
b33395
   7f91af49ad09 ("Delay creation of array devices for assemble/build/create")
b33395
b33395
The checks that were done on the open device can be changed to being
b33395
done with stat.
b33395
b33395
Side note: it would be nice to disable create_on_open as well to help
b33395
solve this, but it seems the work for this was never finished. By default,
b33395
mdadm will create using the old node interface when a name is specified
b33395
unless the user specifically puts names=yes in a config file, which
b33395
doesn't seem to be common or desirable to require.
b33395
b33395
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
b33395
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
b33395
---
b33395
 lib.c   | 12 ++++++++++++
b33395
 mdadm.c | 40 ++++++++++++++++++++--------------------
b33395
 mdadm.h |  1 +
b33395
 3 files changed, 33 insertions(+), 20 deletions(-)
b33395
b33395
diff --git a/lib.c b/lib.c
b33395
index 7e3e3d47..e395b28d 100644
b33395
--- a/lib.c
b33395
+++ b/lib.c
b33395
@@ -164,6 +164,18 @@ char *stat2devnm(struct stat *st)
b33395
 	return devid2devnm(st->st_rdev);
b33395
 }
b33395
 
b33395
+bool stat_is_md_dev(struct stat *st)
b33395
+{
b33395
+	if ((S_IFMT & st->st_mode) != S_IFBLK)
b33395
+		return false;
b33395
+	if (major(st->st_rdev) == MD_MAJOR)
b33395
+		return true;
b33395
+	if (major(st->st_rdev) == (unsigned)get_mdp_major())
b33395
+		return true;
b33395
+
b33395
+	return false;
b33395
+}
b33395
+
b33395
 char *fd2devnm(int fd)
b33395
 {
b33395
 	struct stat stb;
b33395
diff --git a/mdadm.c b/mdadm.c
b33395
index 845e4466..972adb52 100644
b33395
--- a/mdadm.c
b33395
+++ b/mdadm.c
b33395
@@ -1329,6 +1329,9 @@ int main(int argc, char *argv[])
b33395
 
b33395
 	if (mode == MANAGE || mode == BUILD || mode == CREATE ||
b33395
 	    mode == GROW || (mode == ASSEMBLE && ! c.scan)) {
b33395
+		struct stat stb;
b33395
+		int ret;
b33395
+
b33395
 		if (devs_found < 1) {
b33395
 			pr_err("an md device must be given in this mode\n");
b33395
 			exit(2);
b33395
@@ -1341,6 +1344,12 @@ int main(int argc, char *argv[])
b33395
 			mdfd = open_mddev(devlist->devname, 1);
b33395
 			if (mdfd < 0)
b33395
 				exit(1);
b33395
+
b33395
+			ret = fstat(mdfd, &stb);
b33395
+			if (ret) {
b33395
+				pr_err("fstat failed on %s.\n", devlist->devname);
b33395
+				exit(1);
b33395
+			}
b33395
 		} else {
b33395
 			char *bname = basename(devlist->devname);
b33395
 
b33395
@@ -1348,30 +1357,21 @@ int main(int argc, char *argv[])
b33395
 				pr_err("Name %s is too long.\n", devlist->devname);
b33395
 				exit(1);
b33395
 			}
b33395
-			/* non-existent device is OK */
b33395
-			mdfd = open_mddev(devlist->devname, 0);
b33395
-		}
b33395
-		if (mdfd == -2) {
b33395
-			pr_err("device %s exists but is not an md array.\n", devlist->devname);
b33395
-			exit(1);
b33395
-		}
b33395
-		if ((int)ident.super_minor == -2) {
b33395
-			struct stat stb;
b33395
-			if (mdfd < 0) {
b33395
+
b33395
+			ret = stat(devlist->devname, &stb);
b33395
+			if (ident.super_minor == -2 && ret != 0) {
b33395
 				pr_err("--super-minor=dev given, and listed device %s doesn't exist.\n",
b33395
-					devlist->devname);
b33395
+				       devlist->devname);
b33395
+				exit(1);
b33395
+			}
b33395
+
b33395
+			if (!ret && !stat_is_md_dev(&stb)) {
b33395
+				pr_err("device %s exists but is not an md array.\n", devlist->devname);
b33395
 				exit(1);
b33395
 			}
b33395
-			fstat(mdfd, &stb);
b33395
-			ident.super_minor = minor(stb.st_rdev);
b33395
-		}
b33395
-		if (mdfd >= 0 && mode != MANAGE && mode != GROW) {
b33395
-			/* We don't really want this open yet, we just might
b33395
-			 * have wanted to check some things
b33395
-			 */
b33395
-			close(mdfd);
b33395
-			mdfd = -1;
b33395
 		}
b33395
+		if (ident.super_minor == -2)
b33395
+			ident.super_minor = minor(stb.st_rdev);
b33395
 	}
b33395
 
b33395
 	if (s.raiddisks) {
b33395
diff --git a/mdadm.h b/mdadm.h
b33395
index adb7cdaa..8208b81e 100644
b33395
--- a/mdadm.h
b33395
+++ b/mdadm.h
b33395
@@ -1672,6 +1672,7 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0
b33395
 extern char *stat2kname(struct stat *st);
b33395
 extern char *fd2kname(int fd);
b33395
 extern char *stat2devnm(struct stat *st);
b33395
+bool stat_is_md_dev(struct stat *st);
b33395
 extern char *fd2devnm(int fd);
b33395
 extern void udev_block(char *devnm);
b33395
 extern void udev_unblock(void);
b33395
-- 
b33395
2.31.1
b33395