Blame SOURCES/0050-mdadm-Don-t-open-md-device-for-CREATE-and-ASSEMBLE.patch

25786b
From 27ad4900501c615b7c6b266bf23948e5606dba53 Mon Sep 17 00:00:00 2001
25786b
From: Logan Gunthorpe <logang@deltatee.com>
25786b
Date: Wed, 27 Jul 2022 15:52:46 -0600
91179e
Subject: [PATCH 50/83] mdadm: Don't open md device for CREATE and ASSEMBLE
25786b
25786b
The mdadm command tries to open the md device for most modes, first
25786b
thing, no matter what. When running to create or assemble an array,
25786b
in most cases, the md device will not exist, the open call will fail
25786b
and everything will proceed correctly.
25786b
25786b
However, when running tests, a create or assembly command may be run
25786b
shortly after stopping an array and the old md device file may still
25786b
be around. Then, if create_on_open is set in the kernel, a new md
25786b
device will be created when mdadm does its initial open.
25786b
25786b
When mdadm gets around to creating the new device with the new_array
25786b
parameter it issues this error:
25786b
25786b
   mdadm: Fail to create md0 when using
25786b
   /sys/module/md_mod/parameters/new_array, fallback to creation via node
25786b
25786b
This is because an mddev was already created by the kernel with the
25786b
earlier open() call and thus the new one being created will fail with
25786b
EEXIST. The array will still be created successfully due to
25786b
falling back to the node creation method. However, the error message
25786b
itself will fail any test that's running it.
25786b
25786b
This issue is a race condition that is very rare, but a recent change
25786b
in the kernel caused this to happen more frequently: about 1 in 50
25786b
times.
25786b
25786b
To fix this, don't bother trying to open the md device for CREATE,
25786b
ASSEMBLE and BUILD commands, as the file descriptor will never be used
25786b
anyway even if it is successfully opened. The mdfd has not been used
25786b
for these commands since:
25786b
25786b
   7f91af49ad09 ("Delay creation of array devices for assemble/build/create")
25786b
25786b
The checks that were done on the open device can be changed to being
25786b
done with stat.
25786b
25786b
Side note: it would be nice to disable create_on_open as well to help
25786b
solve this, but it seems the work for this was never finished. By default,
25786b
mdadm will create using the old node interface when a name is specified
25786b
unless the user specifically puts names=yes in a config file, which
25786b
doesn't seem to be common or desirable to require.
25786b
25786b
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
25786b
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
25786b
---
25786b
 lib.c   | 12 ++++++++++++
25786b
 mdadm.c | 40 ++++++++++++++++++++--------------------
25786b
 mdadm.h |  1 +
25786b
 3 files changed, 33 insertions(+), 20 deletions(-)
25786b
25786b
diff --git a/lib.c b/lib.c
25786b
index 7e3e3d47..e395b28d 100644
25786b
--- a/lib.c
25786b
+++ b/lib.c
25786b
@@ -164,6 +164,18 @@ char *stat2devnm(struct stat *st)
25786b
 	return devid2devnm(st->st_rdev);
25786b
 }
25786b
 
25786b
+bool stat_is_md_dev(struct stat *st)
25786b
+{
25786b
+	if ((S_IFMT & st->st_mode) != S_IFBLK)
25786b
+		return false;
25786b
+	if (major(st->st_rdev) == MD_MAJOR)
25786b
+		return true;
25786b
+	if (major(st->st_rdev) == (unsigned)get_mdp_major())
25786b
+		return true;
25786b
+
25786b
+	return false;
25786b
+}
25786b
+
25786b
 char *fd2devnm(int fd)
25786b
 {
25786b
 	struct stat stb;
25786b
diff --git a/mdadm.c b/mdadm.c
25786b
index 845e4466..972adb52 100644
25786b
--- a/mdadm.c
25786b
+++ b/mdadm.c
25786b
@@ -1329,6 +1329,9 @@ int main(int argc, char *argv[])
25786b
 
25786b
 	if (mode == MANAGE || mode == BUILD || mode == CREATE ||
25786b
 	    mode == GROW || (mode == ASSEMBLE && ! c.scan)) {
25786b
+		struct stat stb;
25786b
+		int ret;
25786b
+
25786b
 		if (devs_found < 1) {
25786b
 			pr_err("an md device must be given in this mode\n");
25786b
 			exit(2);
25786b
@@ -1341,6 +1344,12 @@ int main(int argc, char *argv[])
25786b
 			mdfd = open_mddev(devlist->devname, 1);
25786b
 			if (mdfd < 0)
25786b
 				exit(1);
25786b
+
25786b
+			ret = fstat(mdfd, &stb);
25786b
+			if (ret) {
25786b
+				pr_err("fstat failed on %s.\n", devlist->devname);
25786b
+				exit(1);
25786b
+			}
25786b
 		} else {
25786b
 			char *bname = basename(devlist->devname);
25786b
 
25786b
@@ -1348,30 +1357,21 @@ int main(int argc, char *argv[])
25786b
 				pr_err("Name %s is too long.\n", devlist->devname);
25786b
 				exit(1);
25786b
 			}
25786b
-			/* non-existent device is OK */
25786b
-			mdfd = open_mddev(devlist->devname, 0);
25786b
-		}
25786b
-		if (mdfd == -2) {
25786b
-			pr_err("device %s exists but is not an md array.\n", devlist->devname);
25786b
-			exit(1);
25786b
-		}
25786b
-		if ((int)ident.super_minor == -2) {
25786b
-			struct stat stb;
25786b
-			if (mdfd < 0) {
25786b
+
25786b
+			ret = stat(devlist->devname, &stb);
25786b
+			if (ident.super_minor == -2 && ret != 0) {
25786b
 				pr_err("--super-minor=dev given, and listed device %s doesn't exist.\n",
25786b
-					devlist->devname);
25786b
+				       devlist->devname);
25786b
+				exit(1);
25786b
+			}
25786b
+
25786b
+			if (!ret && !stat_is_md_dev(&stb)) {
25786b
+				pr_err("device %s exists but is not an md array.\n", devlist->devname);
25786b
 				exit(1);
25786b
 			}
25786b
-			fstat(mdfd, &stb);
25786b
-			ident.super_minor = minor(stb.st_rdev);
25786b
-		}
25786b
-		if (mdfd >= 0 && mode != MANAGE && mode != GROW) {
25786b
-			/* We don't really want this open yet, we just might
25786b
-			 * have wanted to check some things
25786b
-			 */
25786b
-			close(mdfd);
25786b
-			mdfd = -1;
25786b
 		}
25786b
+		if (ident.super_minor == -2)
25786b
+			ident.super_minor = minor(stb.st_rdev);
25786b
 	}
25786b
 
25786b
 	if (s.raiddisks) {
25786b
diff --git a/mdadm.h b/mdadm.h
25786b
index adb7cdaa..8208b81e 100644
25786b
--- a/mdadm.h
25786b
+++ b/mdadm.h
25786b
@@ -1672,6 +1672,7 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0
25786b
 extern char *stat2kname(struct stat *st);
25786b
 extern char *fd2kname(int fd);
25786b
 extern char *stat2devnm(struct stat *st);
25786b
+bool stat_is_md_dev(struct stat *st);
25786b
 extern char *fd2devnm(int fd);
25786b
 extern void udev_block(char *devnm);
25786b
 extern void udev_unblock(void);
25786b
-- 
91179e
2.38.1
25786b