Blame SOURCES/0025-mdmon-fix-wrong-array-state-when-disk-fails-during-m.patch

6e099e
From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001
6e099e
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
6e099e
Date: Tue, 7 May 2019 16:08:47 +0200
6e099e
Subject: [RHEL7.8 PATCH V2 25/47] mdmon: fix wrong array state when disk fails
6e099e
 during mdmon startup
6e099e
6e099e
If a member drive disappears and is set faulty by the kernel during
6e099e
mdmon startup, after ss->load_container() but before manage_new(), mdmon
6e099e
will try to re-add the faulty drive to the array and start rebuilding.
6e099e
Metadata on the active drive is updated, but the faulty drive is not
6e099e
removed from the array and is left in a "blocked" state, so any write
6e099e
request to the array will block. If the faulty drive reappears in the
6e099e
system, e.g. after a reboot, the array will not assemble because metadata
6e099e
on the drives will be incompatible (at least on imsm).
6e099e
6e099e
Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This
6e099e
is an extension of the "GET_DEVS" option and causes all member devices
6e099e
to be returned, even if the associated block device has been removed.
6e099e
Use this option in manage_new() to include the faulty device on the
6e099e
active_array's devices list. Mdmon will then properly remove the faulty
6e099e
device from the array and update the metadata to reflect the degraded
6e099e
state.
6e099e
6e099e
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
6e099e
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
6e099e
---
6e099e
 managemon.c   |  2 +-
6e099e
 mdadm.h       |  1 +
6e099e
 super-intel.c |  2 +-
6e099e
 sysfs.c       | 23 ++++++++++++++---------
6e099e
 4 files changed, 17 insertions(+), 11 deletions(-)
6e099e
6e099e
diff --git a/managemon.c b/managemon.c
6e099e
index 29b91ba..200cf83 100644
6e099e
--- a/managemon.c
6e099e
+++ b/managemon.c
6e099e
@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,
6e099e
 	mdi = sysfs_read(-1, mdstat->devnm,
6e099e
 			 GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
6e099e
 			 GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
6e099e
-			 GET_LAYOUT);
6e099e
+			 GET_LAYOUT|GET_DEVS_ALL);
6e099e
 
6e099e
 	if (!mdi)
6e099e
 		return;
6e099e
diff --git a/mdadm.h b/mdadm.h
6e099e
index 705bd9b..427cc52 100644
6e099e
--- a/mdadm.h
6e099e
+++ b/mdadm.h
6e099e
@@ -647,6 +647,7 @@ enum sysfs_read_flags {
6e099e
 	GET_ERROR	= (1 << 24),
6e099e
 	GET_ARRAY_STATE = (1 << 25),
6e099e
 	GET_CONSISTENCY_POLICY	= (1 << 26),
6e099e
+	GET_DEVS_ALL	= (1 << 27),
6e099e
 };
6e099e
 
6e099e
 /* If fd >= 0, get the array it is open on,
6e099e
diff --git a/super-intel.c b/super-intel.c
6e099e
index 2ba045a..4fd5e84 100644
6e099e
--- a/super-intel.c
6e099e
+++ b/super-intel.c
6e099e
@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
6e099e
 	disk = get_imsm_disk(super, ord_to_idx(ord));
6e099e
 
6e099e
 	/* check for new failures */
6e099e
-	if (state & DS_FAULTY) {
6e099e
+	if (disk && (state & DS_FAULTY)) {
6e099e
 		if (mark_failure(super, dev, disk, ord_to_idx(ord)))
6e099e
 			super->updates_pending++;
6e099e
 	}
6e099e
diff --git a/sysfs.c b/sysfs.c
6e099e
index df6fdda..2dd9ab6 100644
6e099e
--- a/sysfs.c
6e099e
+++ b/sysfs.c
6e099e
@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
6e099e
 			/* assume this is a stale reference to a hot
6e099e
 			 * removed device
6e099e
 			 */
6e099e
-			free(dev);
6e099e
-			continue;
6e099e
+			if (!(options & GET_DEVS_ALL)) {
6e099e
+				free(dev);
6e099e
+				continue;
6e099e
+			}
6e099e
+		} else {
6e099e
+			sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
6e099e
 		}
6e099e
-		sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
6e099e
 
6e099e
-		/* special case check for block devices that can go 'offline' */
6e099e
-		strcpy(dbase, "block/device/state");
6e099e
-		if (load_sys(fname, buf, sizeof(buf)) == 0 &&
6e099e
-		    strncmp(buf, "offline", 7) == 0) {
6e099e
-			free(dev);
6e099e
-			continue;
6e099e
+		if (!(options & GET_DEVS_ALL)) {
6e099e
+			/* special case check for block devices that can go 'offline' */
6e099e
+			strcpy(dbase, "block/device/state");
6e099e
+			if (load_sys(fname, buf, sizeof(buf)) == 0 &&
6e099e
+			    strncmp(buf, "offline", 7) == 0) {
6e099e
+				free(dev);
6e099e
+				continue;
6e099e
+			}
6e099e
 		}
6e099e
 
6e099e
 		/* finally add this disk to the array */
6e099e
-- 
6e099e
2.7.5
6e099e