Blame SOURCES/0025-mdmon-fix-wrong-array-state-when-disk-fails-during-m.patch

f9a9f5
From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001
f9a9f5
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
f9a9f5
Date: Tue, 7 May 2019 16:08:47 +0200
f9a9f5
Subject: [RHEL7.8 PATCH V2 25/47] mdmon: fix wrong array state when disk fails
f9a9f5
 during mdmon startup
f9a9f5
f9a9f5
If a member drive disappears and is set faulty by the kernel during
f9a9f5
mdmon startup, after ss->load_container() but before manage_new(), mdmon
f9a9f5
will try to re-add the faulty drive to the array and start rebuilding.
f9a9f5
Metadata on the active drive is updated, but the faulty drive is not
f9a9f5
removed from the array and is left in a "blocked" state and any write
f9a9f5
request to the array will block. If the faulty drive reappears in the
f9a9f5
system, e.g. after a reboot, the array will not assemble because metadata
f9a9f5
on the drives will be incompatible (at least on imsm).
f9a9f5
f9a9f5
Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This
f9a9f5
is an extension of the "GET_DEVS" option and causes all member devices
f9a9f5
to be returned, even if the associated block device has been removed.
f9a9f5
Use this option in manage_new() to include the faulty device on the
f9a9f5
active_array's devices list. Mdmon will then properly remove the faulty
f9a9f5
device from the array and update the metadata to reflect the degraded
f9a9f5
state.
f9a9f5
f9a9f5
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
f9a9f5
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
f9a9f5
---
f9a9f5
 managemon.c   |  2 +-
f9a9f5
 mdadm.h       |  1 +
f9a9f5
 super-intel.c |  2 +-
f9a9f5
 sysfs.c       | 23 ++++++++++++++---------
f9a9f5
 4 files changed, 17 insertions(+), 11 deletions(-)
f9a9f5
f9a9f5
diff --git a/managemon.c b/managemon.c
f9a9f5
index 29b91ba..200cf83 100644
f9a9f5
--- a/managemon.c
f9a9f5
+++ b/managemon.c
f9a9f5
@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,
f9a9f5
 	mdi = sysfs_read(-1, mdstat->devnm,
f9a9f5
 			 GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
f9a9f5
 			 GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
f9a9f5
-			 GET_LAYOUT);
f9a9f5
+			 GET_LAYOUT|GET_DEVS_ALL);
f9a9f5
 
f9a9f5
 	if (!mdi)
f9a9f5
 		return;
f9a9f5
diff --git a/mdadm.h b/mdadm.h
f9a9f5
index 705bd9b..427cc52 100644
f9a9f5
--- a/mdadm.h
f9a9f5
+++ b/mdadm.h
f9a9f5
@@ -647,6 +647,7 @@ enum sysfs_read_flags {
f9a9f5
 	GET_ERROR	= (1 << 24),
f9a9f5
 	GET_ARRAY_STATE = (1 << 25),
f9a9f5
 	GET_CONSISTENCY_POLICY	= (1 << 26),
f9a9f5
+	GET_DEVS_ALL	= (1 << 27),
f9a9f5
 };
f9a9f5
 
f9a9f5
 /* If fd >= 0, get the array it is open on,
f9a9f5
diff --git a/super-intel.c b/super-intel.c
f9a9f5
index 2ba045a..4fd5e84 100644
f9a9f5
--- a/super-intel.c
f9a9f5
+++ b/super-intel.c
f9a9f5
@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
f9a9f5
 	disk = get_imsm_disk(super, ord_to_idx(ord));
f9a9f5
 
f9a9f5
 	/* check for new failures */
f9a9f5
-	if (state & DS_FAULTY) {
f9a9f5
+	if (disk && (state & DS_FAULTY)) {
f9a9f5
 		if (mark_failure(super, dev, disk, ord_to_idx(ord)))
f9a9f5
 			super->updates_pending++;
f9a9f5
 	}
f9a9f5
diff --git a/sysfs.c b/sysfs.c
f9a9f5
index df6fdda..2dd9ab6 100644
f9a9f5
--- a/sysfs.c
f9a9f5
+++ b/sysfs.c
f9a9f5
@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
f9a9f5
 			/* assume this is a stale reference to a hot
f9a9f5
 			 * removed device
f9a9f5
 			 */
f9a9f5
-			free(dev);
f9a9f5
-			continue;
f9a9f5
+			if (!(options & GET_DEVS_ALL)) {
f9a9f5
+				free(dev);
f9a9f5
+				continue;
f9a9f5
+			}
f9a9f5
+		} else {
f9a9f5
+			sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
f9a9f5
 		}
f9a9f5
-		sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
f9a9f5
 
f9a9f5
-		/* special case check for block devices that can go 'offline' */
f9a9f5
-		strcpy(dbase, "block/device/state");
f9a9f5
-		if (load_sys(fname, buf, sizeof(buf)) == 0 &&
f9a9f5
-		    strncmp(buf, "offline", 7) == 0) {
f9a9f5
-			free(dev);
f9a9f5
-			continue;
f9a9f5
+		if (!(options & GET_DEVS_ALL)) {
f9a9f5
+			/* special case check for block devices that can go 'offline' */
f9a9f5
+			strcpy(dbase, "block/device/state");
f9a9f5
+			if (load_sys(fname, buf, sizeof(buf)) == 0 &&
f9a9f5
+			    strncmp(buf, "offline", 7) == 0) {
f9a9f5
+				free(dev);
f9a9f5
+				continue;
f9a9f5
+			}
f9a9f5
 		}
f9a9f5
 
f9a9f5
 		/* finally add this disk to the array */
f9a9f5
-- 
f9a9f5
2.7.5
f9a9f5