Blame SOURCES/0055-Manage-Block-unsafe-member-failing.patch

37f2b0
From fc6fd4063769f4194c3fb8f77b32b2819e140fb9 Mon Sep 17 00:00:00 2001
37f2b0
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
37f2b0
Date: Thu, 18 Aug 2022 11:47:21 +0200
37f2b0
Subject: [PATCH 55/83] Manage: Block unsafe member failing
37f2b0
37f2b0
The kernel may or may not block mdadm from removing a member device if it
37f2b0
will cause the array to enter a failed state. It depends on the raid personality
37f2b0
implementation in kernel.
37f2b0
Add verification on requested removal path (#mdadm --set-faulty
37f2b0
command).
37f2b0
37f2b0
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
37f2b0
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
37f2b0
---
37f2b0
 Manage.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
37f2b0
 1 file changed, 52 insertions(+), 1 deletion(-)
37f2b0
37f2b0
diff --git a/Manage.c b/Manage.c
37f2b0
index a142f8bd..b1d0e630 100644
37f2b0
--- a/Manage.c
37f2b0
+++ b/Manage.c
37f2b0
@@ -1285,6 +1285,50 @@ int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
37f2b0
 	return -1;
37f2b0
 }
37f2b0
 
37f2b0
+/**
37f2b0
+ * is_remove_safe() - Check if remove is safe.
37f2b0
+ * @array: Array info (level, layout, raid_disks and state are read).
37f2b0
+ * @fd: Array file descriptor, used to read the member list from sysfs.
37f2b0
+ * @devname: Name of device to remove (first 5 chars are skipped, presumably "/dev/").
37f2b0
+ * @verbose: If non-zero, report a failure to read sysfs.
37f2b0
+ *
37f2b0
+ * The function determines if array will be operational after removing
37f2b0
+ * &devname, by marking every other in-sync member slot and asking enough().
37f2b0
+ *
37f2b0
+ * Return: True if array will be operational, false if not or if sysfs cannot be read.
37f2b0
+ */
37f2b0
+bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const int verbose)
37f2b0
+{
37f2b0
+	dev_t devid = devnm2devid(devname + 5);
37f2b0
+	struct mdinfo *mdi = sysfs_read(fd, NULL, GET_DEVS | GET_DISKS | GET_STATE);
37f2b0
+
37f2b0
+	if (!mdi) {
37f2b0
+		if (verbose)
37f2b0
+			pr_err("Failed to read sysfs attributes for %s\n", devname);
37f2b0
+		return false;
37f2b0
+	}
37f2b0
+
37f2b0
+	char *avail = xcalloc(array->raid_disks, sizeof(char));
37f2b0
+	/* Use a separate cursor: mdi must stay at the list head so sysfs_free() releases it. */
37f2b0
+	for (struct mdinfo *dev = mdi->devs; dev; dev = dev->next) {
37f2b0
+		if (dev->disk.raid_disk < 0 || dev->disk.raid_disk >= array->raid_disks)
37f2b0
+			continue; /* no slot, or slot outside avail[] */
37f2b0
+		if (!(dev->disk.state & (1 << MD_DISK_SYNC)))
37f2b0
+			continue; /* member is not in sync */
37f2b0
+		if (makedev(dev->disk.major, dev->disk.minor) == devid)
37f2b0
+			continue; /* the device being removed does not count */
37f2b0
+		avail[dev->disk.raid_disk] = 1;
37f2b0
+	}
37f2b0
+	sysfs_free(mdi);
37f2b0
+
37f2b0
+	bool is_enough = enough(array->level, array->raid_disks,
37f2b0
+				array->layout, (array->state & 1),
37f2b0
+				avail);
37f2b0
+
37f2b0
+	free(avail);
37f2b0
+	return is_enough;
37f2b0
+}
37f2b0
+
37f2b0
 int Manage_subdevs(char *devname, int fd,
37f2b0
 		   struct mddev_dev *devlist, int verbose, int test,
37f2b0
 		   char *update, int force)
37f2b0
@@ -1598,7 +1642,14 @@ int Manage_subdevs(char *devname, int fd,
37f2b0
 			break;
37f2b0
 
37f2b0
 		case 'f': /* set faulty */
37f2b0
-			/* FIXME check current member */
37f2b0
+			if (!is_remove_safe(&array, fd, dv->devname, verbose)) {
37f2b0
+				pr_err("Cannot remove %s from %s, array will be failed.\n",
37f2b0
+				       dv->devname, devname);
37f2b0
+				if (sysfd >= 0)
37f2b0
+					close(sysfd);
37f2b0
+				goto abort;
37f2b0
+			}
37f2b0
+
37f2b0
 			if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
37f2b0
 			    (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
37f2b0
 						rdev))) {
37f2b0
-- 
37f2b0
2.38.1
37f2b0