Blame SOURCES/0055-Manage-Block-unsafe-member-failing.patch

c3a0e9
From fc6fd4063769f4194c3fb8f77b32b2819e140fb9 Mon Sep 17 00:00:00 2001
c3a0e9
From: Mateusz Kusiak <mateusz.kusiak@intel.com>
c3a0e9
Date: Thu, 18 Aug 2022 11:47:21 +0200
91179e
Subject: [PATCH 55/83] Manage: Block unsafe member failing
c3a0e9
c3a0e9
The kernel may or may not block mdadm from removing a member device if it
c3a0e9
would leave the array in a failed state. It depends on the RAID personality
c3a0e9
implementation in kernel.
c3a0e9
Add verification on requested removal path (#mdadm --set-faulty
c3a0e9
command).
c3a0e9
c3a0e9
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
c3a0e9
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
c3a0e9
---
c3a0e9
 Manage.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
c3a0e9
 1 file changed, 52 insertions(+), 1 deletion(-)
c3a0e9
c3a0e9
diff --git a/Manage.c b/Manage.c
c3a0e9
index a142f8bd..b1d0e630 100644
c3a0e9
--- a/Manage.c
c3a0e9
+++ b/Manage.c
c3a0e9
@@ -1285,6 +1285,50 @@ int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
c3a0e9
 	return -1;
c3a0e9
 }
c3a0e9
 
c3a0e9
+/**
c3a0e9
+ * is_remove_safe() - Check if remove is safe.
c3a0e9
+ * @array: Array info; level, raid_disks, layout and state are read.
c3a0e9
+ * @fd: Array file descriptor, used to read the member list from sysfs.
c3a0e9
+ * @devname: Device to remove; the first 5 chars (presumably "/dev/") are skipped.
c3a0e9
+ * @verbose: If non-zero, report a sysfs read failure with pr_err().
c3a0e9
+ *
c3a0e9
+ * Marks every in-sync member other than &devname as available and asks
c3a0e9
+ * enough() whether the array still works with only those members.
c3a0e9
+ *
c3a0e9
+ * Return: True if array will be operational, false otherwise or on sysfs error.
c3a0e9
+ */
c3a0e9
+bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const int verbose)
c3a0e9
+{
c3a0e9
+	dev_t devid = devnm2devid(devname + 5);
c3a0e9
+	struct mdinfo *mdi = sysfs_read(fd, NULL, GET_DEVS | GET_DISKS | GET_STATE);
c3a0e9
+
c3a0e9
+	if (!mdi) {
c3a0e9
+		if (verbose)
c3a0e9
+			pr_err("Failed to read sysfs attributes for %s\n", devname);
c3a0e9
+		return false;
c3a0e9
+	}
c3a0e9
+
c3a0e9
+	char *avail = xcalloc(array->raid_disks, sizeof(char));
c3a0e9
+
c3a0e9
+	for (struct mdinfo *disk = mdi->devs; disk; disk = disk->next) {
c3a0e9
+		if (disk->disk.raid_disk < 0)
c3a0e9
+			continue;
c3a0e9
+		if (!(disk->disk.state & (1 << MD_DISK_SYNC)))
c3a0e9
+			continue;
c3a0e9
+		if (makedev(disk->disk.major, disk->disk.minor) == devid)
c3a0e9
+			continue;
c3a0e9
+		avail[disk->disk.raid_disk] = 1;
c3a0e9
+	}
c3a0e9
+	sysfs_free(mdi); /* mdi is still the list head, so the whole list is freed */
c3a0e9
+
c3a0e9
+	bool is_enough = enough(array->level, array->raid_disks,
c3a0e9
+				array->layout, (array->state & 1),
c3a0e9
+				avail);
c3a0e9
+
c3a0e9
+	free(avail);
c3a0e9
+	return is_enough;
c3a0e9
+}
c3a0e9
+
c3a0e9
 int Manage_subdevs(char *devname, int fd,
c3a0e9
 		   struct mddev_dev *devlist, int verbose, int test,
c3a0e9
 		   char *update, int force)
c3a0e9
@@ -1598,7 +1642,14 @@ int Manage_subdevs(char *devname, int fd,
c3a0e9
 			break;
c3a0e9
 
c3a0e9
 		case 'f': /* set faulty */
c3a0e9
-			/* FIXME check current member */
c3a0e9
+			if (!is_remove_safe(&array, fd, dv->devname, verbose)) {
c3a0e9
+				pr_err("Cannot remove %s from %s, array will be failed.\n",
c3a0e9
+				       dv->devname, devname);
c3a0e9
+				if (sysfd >= 0)
c3a0e9
+					close(sysfd);
c3a0e9
+				goto abort;
c3a0e9
+			}
c3a0e9
+
c3a0e9
 			if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
c3a0e9
 			    (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
c3a0e9
 						rdev))) {
c3a0e9
-- 
c3a0e9
2.38.1
c3a0e9