Blame SOURCES/0006-imsm-update-metadata-correctly-while-raid10-double-d.patch

5d5466
From d7a1fda2769ba272d89de6caeab35d52b73a9c3c Mon Sep 17 00:00:00 2001
5d5466
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
5d5466
Date: Wed, 17 Oct 2018 12:11:41 +0200
5d5466
Subject: [RHEL7.7 PATCH 06/24] imsm: update metadata correctly while raid10
5d5466
 double degradation
5d5466
5d5466
Mdmon calls end_migration() when map state changes from normal to
5d5466
degraded. This is not valid because in the raid10 double degradation case
5d5466
mdmon breaks checkpointing while the array is still rebuilding.
5d5466
In this case mdmon has to mark the map as degraded and continue marking
5d5466
the recovery checkpoint in metadata. Migration can be finished only if the
5d5466
newly failed device is the rebuilding device.
5d5466
5d5466
Catch the double degraded to degraded transition. In that case migration
5d5466
is finished but the map state doesn't change; the array is still degraded.
5d5466
5d5466
Update failed_disk_num correctly. If double degradation
5d5466
happens, rebuild will start on the lowest slot, but this variable points
5d5466
to the first failed slot. If a second failure happens during rebuild, this
5d5466
variable shouldn't be updated until the rebuild is finished.
5d5466
5d5466
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
5d5466
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
5d5466
---
5d5466
 super-intel.c | 25 +++++++++++++++++++------
5d5466
 1 file changed, 19 insertions(+), 6 deletions(-)
5d5466
5d5466
diff --git a/super-intel.c b/super-intel.c
5d5466
index 6438987..d2035cc 100644
5d5466
--- a/super-intel.c
5d5466
+++ b/super-intel.c
5d5466
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
5d5466
 			set_imsm_ord_tbl_ent(map2, slot2,
5d5466
 					     idx | IMSM_ORD_REBUILD);
5d5466
 	}
5d5466
-	if (map->failed_disk_num == 0xff)
5d5466
+	if (map->failed_disk_num == 0xff ||
5d5466
+		(!is_rebuilding(dev) && map->failed_disk_num > slot))
5d5466
 		map->failed_disk_num = slot;
5d5466
 
5d5466
 	clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
5d5466
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
5d5466
 			break;
5d5466
 		}
5d5466
 		if (is_rebuilding(dev)) {
5d5466
-			dprintf_cont("while rebuilding.");
5d5466
+			dprintf_cont("while rebuilding ");
5d5466
 			if (map->map_state != map_state)  {
5d5466
-				dprintf_cont(" Map state change");
5d5466
-				end_migration(dev, super, map_state);
5d5466
+				dprintf_cont("map state change ");
5d5466
+				if (n == map->failed_disk_num) {
5d5466
+					dprintf_cont("end migration");
5d5466
+					end_migration(dev, super, map_state);
5d5466
+				} else {
5d5466
+					dprintf_cont("raid10 double degradation, map state change");
5d5466
+					map->map_state = map_state;
5d5466
+				}
5d5466
 				super->updates_pending++;
5d5466
-			} else if (!rebuild_done) {
5d5466
+			} else if (!rebuild_done)
5d5466
 				break;
5d5466
+			else if (n == map->failed_disk_num) {
5d5466
+				/* r10 double degraded to degraded transition */
5d5466
+				dprintf_cont("raid10 double degradation end migration");
5d5466
+				end_migration(dev, super, map_state);
5d5466
+				a->last_checkpoint = 0;
5d5466
+				super->updates_pending++;
5d5466
 			}
5d5466
 
5d5466
 			/* check if recovery is really finished */
5d5466
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
5d5466
 				}
5d5466
 			if (recovery_not_finished) {
5d5466
 				dprintf_cont("\n");
5d5466
-				dprintf("Rebuild has not finished yet, state not changed");
5d5466
+				dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
5d5466
 				if (a->last_checkpoint < mdi->recovery_start) {
5d5466
 					a->last_checkpoint =
5d5466
 						mdi->recovery_start;
5d5466
-- 
5d5466
2.7.5
5d5466