Blame SOURCES/imsm-update-metadata-correctly-while-raid10-double-degradation.patch

1f6b6a
commit d7a1fda2769ba272d89de6caeab35d52b73a9c3c
1f6b6a
Author: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
1f6b6a
Date:   Wed Oct 17 12:11:41 2018 +0200
1f6b6a
1f6b6a
    imsm: update metadata correctly while raid10 double degradation
1f6b6a
    
1f6b6a
    Mdmon calls end_migration() when map state changes from normal to
1f6b6a
    degraded. It is not valid because in raid 10 double degradation case
1f6b6a
    mdmon breaks checkpointing but array is still rebuilding.
1f6b6a
    In this case mdmon has to mark map as degraded and continue marking
1f6b6a
    recovery checkpoint in metadata. Migration can be finished only if newly
1f6b6a
    failed device is a rebuilding device.
1f6b6a
    
1f6b6a
    Add catching double degraded to degraded transition. Migration is
1f6b6a
    finished but map state doesn't change, array is still degraded.
1f6b6a
    
1f6b6a
    Update failed_disk_num correctly. If double degradation
1f6b6a
    happens rebuild will start on the lowest slot, but this variable points
1f6b6a
    to the first failed slot. If a second failure happens during rebuild, this
1f6b6a
    variable shouldn't be updated until the rebuild is finished.
1f6b6a
    
1f6b6a
    Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
1f6b6a
    Signed-off-by: Jes Sorensen <jsorensen@fb.com>
1f6b6a
1f6b6a
diff --git a/super-intel.c b/super-intel.c
1f6b6a
index 6438987..d2035cc 100644
1f6b6a
--- a/super-intel.c
1f6b6a
+++ b/super-intel.c
1f6b6a
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
1f6b6a
 			set_imsm_ord_tbl_ent(map2, slot2,
1f6b6a
 					     idx | IMSM_ORD_REBUILD);
1f6b6a
 	}
1f6b6a
-	if (map->failed_disk_num == 0xff)
1f6b6a
+	if (map->failed_disk_num == 0xff ||
1f6b6a
+		(!is_rebuilding(dev) && map->failed_disk_num > slot))
1f6b6a
 		map->failed_disk_num = slot;
1f6b6a
 
1f6b6a
 	clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
1f6b6a
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
1f6b6a
 			break;
1f6b6a
 		}
1f6b6a
 		if (is_rebuilding(dev)) {
1f6b6a
-			dprintf_cont("while rebuilding.");
1f6b6a
+			dprintf_cont("while rebuilding ");
1f6b6a
 			if (map->map_state != map_state)  {
1f6b6a
-				dprintf_cont(" Map state change");
1f6b6a
-				end_migration(dev, super, map_state);
1f6b6a
+				dprintf_cont("map state change ");
1f6b6a
+				if (n == map->failed_disk_num) {
1f6b6a
+					dprintf_cont("end migration");
1f6b6a
+					end_migration(dev, super, map_state);
1f6b6a
+				} else {
1f6b6a
+					dprintf_cont("raid10 double degradation, map state change");
1f6b6a
+					map->map_state = map_state;
1f6b6a
+				}
1f6b6a
 				super->updates_pending++;
1f6b6a
-			} else if (!rebuild_done) {
1f6b6a
+			} else if (!rebuild_done)
1f6b6a
 				break;
1f6b6a
+			else if (n == map->failed_disk_num) {
1f6b6a
+				/* r10 double degraded to degraded transition */
1f6b6a
+				dprintf_cont("raid10 double degradation end migration");
1f6b6a
+				end_migration(dev, super, map_state);
1f6b6a
+				a->last_checkpoint = 0;
1f6b6a
+				super->updates_pending++;
1f6b6a
 			}
1f6b6a
 
1f6b6a
 			/* check if recovery is really finished */
1f6b6a
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
1f6b6a
 				}
1f6b6a
 			if (recovery_not_finished) {
1f6b6a
 				dprintf_cont("\n");
1f6b6a
-				dprintf("Rebuild has not finished yet, state not changed");
1f6b6a
+				dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
1f6b6a
 				if (a->last_checkpoint < mdi->recovery_start) {
1f6b6a
 					a->last_checkpoint =
1f6b6a
 						mdi->recovery_start;