dcavalca / rpms / mdadm

Forked from rpms/mdadm 3 years ago
Clone

Blame SOURCES/generic-support-for-consistency-policy-and-PPL.patch

2c1b57
commit 5308f11727b889965efe5ac0e854d197c2b51f6d
2c1b57
Author: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
2c1b57
Date:   Wed Mar 29 11:54:15 2017 +0200
2c1b57
2c1b57
    Generic support for --consistency-policy and PPL
2c1b57
    
2c1b57
    Add a new parameter to mdadm: --consistency-policy=. It determines how
2c1b57
    the array maintains consistency in case of unexpected shutdown. This
2c1b57
    maps to the md sysfs attribute 'consistency_policy'. It can be used to
2c1b57
    create a raid5 array using PPL. Add the necessary plumbing to pass this
2c1b57
    option to metadata handlers. The write journal and bitmap
2c1b57
    functionalities are treated as different policies, which are implicitly
2c1b57
    selected when using --write-journal or --bitmap options.
2c1b57
    
2c1b57
    Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
2c1b57
    Signed-off-by: Jes Sorensen <Jes.Sorensen@gmail.com>
2c1b57
2c1b57
diff --git a/Create.c b/Create.c
2c1b57
index 2721884..4080bf6 100644
2c1b57
--- a/Create.c
2c1b57
+++ b/Create.c
2c1b57
@@ -259,7 +259,8 @@ int Create(struct supertype *st, char *mddev,
2c1b57
 	if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
2c1b57
 					      &s->chunk, s->size*2,
2c1b57
 					      data_offset, NULL,
2c1b57
-					      &newsize, c->verbose>=0))
2c1b57
+					      &newsize, s->consistency_policy,
2c1b57
+					      c->verbose>=0))
2c1b57
 		return 1;
2c1b57
 
2c1b57
 	if (s->chunk && s->chunk != UnSet) {
2c1b57
@@ -358,7 +359,8 @@ int Create(struct supertype *st, char *mddev,
2c1b57
 						st, s->level, s->layout, s->raiddisks,
2c1b57
 						&s->chunk, s->size*2,
2c1b57
 						dv->data_offset, dname,
2c1b57
-						&freesize, c->verbose > 0)) {
2c1b57
+						&freesize, s->consistency_policy,
2c1b57
+						c->verbose > 0)) {
2c1b57
 				case -1: /* Not valid, message printed, and not
2c1b57
 					  * worth checking any further */
2c1b57
 					exit(2);
2c1b57
@@ -395,6 +397,7 @@ int Create(struct supertype *st, char *mddev,
2c1b57
 						       &s->chunk, s->size*2,
2c1b57
 						       dv->data_offset,
2c1b57
 						       dname, &freesize,
2c1b57
+						       s->consistency_policy,
2c1b57
 						       c->verbose >= 0)) {
2c1b57
 
2c1b57
 				pr_err("%s is not suitable for this array.\n",
2c1b57
@@ -501,7 +504,8 @@ int Create(struct supertype *st, char *mddev,
2c1b57
 						       s->raiddisks,
2c1b57
 						       &s->chunk, minsize*2,
2c1b57
 						       data_offset,
2c1b57
-						       NULL, NULL, 0)) {
2c1b57
+						       NULL, NULL,
2c1b57
+						       s->consistency_policy, 0)) {
2c1b57
 				pr_err("devices too large for RAID level %d\n", s->level);
2c1b57
 				return 1;
2c1b57
 			}
2c1b57
@@ -528,6 +532,12 @@ int Create(struct supertype *st, char *mddev,
2c1b57
 	if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
2c1b57
 		s->bitmap_file = NULL;
2c1b57
 
2c1b57
+	if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
2c1b57
+	    !st->ss->write_init_ppl) {
2c1b57
+		pr_err("%s metadata does not support PPL\n", st->ss->name);
2c1b57
+		return 1;
2c1b57
+	}
2c1b57
+
2c1b57
 	if (!have_container && s->level > 0 && ((maxsize-s->size)*100 > maxsize)) {
2c1b57
 		if (c->runstop != 1 || c->verbose >= 0)
2c1b57
 			pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
2c1b57
@@ -720,7 +730,7 @@ int Create(struct supertype *st, char *mddev,
2c1b57
 				name += 2;
2c1b57
 		}
2c1b57
 	}
2c1b57
-	if (!st->ss->init_super(st, &info.array, s->size, name, c->homehost, uuid,
2c1b57
+	if (!st->ss->init_super(st, &info.array, s, name, c->homehost, uuid,
2c1b57
 				data_offset))
2c1b57
 		goto abort_locked;
2c1b57
 
2c1b57
diff --git a/Kill.c b/Kill.c
2c1b57
index f2fdb85..ff52561 100644
2c1b57
--- a/Kill.c
2c1b57
+++ b/Kill.c
2c1b57
@@ -63,7 +63,7 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl)
2c1b57
 	rv = st->ss->load_super(st, fd, dev);
2c1b57
 	if (rv == 0 || (force && rv >= 2)) {
2c1b57
 		st->ss->free_super(st);
2c1b57
-		st->ss->init_super(st, NULL, 0, "", NULL, NULL,
2c1b57
+		st->ss->init_super(st, NULL, NULL, "", NULL, NULL,
2c1b57
 				   INVALID_SECTORS);
2c1b57
 		if (st->ss->store_super(st, fd)) {
2c1b57
 			if (verbose >= 0)
2c1b57
diff --git a/ReadMe.c b/ReadMe.c
2c1b57
index 50d3807..fc04c2c 100644
2c1b57
--- a/ReadMe.c
2c1b57
+++ b/ReadMe.c
2c1b57
@@ -78,11 +78,11 @@ char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
2c1b57
  *     found, it is started.
2c1b57
  */
2c1b57
 
2c1b57
-char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
2c1b57
+char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
2c1b57
 char short_bitmap_options[]=
2c1b57
-		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
2c1b57
+		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
2c1b57
 char short_bitmap_auto_options[]=
2c1b57
-		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:";
2c1b57
+		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:k:";
2c1b57
 
2c1b57
 struct option long_options[] = {
2c1b57
     {"manage",    0, 0, ManageOpt},
2c1b57
@@ -148,6 +148,7 @@ struct option long_options[] = {
2c1b57
     {"nodes",1, 0, Nodes}, /* also for --assemble */
2c1b57
     {"home-cluster",1, 0, ClusterName},
2c1b57
     {"write-journal",1, 0, WriteJournal},
2c1b57
+    {"consistency-policy", 1, 0, 'k'},
2c1b57
 
2c1b57
     /* For assemble */
2c1b57
     {"uuid",      1, 0, 'u'},
2c1b57
@@ -362,27 +363,29 @@ char Help_create[] =
2c1b57
 " other levels.\n"
2c1b57
 "\n"
2c1b57
 " Options that are valid with --create (-C) are:\n"
2c1b57
-"  --bitmap=          : Create a bitmap for the array with the given filename\n"
2c1b57
-"                     : or an internal bitmap is 'internal' is given\n"
2c1b57
-"  --chunk=      -c   : chunk size in kibibytes\n"
2c1b57
-"  --rounding=        : rounding factor for linear array (==chunk size)\n"
2c1b57
-"  --level=      -l   : raid level: 0,1,4,5,6,10,linear,multipath and synonyms\n"
2c1b57
-"  --parity=     -p   : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
2c1b57
-"  --layout=          : same as --parity, for RAID10: [fno]NN \n"
2c1b57
-"  --raid-devices= -n : number of active devices in array\n"
2c1b57
-"  --spare-devices= -x: number of spare (eXtra) devices in initial array\n"
2c1b57
-"  --size=       -z   : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
2c1b57
-"  --data-offset=     : Space to leave between start of device and start\n"
2c1b57
-"                     : of array data.\n"
2c1b57
-"  --force       -f   : Honour devices as listed on command line.  Don't\n"
2c1b57
-"                     : insert a missing drive for RAID5.\n"
2c1b57
-"  --run         -R   : insist of running the array even if not all\n"
2c1b57
-"                     : devices are present or some look odd.\n"
2c1b57
-"  --readonly    -o   : start the array readonly - not supported yet.\n"
2c1b57
-"  --name=       -N   : Textual name for array - max 32 characters\n"
2c1b57
-"  --bitmap-chunk=    : bitmap chunksize in Kilobytes.\n"
2c1b57
-"  --delay=      -d   : bitmap update delay in seconds.\n"
2c1b57
-"  --write-journal=   : Specify journal device for RAID-4/5/6 array\n"
2c1b57
+"  --bitmap=          -b : Create a bitmap for the array with the given filename\n"
2c1b57
+"                        : or an internal bitmap if 'internal' is given\n"
2c1b57
+"  --chunk=           -c : chunk size in kibibytes\n"
2c1b57
+"  --rounding=           : rounding factor for linear array (==chunk size)\n"
2c1b57
+"  --level=           -l : raid level: 0,1,4,5,6,10,linear,multipath and synonyms\n"
2c1b57
+"  --parity=          -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
2c1b57
+"  --layout=             : same as --parity, for RAID10: [fno]NN \n"
2c1b57
+"  --raid-devices=    -n : number of active devices in array\n"
2c1b57
+"  --spare-devices=   -x : number of spare (eXtra) devices in initial array\n"
2c1b57
+"  --size=            -z : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
2c1b57
+"  --data-offset=        : Space to leave between start of device and start\n"
2c1b57
+"                        : of array data.\n"
2c1b57
+"  --force            -f : Honour devices as listed on command line.  Don't\n"
2c1b57
+"                        : insert a missing drive for RAID5.\n"
2c1b57
+"  --run              -R : insist on running the array even if not all\n"
2c1b57
+"                        : devices are present or some look odd.\n"
2c1b57
+"  --readonly         -o : start the array readonly - not supported yet.\n"
2c1b57
+"  --name=            -N : Textual name for array - max 32 characters\n"
2c1b57
+"  --bitmap-chunk=       : bitmap chunksize in Kilobytes.\n"
2c1b57
+"  --delay=           -d : bitmap update delay in seconds.\n"
2c1b57
+"  --write-journal=      : Specify journal device for RAID-4/5/6 array\n"
2c1b57
+"  --consistency-policy= : Specify the policy that determines how the array\n"
2c1b57
+"                     -k : maintains consistency in case of unexpected shutdown.\n"
2c1b57
 "\n"
2c1b57
 ;
2c1b57
 
2c1b57
diff --git a/maps.c b/maps.c
2c1b57
index 64f1df2..d9ee7de 100644
2c1b57
--- a/maps.c
2c1b57
+++ b/maps.c
2c1b57
@@ -129,6 +129,16 @@ mapping_t faultylayout[] = {
2c1b57
 	{ NULL, 0}
2c1b57
 };
2c1b57
 
2c1b57
+mapping_t consistency_policies[] = {
2c1b57
+	{ "unknown", CONSISTENCY_POLICY_UNKNOWN},
2c1b57
+	{ "none", CONSISTENCY_POLICY_NONE},
2c1b57
+	{ "resync", CONSISTENCY_POLICY_RESYNC},
2c1b57
+	{ "bitmap", CONSISTENCY_POLICY_BITMAP},
2c1b57
+	{ "journal", CONSISTENCY_POLICY_JOURNAL},
2c1b57
+	{ "ppl", CONSISTENCY_POLICY_PPL},
2c1b57
+	{ NULL, 0}
2c1b57
+};
2c1b57
+
2c1b57
 char *map_num(mapping_t *map, int num)
2c1b57
 {
2c1b57
 	while (map->name) {
2c1b57
diff --git a/mdadm.8.in b/mdadm.8.in
2c1b57
index df1d460..cad5db5 100644
2c1b57
--- a/mdadm.8.in
2c1b57
+++ b/mdadm.8.in
2c1b57
@@ -724,7 +724,9 @@ When creating an array on devices which are 100G or larger,
2c1b57
 .I mdadm
2c1b57
 automatically adds an internal bitmap as it will usually be
2c1b57
 beneficial.  This can be suppressed with
2c1b57
-.B "\-\-bitmap=none".
2c1b57
+.B "\-\-bitmap=none"
2c1b57
+or by selecting a different consistency policy with
2c1b57
+.BR \-\-consistency\-policy .
2c1b57
 
2c1b57
 .TP
2c1b57
 .BR \-\-bitmap\-chunk=
2c1b57
@@ -1020,6 +1022,36 @@ should be a SSD with reasonable lifetime.
2c1b57
 Auto creation of symlinks in /dev to /dev/md, option --symlinks must
2c1b57
 be 'no' or 'yes' and work with --create and --build.
2c1b57
 
2c1b57
+.TP
2c1b57
+.BR \-k ", " \-\-consistency\-policy=
2c1b57
+Specify how the array maintains consistency in case of unexpected shutdown.
2c1b57
+Only relevant for RAID levels with redundancy.
2c1b57
+Currently supported options are:
2c1b57
+.RS
2c1b57
+
2c1b57
+.TP
2c1b57
+.B resync
2c1b57
+A full resync is performed and all redundancy is regenerated when the array is
2c1b57
+started after an unclean shutdown.
2c1b57
+
2c1b57
+.TP
2c1b57
+.B bitmap
2c1b57
+Resync assisted by a write-intent bitmap. Implicitly selected when using
2c1b57
+.BR \-\-bitmap .
2c1b57
+
2c1b57
+.TP
2c1b57
+.B journal
2c1b57
+For RAID levels 4/5/6, a journal device is used to log transactions and replay
2c1b57
+them after an unclean shutdown. Implicitly selected when using
2c1b57
+.BR \-\-write\-journal .
2c1b57
+
2c1b57
+.TP
2c1b57
+.B ppl
2c1b57
+For RAID5 only, a Partial Parity Log (PPL) is used to close the write hole and
2c1b57
+eliminate resync. The PPL is stored in the metadata region of RAID member drives;
2c1b57
+no additional journal drive is needed.
2c1b57
+.RE
2c1b57
+
2c1b57
 
2c1b57
 .SH For assemble:
2c1b57
 
2c1b57
@@ -2153,8 +2185,10 @@ in the array exceed 100G is size, an internal write-intent bitmap
2c1b57
 will automatically be added unless some other option is explicitly
2c1b57
 requested with the
2c1b57
 .B \-\-bitmap
2c1b57
-option.  In any case space for a bitmap will be reserved so that one
2c1b57
-can be added layer with
2c1b57
+option or a different consistency policy is selected with the
2c1b57
+.B \-\-consistency\-policy
2c1b57
+option. In any case space for a bitmap will be reserved so that one
2c1b57
+can be added later with
2c1b57
 .BR "\-\-grow \-\-bitmap=internal" .
2c1b57
 
2c1b57
 If the metadata type supports it (currently only 1.x metadata), space
2c1b57
diff --git a/mdadm.c b/mdadm.c
2c1b57
index 08ddcab..d4e8286 100644
2c1b57
--- a/mdadm.c
2c1b57
+++ b/mdadm.c
2c1b57
@@ -78,6 +78,7 @@ int main(int argc, char *argv[])
2c1b57
 		.level		= UnSet,
2c1b57
 		.layout		= UnSet,
2c1b57
 		.bitmap_chunk	= UnSet,
2c1b57
+		.consistency_policy	= UnSet,
2c1b57
 	};
2c1b57
 
2c1b57
 	char sys_hostname[256];
2c1b57
@@ -1215,6 +1216,16 @@ int main(int argc, char *argv[])
2c1b57
 
2c1b57
 			s.journaldisks = 1;
2c1b57
 			continue;
2c1b57
+		case O(CREATE, 'k'):
2c1b57
+			s.consistency_policy = map_name(consistency_policies,
2c1b57
+							optarg);
2c1b57
+			if (s.consistency_policy == UnSet ||
2c1b57
+			    s.consistency_policy < CONSISTENCY_POLICY_RESYNC) {
2c1b57
+				pr_err("Invalid consistency policy: %s\n",
2c1b57
+				       optarg);
2c1b57
+				exit(2);
2c1b57
+			}
2c1b57
+			continue;
2c1b57
 		}
2c1b57
 		/* We have now processed all the valid options. Anything else is
2c1b57
 		 * an error
2c1b57
@@ -1242,9 +1253,47 @@ int main(int argc, char *argv[])
2c1b57
 		exit(0);
2c1b57
 	}
2c1b57
 
2c1b57
-	if (s.journaldisks && (s.level < 4 || s.level > 6)) {
2c1b57
-		pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
2c1b57
-		exit(2);
2c1b57
+	if (s.journaldisks) {
2c1b57
+		if (s.level < 4 || s.level > 6) {
2c1b57
+			pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
2c1b57
+			exit(2);
2c1b57
+		}
2c1b57
+		if (s.consistency_policy != UnSet &&
2c1b57
+		    s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
2c1b57
+			pr_err("--write-journal is not supported with consistency policy: %s\n",
2c1b57
+			       map_num(consistency_policies, s.consistency_policy));
2c1b57
+			exit(2);
2c1b57
+		}
2c1b57
+	}
2c1b57
+
2c1b57
+	if (mode == CREATE && s.consistency_policy != UnSet) {
2c1b57
+		if (s.level <= 0) {
2c1b57
+			pr_err("--consistency-policy not meaningful with level %s.\n",
2c1b57
+			       map_num(pers, s.level));
2c1b57
+			exit(2);
2c1b57
+		} else if (s.consistency_policy == CONSISTENCY_POLICY_JOURNAL &&
2c1b57
+			   !s.journaldisks) {
2c1b57
+			pr_err("--write-journal is required for consistency policy: %s\n",
2c1b57
+			       map_num(consistency_policies, s.consistency_policy));
2c1b57
+			exit(2);
2c1b57
+		} else if (s.consistency_policy == CONSISTENCY_POLICY_PPL &&
2c1b57
+			   s.level != 5) {
2c1b57
+			pr_err("PPL consistency policy is only supported for RAID level 5.\n");
2c1b57
+			exit(2);
2c1b57
+		} else if (s.consistency_policy == CONSISTENCY_POLICY_BITMAP &&
2c1b57
+			   (!s.bitmap_file ||
2c1b57
+			    strcmp(s.bitmap_file, "none") == 0)) {
2c1b57
+			pr_err("--bitmap is required for consistency policy: %s\n",
2c1b57
+			       map_num(consistency_policies, s.consistency_policy));
2c1b57
+			exit(2);
2c1b57
+		} else if (s.bitmap_file &&
2c1b57
+			   strcmp(s.bitmap_file, "none") != 0 &&
2c1b57
+			   s.consistency_policy != CONSISTENCY_POLICY_BITMAP &&
2c1b57
+			   s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
2c1b57
+			pr_err("--bitmap is not compatible with consistency policy: %s\n",
2c1b57
+			       map_num(consistency_policies, s.consistency_policy));
2c1b57
+			exit(2);
2c1b57
+		}
2c1b57
 	}
2c1b57
 
2c1b57
 	if (!mode && devs_found) {
2c1b57
diff --git a/mdadm.h b/mdadm.h
2c1b57
index cebc0c0..b52d4d3 100644
2c1b57
--- a/mdadm.h
2c1b57
+++ b/mdadm.h
2c1b57
@@ -279,6 +279,15 @@ struct mdinfo {
2c1b57
 	int journal_device_required;
2c1b57
 	int journal_clean;
2c1b57
 
2c1b57
+	enum {
2c1b57
+		CONSISTENCY_POLICY_UNKNOWN,
2c1b57
+		CONSISTENCY_POLICY_NONE,
2c1b57
+		CONSISTENCY_POLICY_RESYNC,
2c1b57
+		CONSISTENCY_POLICY_BITMAP,
2c1b57
+		CONSISTENCY_POLICY_JOURNAL,
2c1b57
+		CONSISTENCY_POLICY_PPL,
2c1b57
+	} consistency_policy;
2c1b57
+
2c1b57
 	/* During reshape we can sometimes change the data_offset to avoid
2c1b57
 	 * over-writing still-valid data.  We need to know if there is space.
2c1b57
 	 * So getinfo_super will fill in space_before and space_after in sectors.
2c1b57
@@ -426,6 +435,7 @@ enum special_options {
2c1b57
 	ClusterName,
2c1b57
 	ClusterConfirm,
2c1b57
 	WriteJournal,
2c1b57
+	ConsistencyPolicy,
2c1b57
 };
2c1b57
 
2c1b57
 enum prefix_standard {
2c1b57
@@ -527,6 +537,7 @@ struct shape {
2c1b57
 	int	assume_clean;
2c1b57
 	int	write_behind;
2c1b57
 	unsigned long long size;
2c1b57
+	int	consistency_policy;
2c1b57
 };
2c1b57
 
2c1b57
 /* List of device names - wildcards expanded */
2c1b57
@@ -618,6 +629,7 @@ enum sysfs_read_flags {
2c1b57
 	GET_STATE	= (1 << 23),
2c1b57
 	GET_ERROR	= (1 << 24),
2c1b57
 	GET_ARRAY_STATE = (1 << 25),
2c1b57
+	GET_CONSISTENCY_POLICY	= (1 << 26),
2c1b57
 };
2c1b57
 
2c1b57
 /* If fd >= 0, get the array it is open on,
2c1b57
@@ -701,7 +713,7 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
2c1b57
 
2c1b57
 extern char *map_num(mapping_t *map, int num);
2c1b57
 extern int map_name(mapping_t *map, char *name);
2c1b57
-extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
2c1b57
+extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[], consistency_policies[];
2c1b57
 
2c1b57
 extern char *map_dev_preferred(int major, int minor, int create,
2c1b57
 			       char *prefer);
2c1b57
@@ -863,7 +875,7 @@ extern struct superswitch {
2c1b57
 	 * metadata.
2c1b57
 	 */
2c1b57
 	int (*init_super)(struct supertype *st, mdu_array_info_t *info,
2c1b57
-			  unsigned long long size, char *name,
2c1b57
+			  struct shape *s, char *name,
2c1b57
 			  char *homehost, int *uuid,
2c1b57
 			  unsigned long long data_offset);
2c1b57
 
2c1b57
@@ -961,7 +973,7 @@ extern struct superswitch {
2c1b57
 				 int *chunk, unsigned long long size,
2c1b57
 				 unsigned long long data_offset,
2c1b57
 				 char *subdev, unsigned long long *freesize,
2c1b57
-				 int verbose);
2c1b57
+				 int consistency_policy, int verbose);
2c1b57
 
2c1b57
 	/* Return a linked list of 'mdinfo' structures for all arrays
2c1b57
 	 * in the container.  For non-containers, it is like
2c1b57
@@ -1059,6 +1071,9 @@ extern struct superswitch {
2c1b57
 	/* validate container after assemble */
2c1b57
 	int (*validate_container)(struct mdinfo *info);
2c1b57
 
2c1b57
+	/* write initial empty PPL on device */
2c1b57
+	int (*write_init_ppl)(struct supertype *st, struct mdinfo *info, int fd);
2c1b57
+
2c1b57
 	/* records new bad block in metadata */
2c1b57
 	int (*record_bad_block)(struct active_array *a, int n,
2c1b57
 					unsigned long long sector, int length);
2c1b57
diff --git a/super-ddf.c b/super-ddf.c
2c1b57
index 1707ad1..cdd16a4 100644
2c1b57
--- a/super-ddf.c
2c1b57
+++ b/super-ddf.c
2c1b57
@@ -2290,7 +2290,7 @@ static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2c1b57
 
2c1b57
 static int init_super_ddf(struct supertype *st,
2c1b57
 			  mdu_array_info_t *info,
2c1b57
-			  unsigned long long size, char *name, char *homehost,
2c1b57
+			  struct shape *s, char *name, char *homehost,
2c1b57
 			  int *uuid, unsigned long long data_offset)
2c1b57
 {
2c1b57
 	/* This is primarily called by Create when creating a new array.
2c1b57
@@ -2328,7 +2328,7 @@ static int init_super_ddf(struct supertype *st,
2c1b57
 	struct virtual_disk *vd;
2c1b57
 
2c1b57
 	if (st->sb)
2c1b57
-		return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2c1b57
+		return init_super_ddf_bvd(st, info, s->size, name, homehost, uuid,
2c1b57
 					  data_offset);
2c1b57
 
2c1b57
 	if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
2c1b57
@@ -3347,7 +3347,7 @@ static int validate_geometry_ddf(struct supertype *st,
2c1b57
 				 int *chunk, unsigned long long size,
2c1b57
 				 unsigned long long data_offset,
2c1b57
 				 char *dev, unsigned long long *freesize,
2c1b57
-				 int verbose)
2c1b57
+				 int consistency_policy, int verbose)
2c1b57
 {
2c1b57
 	int fd;
2c1b57
 	struct mdinfo *sra;
2c1b57
diff --git a/super-gpt.c b/super-gpt.c
2c1b57
index 8b080a0..bb38a97 100644
2c1b57
--- a/super-gpt.c
2c1b57
+++ b/super-gpt.c
2c1b57
@@ -205,7 +205,7 @@ static int validate_geometry(struct supertype *st, int level,
2c1b57
 			     int *chunk, unsigned long long size,
2c1b57
 			     unsigned long long data_offset,
2c1b57
 			     char *subdev, unsigned long long *freesize,
2c1b57
-			     int verbose)
2c1b57
+			     int consistency_policy, int verbose)
2c1b57
 {
2c1b57
 	pr_err("gpt metadata cannot be used this way\n");
2c1b57
 	return 0;
2c1b57
diff --git a/super-intel.c b/super-intel.c
2c1b57
index e1618f1..5d0f131 100644
2c1b57
--- a/super-intel.c
2c1b57
+++ b/super-intel.c
2c1b57
@@ -5155,7 +5155,7 @@ static int check_name(struct intel_super *super, char *name, int quiet)
2c1b57
 }
2c1b57
 
2c1b57
 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
2c1b57
-				  unsigned long long size, char *name,
2c1b57
+				  struct shape *s, char *name,
2c1b57
 				  char *homehost, int *uuid,
2c1b57
 				  long long data_offset)
2c1b57
 {
2c1b57
@@ -5250,7 +5250,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
2c1b57
 	strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
2c1b57
 	array_blocks = calc_array_size(info->level, info->raid_disks,
2c1b57
 					       info->layout, info->chunk_size,
2c1b57
-					       size * 2);
2c1b57
+					       s->size * 2);
2c1b57
 	/* round array size down to closest MB */
2c1b57
 	array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
2c1b57
 
2c1b57
@@ -5264,7 +5264,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
2c1b57
 	vol->curr_migr_unit = 0;
2c1b57
 	map = get_imsm_map(dev, MAP_0);
2c1b57
 	set_pba_of_lba0(map, super->create_offset);
2c1b57
-	set_blocks_per_member(map, info_to_blocks_per_member(info, size));
2c1b57
+	set_blocks_per_member(map, info_to_blocks_per_member(info, s->size));
2c1b57
 	map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
2c1b57
 	map->failed_disk_num = ~0;
2c1b57
 	if (info->level > 0)
2c1b57
@@ -5292,7 +5292,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
2c1b57
 		map->num_domains = 1;
2c1b57
 
2c1b57
 	/* info->size is only int so use the 'size' parameter instead */
2c1b57
-	num_data_stripes = (size * 2) / info_to_blocks_per_strip(info);
2c1b57
+	num_data_stripes = (s->size * 2) / info_to_blocks_per_strip(info);
2c1b57
 	num_data_stripes /= map->num_domains;
2c1b57
 	set_num_data_stripes(map, num_data_stripes);
2c1b57
 
2c1b57
@@ -5314,7 +5314,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
2c1b57
 }
2c1b57
 
2c1b57
 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
2c1b57
-			   unsigned long long size, char *name,
2c1b57
+		           struct shape *s, char *name,
2c1b57
 			   char *homehost, int *uuid,
2c1b57
 			   unsigned long long data_offset)
2c1b57
 {
2c1b57
@@ -5337,7 +5337,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
2c1b57
 	}
2c1b57
 
2c1b57
 	if (st->sb)
2c1b57
-		return init_super_imsm_volume(st, info, size, name, homehost, uuid,
2c1b57
+		return init_super_imsm_volume(st, info, s, name, homehost, uuid,
2c1b57
 					      data_offset);
2c1b57
 
2c1b57
 	if (info)
2c1b57
@@ -6914,7 +6914,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
2c1b57
 				  int raiddisks, int *chunk, unsigned long long size,
2c1b57
 				  unsigned long long data_offset,
2c1b57
 				  char *dev, unsigned long long *freesize,
2c1b57
-				  int verbose)
2c1b57
+				  int consistency_policy, int verbose)
2c1b57
 {
2c1b57
 	int fd, cfd;
2c1b57
 	struct mdinfo *sra;
2c1b57
@@ -10953,7 +10953,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
2c1b57
 				    geo->raid_disks + devNumChange,
2c1b57
 				    &chunk,
2c1b57
 				    geo->size, INVALID_SECTORS,
2c1b57
-				    0, 0, 1))
2c1b57
+				    0, 0, info.consistency_policy, 1))
2c1b57
 		change = -1;
2c1b57
 
2c1b57
 	if (check_devs) {
2c1b57
diff --git a/super-mbr.c b/super-mbr.c
2c1b57
index f5e4cea..1bbe57a 100644
2c1b57
--- a/super-mbr.c
2c1b57
+++ b/super-mbr.c
2c1b57
@@ -193,7 +193,7 @@ static int validate_geometry(struct supertype *st, int level,
2c1b57
 			     int *chunk, unsigned long long size,
2c1b57
 			     unsigned long long data_offset,
2c1b57
 			     char *subdev, unsigned long long *freesize,
2c1b57
-			     int verbose)
2c1b57
+			     int consistency_policy, int verbose)
2c1b57
 {
2c1b57
 	pr_err("mbr metadata cannot be used this way\n");
2c1b57
 	return 0;
2c1b57
diff --git a/super0.c b/super0.c
2c1b57
index f5b4507..7a555e3 100644
2c1b57
--- a/super0.c
2c1b57
+++ b/super0.c
2c1b57
@@ -725,7 +725,7 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
2c1b57
  * We use the first 8 bytes (64bits) of the sha1 of the host name
2c1b57
  */
2c1b57
 static int init_super0(struct supertype *st, mdu_array_info_t *info,
2c1b57
-		       unsigned long long size, char *ignored_name,
2c1b57
+		       struct shape *s, char *ignored_name,
2c1b57
 		       char *homehost, int *uuid,
2c1b57
 		       unsigned long long data_offset)
2c1b57
 {
2c1b57
@@ -764,8 +764,8 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
2c1b57
 	sb->gvalid_words = 0; /* ignored */
2c1b57
 	sb->ctime = time(0);
2c1b57
 	sb->level = info->level;
2c1b57
-	sb->size = size;
2c1b57
-	if (size != (unsigned long long)sb->size)
2c1b57
+	sb->size = s->size;
2c1b57
+	if (s->size != (unsigned long long)sb->size)
2c1b57
 		return 0;
2c1b57
 	sb->nr_disks = info->nr_disks;
2c1b57
 	sb->raid_disks = info->raid_disks;
2c1b57
@@ -1267,7 +1267,7 @@ static int validate_geometry0(struct supertype *st, int level,
2c1b57
 			      int *chunk, unsigned long long size,
2c1b57
 			      unsigned long long data_offset,
2c1b57
 			      char *subdev, unsigned long long *freesize,
2c1b57
-			      int verbose)
2c1b57
+			      int consistency_policy, int verbose)
2c1b57
 {
2c1b57
 	unsigned long long ldsize;
2c1b57
 	int fd;
2c1b57
diff --git a/super1.c b/super1.c
2c1b57
index f3520ac..4a0f041 100644
2c1b57
--- a/super1.c
2c1b57
+++ b/super1.c
2c1b57
@@ -1397,7 +1397,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
2c1b57
 }
2c1b57
 
2c1b57
 static int init_super1(struct supertype *st, mdu_array_info_t *info,
2c1b57
-		       unsigned long long size, char *name, char *homehost,
2c1b57
+		       struct shape *s, char *name, char *homehost,
2c1b57
 		       int *uuid, unsigned long long data_offset)
2c1b57
 {
2c1b57
 	struct mdp_superblock_1 *sb;
2c1b57
@@ -1450,7 +1450,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
2c1b57
 	sb->ctime = __cpu_to_le64((unsigned long long)time(0));
2c1b57
 	sb->level = __cpu_to_le32(info->level);
2c1b57
 	sb->layout = __cpu_to_le32(info->layout);
2c1b57
-	sb->size = __cpu_to_le64(size*2ULL);
2c1b57
+	sb->size = __cpu_to_le64(s->size*2ULL);
2c1b57
 	sb->chunksize = __cpu_to_le32(info->chunk_size>>9);
2c1b57
 	sb->raid_disks = __cpu_to_le32(info->raid_disks);
2c1b57
 
2c1b57
@@ -2487,7 +2487,7 @@ static int validate_geometry1(struct supertype *st, int level,
2c1b57
 			      int *chunk, unsigned long long size,
2c1b57
 			      unsigned long long data_offset,
2c1b57
 			      char *subdev, unsigned long long *freesize,
2c1b57
-			      int verbose)
2c1b57
+			      int consistency_policy, int verbose)
2c1b57
 {
2c1b57
 	unsigned long long ldsize, devsize;
2c1b57
 	int bmspace;
2c1b57
diff --git a/sysfs.c b/sysfs.c
2c1b57
index b0657a0..53589a7 100644
2c1b57
--- a/sysfs.c
2c1b57
+++ b/sysfs.c
2c1b57
@@ -242,6 +242,17 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
2c1b57
 	} else
2c1b57
 		sra->sysfs_array_state[0] = 0;
2c1b57
 
2c1b57
+	if (options & GET_CONSISTENCY_POLICY) {
2c1b57
+		strcpy(base, "consistency_policy");
2c1b57
+		if (load_sys(fname, buf, sizeof(buf))) {
2c1b57
+			sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
2c1b57
+		} else {
2c1b57
+			sra->consistency_policy = map_name(consistency_policies, buf);
2c1b57
+			if (sra->consistency_policy == UnSet)
2c1b57
+				sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
2c1b57
+		}
2c1b57
+	}
2c1b57
+
2c1b57
 	if (! (options & GET_DEVS))
2c1b57
 		return sra;
2c1b57