dcavalca / rpms / mdadm

Forked from rpms/mdadm 3 years ago
Clone

Blame SOURCES/generic-support-for-consistency-policy-and-PPL.patch

dd3a91
commit 5308f11727b889965efe5ac0e854d197c2b51f6d
dd3a91
Author: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
dd3a91
Date:   Wed Mar 29 11:54:15 2017 +0200
dd3a91
dd3a91
    Generic support for --consistency-policy and PPL
dd3a91
    
dd3a91
    Add a new parameter to mdadm: --consistency-policy=. It determines how
dd3a91
    the array maintains consistency in case of unexpected shutdown. This
dd3a91
    maps to the md sysfs attribute 'consistency_policy'. It can be used to
dd3a91
    create a raid5 array using PPL. Add the necessary plumbing to pass this
dd3a91
    option to metadata handlers. The write journal and bitmap
dd3a91
    functionalities are treated as different policies, which are implicitly
dd3a91
    selected when using --write-journal or --bitmap options.
dd3a91
    
dd3a91
    Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
dd3a91
    Signed-off-by: Jes Sorensen <Jes.Sorensen@gmail.com>
dd3a91
dd3a91
diff --git a/Create.c b/Create.c
dd3a91
index 2721884..4080bf6 100644
dd3a91
--- a/Create.c
dd3a91
+++ b/Create.c
dd3a91
@@ -259,7 +259,8 @@ int Create(struct supertype *st, char *mddev,
dd3a91
 	if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
dd3a91
 					      &s->chunk, s->size*2,
dd3a91
 					      data_offset, NULL,
dd3a91
-					      &newsize, c->verbose>=0))
dd3a91
+					      &newsize, s->consistency_policy,
dd3a91
+					      c->verbose>=0))
dd3a91
 		return 1;
dd3a91
 
dd3a91
 	if (s->chunk && s->chunk != UnSet) {
dd3a91
@@ -358,7 +359,8 @@ int Create(struct supertype *st, char *mddev,
dd3a91
 						st, s->level, s->layout, s->raiddisks,
dd3a91
 						&s->chunk, s->size*2,
dd3a91
 						dv->data_offset, dname,
dd3a91
-						&freesize, c->verbose > 0)) {
dd3a91
+						&freesize, s->consistency_policy,
dd3a91
+						c->verbose > 0)) {
dd3a91
 				case -1: /* Not valid, message printed, and not
dd3a91
 					  * worth checking any further */
dd3a91
 					exit(2);
dd3a91
@@ -395,6 +397,7 @@ int Create(struct supertype *st, char *mddev,
dd3a91
 						       &s->chunk, s->size*2,
dd3a91
 						       dv->data_offset,
dd3a91
 						       dname, &freesize,
dd3a91
+						       s->consistency_policy,
dd3a91
 						       c->verbose >= 0)) {
dd3a91
 
dd3a91
 				pr_err("%s is not suitable for this array.\n",
dd3a91
@@ -501,7 +504,8 @@ int Create(struct supertype *st, char *mddev,
dd3a91
 						       s->raiddisks,
dd3a91
 						       &s->chunk, minsize*2,
dd3a91
 						       data_offset,
dd3a91
-						       NULL, NULL, 0)) {
dd3a91
+						       NULL, NULL,
dd3a91
+						       s->consistency_policy, 0)) {
dd3a91
 				pr_err("devices too large for RAID level %d\n", s->level);
dd3a91
 				return 1;
dd3a91
 			}
dd3a91
@@ -528,6 +532,12 @@ int Create(struct supertype *st, char *mddev,
dd3a91
 	if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
dd3a91
 		s->bitmap_file = NULL;
dd3a91
 
dd3a91
+	if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
dd3a91
+	    !st->ss->write_init_ppl) {
dd3a91
+		pr_err("%s metadata does not support PPL\n", st->ss->name);
dd3a91
+		return 1;
dd3a91
+	}
dd3a91
+
dd3a91
 	if (!have_container && s->level > 0 && ((maxsize-s->size)*100 > maxsize)) {
dd3a91
 		if (c->runstop != 1 || c->verbose >= 0)
dd3a91
 			pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
dd3a91
@@ -720,7 +730,7 @@ int Create(struct supertype *st, char *mddev,
dd3a91
 				name += 2;
dd3a91
 		}
dd3a91
 	}
dd3a91
-	if (!st->ss->init_super(st, &info.array, s->size, name, c->homehost, uuid,
dd3a91
+	if (!st->ss->init_super(st, &info.array, s, name, c->homehost, uuid,
dd3a91
 				data_offset))
dd3a91
 		goto abort_locked;
dd3a91
 
dd3a91
diff --git a/Kill.c b/Kill.c
dd3a91
index f2fdb85..ff52561 100644
dd3a91
--- a/Kill.c
dd3a91
+++ b/Kill.c
dd3a91
@@ -63,7 +63,7 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl)
dd3a91
 	rv = st->ss->load_super(st, fd, dev);
dd3a91
 	if (rv == 0 || (force && rv >= 2)) {
dd3a91
 		st->ss->free_super(st);
dd3a91
-		st->ss->init_super(st, NULL, 0, "", NULL, NULL,
dd3a91
+		st->ss->init_super(st, NULL, NULL, "", NULL, NULL,
dd3a91
 				   INVALID_SECTORS);
dd3a91
 		if (st->ss->store_super(st, fd)) {
dd3a91
 			if (verbose >= 0)
dd3a91
diff --git a/ReadMe.c b/ReadMe.c
dd3a91
index 50d3807..fc04c2c 100644
dd3a91
--- a/ReadMe.c
dd3a91
+++ b/ReadMe.c
dd3a91
@@ -78,11 +78,11 @@ char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
dd3a91
  *     found, it is started.
dd3a91
  */
dd3a91
 
dd3a91
-char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
dd3a91
+char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
dd3a91
 char short_bitmap_options[]=
dd3a91
-		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
dd3a91
+		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
dd3a91
 char short_bitmap_auto_options[]=
dd3a91
-		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:";
dd3a91
+		"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:k:";
dd3a91
 
dd3a91
 struct option long_options[] = {
dd3a91
     {"manage",    0, 0, ManageOpt},
dd3a91
@@ -148,6 +148,7 @@ struct option long_options[] = {
dd3a91
     {"nodes",1, 0, Nodes}, /* also for --assemble */
dd3a91
     {"home-cluster",1, 0, ClusterName},
dd3a91
     {"write-journal",1, 0, WriteJournal},
dd3a91
+    {"consistency-policy", 1, 0, 'k'},
dd3a91
 
dd3a91
     /* For assemble */
dd3a91
     {"uuid",      1, 0, 'u'},
dd3a91
@@ -362,27 +363,29 @@ char Help_create[] =
dd3a91
 " other levels.\n"
dd3a91
 "\n"
dd3a91
 " Options that are valid with --create (-C) are:\n"
dd3a91
-"  --bitmap=          : Create a bitmap for the array with the given filename\n"
dd3a91
-"                     : or an internal bitmap is 'internal' is given\n"
dd3a91
-"  --chunk=      -c   : chunk size in kibibytes\n"
dd3a91
-"  --rounding=        : rounding factor for linear array (==chunk size)\n"
dd3a91
-"  --level=      -l   : raid level: 0,1,4,5,6,10,linear,multipath and synonyms\n"
dd3a91
-"  --parity=     -p   : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
dd3a91
-"  --layout=          : same as --parity, for RAID10: [fno]NN \n"
dd3a91
-"  --raid-devices= -n : number of active devices in array\n"
dd3a91
-"  --spare-devices= -x: number of spare (eXtra) devices in initial array\n"
dd3a91
-"  --size=       -z   : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
dd3a91
-"  --data-offset=     : Space to leave between start of device and start\n"
dd3a91
-"                     : of array data.\n"
dd3a91
-"  --force       -f   : Honour devices as listed on command line.  Don't\n"
dd3a91
-"                     : insert a missing drive for RAID5.\n"
dd3a91
-"  --run         -R   : insist of running the array even if not all\n"
dd3a91
-"                     : devices are present or some look odd.\n"
dd3a91
-"  --readonly    -o   : start the array readonly - not supported yet.\n"
dd3a91
-"  --name=       -N   : Textual name for array - max 32 characters\n"
dd3a91
-"  --bitmap-chunk=    : bitmap chunksize in Kilobytes.\n"
dd3a91
-"  --delay=      -d   : bitmap update delay in seconds.\n"
dd3a91
-"  --write-journal=   : Specify journal device for RAID-4/5/6 array\n"
dd3a91
+"  --bitmap=          -b : Create a bitmap for the array with the given filename\n"
dd3a91
+"                        : or an internal bitmap if 'internal' is given\n"
dd3a91
+"  --chunk=           -c : chunk size in kibibytes\n"
dd3a91
+"  --rounding=           : rounding factor for linear array (==chunk size)\n"
dd3a91
+"  --level=           -l : raid level: 0,1,4,5,6,10,linear,multipath and synonyms\n"
dd3a91
+"  --parity=          -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
dd3a91
+"  --layout=             : same as --parity, for RAID10: [fno]NN \n"
dd3a91
+"  --raid-devices=    -n : number of active devices in array\n"
dd3a91
+"  --spare-devices=   -x : number of spare (eXtra) devices in initial array\n"
dd3a91
+"  --size=            -z : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
dd3a91
+"  --data-offset=        : Space to leave between start of device and start\n"
dd3a91
+"                        : of array data.\n"
dd3a91
+"  --force            -f : Honour devices as listed on command line.  Don't\n"
dd3a91
+"                        : insert a missing drive for RAID5.\n"
dd3a91
+"  --run              -R : insist of running the array even if not all\n"
dd3a91
+"                        : devices are present or some look odd.\n"
dd3a91
+"  --readonly         -o : start the array readonly - not supported yet.\n"
dd3a91
+"  --name=            -N : Textual name for array - max 32 characters\n"
dd3a91
+"  --bitmap-chunk=       : bitmap chunksize in Kilobytes.\n"
dd3a91
+"  --delay=           -d : bitmap update delay in seconds.\n"
dd3a91
+"  --write-journal=      : Specify journal device for RAID-4/5/6 array\n"
dd3a91
+"  --consistency-policy= : Specify the policy that determines how the array\n"
dd3a91
+"                     -k : maintains consistency in case of unexpected shutdown.\n"
dd3a91
 "\n"
dd3a91
 ;
dd3a91
 
dd3a91
diff --git a/maps.c b/maps.c
dd3a91
index 64f1df2..d9ee7de 100644
dd3a91
--- a/maps.c
dd3a91
+++ b/maps.c
dd3a91
@@ -129,6 +129,16 @@ mapping_t faultylayout[] = {
dd3a91
 	{ NULL, 0}
dd3a91
 };
dd3a91
 
dd3a91
+mapping_t consistency_policies[] = {
dd3a91
+	{ "unknown", CONSISTENCY_POLICY_UNKNOWN},
dd3a91
+	{ "none", CONSISTENCY_POLICY_NONE},
dd3a91
+	{ "resync", CONSISTENCY_POLICY_RESYNC},
dd3a91
+	{ "bitmap", CONSISTENCY_POLICY_BITMAP},
dd3a91
+	{ "journal", CONSISTENCY_POLICY_JOURNAL},
dd3a91
+	{ "ppl", CONSISTENCY_POLICY_PPL},
dd3a91
+	{ NULL, 0}
dd3a91
+};
dd3a91
+
dd3a91
 char *map_num(mapping_t *map, int num)
dd3a91
 {
dd3a91
 	while (map->name) {
dd3a91
diff --git a/mdadm.8.in b/mdadm.8.in
dd3a91
index df1d460..cad5db5 100644
dd3a91
--- a/mdadm.8.in
dd3a91
+++ b/mdadm.8.in
dd3a91
@@ -724,7 +724,9 @@ When creating an array on devices which are 100G or larger,
dd3a91
 .I mdadm
dd3a91
 automatically adds an internal bitmap as it will usually be
dd3a91
 beneficial.  This can be suppressed with
dd3a91
-.B "\-\-bitmap=none".
dd3a91
+.B "\-\-bitmap=none"
dd3a91
+or by selecting a different consistency policy with
dd3a91
+.BR \-\-consistency\-policy .
dd3a91
 
dd3a91
 .TP
dd3a91
 .BR \-\-bitmap\-chunk=
dd3a91
@@ -1020,6 +1022,36 @@ should be a SSD with reasonable lifetime.
dd3a91
 Auto creation of symlinks in /dev to /dev/md, option --symlinks must
dd3a91
 be 'no' or 'yes' and work with --create and --build.
dd3a91
 
dd3a91
+.TP
dd3a91
+.BR \-k ", " \-\-consistency\-policy=
dd3a91
+Specify how the array maintains consistency in case of unexpected shutdown.
dd3a91
+Only relevant for RAID levels with redundancy.
dd3a91
+Currently supported options are:
dd3a91
+.RS
dd3a91
+
dd3a91
+.TP
dd3a91
+.B resync
dd3a91
+Full resync is performed and all redundancy is regenerated when the array is
dd3a91
+started after unclean shutdown.
dd3a91
+
dd3a91
+.TP
dd3a91
+.B bitmap
dd3a91
+Resync assisted by a write-intent bitmap. Implicitly selected when using
dd3a91
+.BR \-\-bitmap .
dd3a91
+
dd3a91
+.TP
dd3a91
+.B journal
dd3a91
+For RAID levels 4/5/6, journal device is used to log transactions and replay
dd3a91
+after unclean shutdown. Implicitly selected when using
dd3a91
+.BR \-\-write\-journal .
dd3a91
+
dd3a91
+.TP
dd3a91
+.B ppl
dd3a91
+For RAID5 only, Partial Parity Log is used to close the write hole and
dd3a91
+eliminate resync. PPL is stored in the metadata region of RAID member drives,
dd3a91
+no additional journal drive is needed.
dd3a91
+.RE
dd3a91
+
dd3a91
 
dd3a91
 .SH For assemble:
dd3a91
 
dd3a91
@@ -2153,8 +2185,10 @@ in the array exceed 100G is size, an internal write-intent bitmap
dd3a91
 will automatically be added unless some other option is explicitly
dd3a91
 requested with the
dd3a91
 .B \-\-bitmap
dd3a91
-option.  In any case space for a bitmap will be reserved so that one
dd3a91
-can be added layer with
dd3a91
+option or a different consistency policy is selected with the
dd3a91
+.B \-\-consistency\-policy
dd3a91
+option. In any case space for a bitmap will be reserved so that one
dd3a91
+can be added later with
dd3a91
 .BR "\-\-grow \-\-bitmap=internal" .
dd3a91
 
dd3a91
 If the metadata type supports it (currently only 1.x metadata), space
dd3a91
diff --git a/mdadm.c b/mdadm.c
dd3a91
index 08ddcab..d4e8286 100644
dd3a91
--- a/mdadm.c
dd3a91
+++ b/mdadm.c
dd3a91
@@ -78,6 +78,7 @@ int main(int argc, char *argv[])
dd3a91
 		.level		= UnSet,
dd3a91
 		.layout		= UnSet,
dd3a91
 		.bitmap_chunk	= UnSet,
dd3a91
+		.consistency_policy	= UnSet,
dd3a91
 	};
dd3a91
 
dd3a91
 	char sys_hostname[256];
dd3a91
@@ -1215,6 +1216,16 @@ int main(int argc, char *argv[])
dd3a91
 
dd3a91
 			s.journaldisks = 1;
dd3a91
 			continue;
dd3a91
+		case O(CREATE, 'k'):
dd3a91
+			s.consistency_policy = map_name(consistency_policies,
dd3a91
+							optarg);
dd3a91
+			if (s.consistency_policy == UnSet ||
dd3a91
+			    s.consistency_policy < CONSISTENCY_POLICY_RESYNC) {
dd3a91
+				pr_err("Invalid consistency policy: %s\n",
dd3a91
+				       optarg);
dd3a91
+				exit(2);
dd3a91
+			}
dd3a91
+			continue;
dd3a91
 		}
dd3a91
 		/* We have now processed all the valid options. Anything else is
dd3a91
 		 * an error
dd3a91
@@ -1242,9 +1253,47 @@ int main(int argc, char *argv[])
dd3a91
 		exit(0);
dd3a91
 	}
dd3a91
 
dd3a91
-	if (s.journaldisks && (s.level < 4 || s.level > 6)) {
dd3a91
-		pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
dd3a91
-		exit(2);
dd3a91
+	if (s.journaldisks) {
dd3a91
+		if (s.level < 4 || s.level > 6) {
dd3a91
+			pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
dd3a91
+			exit(2);
dd3a91
+		}
dd3a91
+		if (s.consistency_policy != UnSet &&
dd3a91
+		    s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
dd3a91
+			pr_err("--write-journal is not supported with consistency policy: %s\n",
dd3a91
+			       map_num(consistency_policies, s.consistency_policy));
dd3a91
+			exit(2);
dd3a91
+		}
dd3a91
+	}
dd3a91
+
dd3a91
+	if (mode == CREATE && s.consistency_policy != UnSet) {
dd3a91
+		if (s.level <= 0) {
dd3a91
+			pr_err("--consistency-policy not meaningful with level %s.\n",
dd3a91
+			       map_num(pers, s.level));
dd3a91
+			exit(2);
dd3a91
+		} else if (s.consistency_policy == CONSISTENCY_POLICY_JOURNAL &&
dd3a91
+			   !s.journaldisks) {
dd3a91
+			pr_err("--write-journal is required for consistency policy: %s\n",
dd3a91
+			       map_num(consistency_policies, s.consistency_policy));
dd3a91
+			exit(2);
dd3a91
+		} else if (s.consistency_policy == CONSISTENCY_POLICY_PPL &&
dd3a91
+			   s.level != 5) {
dd3a91
+			pr_err("PPL consistency policy is only supported for RAID level 5.\n");
dd3a91
+			exit(2);
dd3a91
+		} else if (s.consistency_policy == CONSISTENCY_POLICY_BITMAP &&
dd3a91
+			   (!s.bitmap_file ||
dd3a91
+			    strcmp(s.bitmap_file, "none") == 0)) {
dd3a91
+			pr_err("--bitmap is required for consistency policy: %s\n",
dd3a91
+			       map_num(consistency_policies, s.consistency_policy));
dd3a91
+			exit(2);
dd3a91
+		} else if (s.bitmap_file &&
dd3a91
+			   strcmp(s.bitmap_file, "none") != 0 &&
dd3a91
+			   s.consistency_policy != CONSISTENCY_POLICY_BITMAP &&
dd3a91
+			   s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
dd3a91
+			pr_err("--bitmap is not compatible with consistency policy: %s\n",
dd3a91
+			       map_num(consistency_policies, s.consistency_policy));
dd3a91
+			exit(2);
dd3a91
+		}
dd3a91
 	}
dd3a91
 
dd3a91
 	if (!mode && devs_found) {
dd3a91
diff --git a/mdadm.h b/mdadm.h
dd3a91
index cebc0c0..b52d4d3 100644
dd3a91
--- a/mdadm.h
dd3a91
+++ b/mdadm.h
dd3a91
@@ -279,6 +279,15 @@ struct mdinfo {
dd3a91
 	int journal_device_required;
dd3a91
 	int journal_clean;
dd3a91
 
dd3a91
+	enum {
dd3a91
+		CONSISTENCY_POLICY_UNKNOWN,
dd3a91
+		CONSISTENCY_POLICY_NONE,
dd3a91
+		CONSISTENCY_POLICY_RESYNC,
dd3a91
+		CONSISTENCY_POLICY_BITMAP,
dd3a91
+		CONSISTENCY_POLICY_JOURNAL,
dd3a91
+		CONSISTENCY_POLICY_PPL,
dd3a91
+	} consistency_policy;
dd3a91
+
dd3a91
 	/* During reshape we can sometimes change the data_offset to avoid
dd3a91
 	 * over-writing still-valid data.  We need to know if there is space.
dd3a91
 	 * So getinfo_super will fill in space_before and space_after in sectors.
dd3a91
@@ -426,6 +435,7 @@ enum special_options {
dd3a91
 	ClusterName,
dd3a91
 	ClusterConfirm,
dd3a91
 	WriteJournal,
dd3a91
+	ConsistencyPolicy,
dd3a91
 };
dd3a91
 
dd3a91
 enum prefix_standard {
dd3a91
@@ -527,6 +537,7 @@ struct shape {
dd3a91
 	int	assume_clean;
dd3a91
 	int	write_behind;
dd3a91
 	unsigned long long size;
dd3a91
+	int	consistency_policy;
dd3a91
 };
dd3a91
 
dd3a91
 /* List of device names - wildcards expanded */
dd3a91
@@ -618,6 +629,7 @@ enum sysfs_read_flags {
dd3a91
 	GET_STATE	= (1 << 23),
dd3a91
 	GET_ERROR	= (1 << 24),
dd3a91
 	GET_ARRAY_STATE = (1 << 25),
dd3a91
+	GET_CONSISTENCY_POLICY	= (1 << 26),
dd3a91
 };
dd3a91
 
dd3a91
 /* If fd >= 0, get the array it is open on,
dd3a91
@@ -701,7 +713,7 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
dd3a91
 
dd3a91
 extern char *map_num(mapping_t *map, int num);
dd3a91
 extern int map_name(mapping_t *map, char *name);
dd3a91
-extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
dd3a91
+extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[], consistency_policies[];
dd3a91
 
dd3a91
 extern char *map_dev_preferred(int major, int minor, int create,
dd3a91
 			       char *prefer);
dd3a91
@@ -863,7 +875,7 @@ extern struct superswitch {
dd3a91
 	 * metadata.
dd3a91
 	 */
dd3a91
 	int (*init_super)(struct supertype *st, mdu_array_info_t *info,
dd3a91
-			  unsigned long long size, char *name,
dd3a91
+			  struct shape *s, char *name,
dd3a91
 			  char *homehost, int *uuid,
dd3a91
 			  unsigned long long data_offset);
dd3a91
 
dd3a91
@@ -961,7 +973,7 @@ extern struct superswitch {
dd3a91
 				 int *chunk, unsigned long long size,
dd3a91
 				 unsigned long long data_offset,
dd3a91
 				 char *subdev, unsigned long long *freesize,
dd3a91
-				 int verbose);
dd3a91
+				 int consistency_policy, int verbose);
dd3a91
 
dd3a91
 	/* Return a linked list of 'mdinfo' structures for all arrays
dd3a91
 	 * in the container.  For non-containers, it is like
dd3a91
@@ -1059,6 +1071,9 @@ extern struct superswitch {
dd3a91
 	/* validate container after assemble */
dd3a91
 	int (*validate_container)(struct mdinfo *info);
dd3a91
 
dd3a91
+	/* write initial empty PPL on device */
dd3a91
+	int (*write_init_ppl)(struct supertype *st, struct mdinfo *info, int fd);
dd3a91
+
dd3a91
 	/* records new bad block in metadata */
dd3a91
 	int (*record_bad_block)(struct active_array *a, int n,
dd3a91
 					unsigned long long sector, int length);
dd3a91
diff --git a/super-ddf.c b/super-ddf.c
dd3a91
index 1707ad1..cdd16a4 100644
dd3a91
--- a/super-ddf.c
dd3a91
+++ b/super-ddf.c
dd3a91
@@ -2290,7 +2290,7 @@ static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
dd3a91
 
dd3a91
 static int init_super_ddf(struct supertype *st,
dd3a91
 			  mdu_array_info_t *info,
dd3a91
-			  unsigned long long size, char *name, char *homehost,
dd3a91
+			  struct shape *s, char *name, char *homehost,
dd3a91
 			  int *uuid, unsigned long long data_offset)
dd3a91
 {
dd3a91
 	/* This is primarily called by Create when creating a new array.
dd3a91
@@ -2328,7 +2328,7 @@ static int init_super_ddf(struct supertype *st,
dd3a91
 	struct virtual_disk *vd;
dd3a91
 
dd3a91
 	if (st->sb)
dd3a91
-		return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
dd3a91
+		return init_super_ddf_bvd(st, info, s->size, name, homehost, uuid,
dd3a91
 					  data_offset);
dd3a91
 
dd3a91
 	if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
dd3a91
@@ -3347,7 +3347,7 @@ static int validate_geometry_ddf(struct supertype *st,
dd3a91
 				 int *chunk, unsigned long long size,
dd3a91
 				 unsigned long long data_offset,
dd3a91
 				 char *dev, unsigned long long *freesize,
dd3a91
-				 int verbose)
dd3a91
+				 int consistency_policy, int verbose)
dd3a91
 {
dd3a91
 	int fd;
dd3a91
 	struct mdinfo *sra;
dd3a91
diff --git a/super-gpt.c b/super-gpt.c
dd3a91
index 8b080a0..bb38a97 100644
dd3a91
--- a/super-gpt.c
dd3a91
+++ b/super-gpt.c
dd3a91
@@ -205,7 +205,7 @@ static int validate_geometry(struct supertype *st, int level,
dd3a91
 			     int *chunk, unsigned long long size,
dd3a91
 			     unsigned long long data_offset,
dd3a91
 			     char *subdev, unsigned long long *freesize,
dd3a91
-			     int verbose)
dd3a91
+			     int consistency_policy, int verbose)
dd3a91
 {
dd3a91
 	pr_err("gpt metadata cannot be used this way\n");
dd3a91
 	return 0;
dd3a91
diff --git a/super-intel.c b/super-intel.c
dd3a91
index e1618f1..5d0f131 100644
dd3a91
--- a/super-intel.c
dd3a91
+++ b/super-intel.c
dd3a91
@@ -5155,7 +5155,7 @@ static int check_name(struct intel_super *super, char *name, int quiet)
dd3a91
 }
dd3a91
 
dd3a91
 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
dd3a91
-				  unsigned long long size, char *name,
dd3a91
+				  struct shape *s, char *name,
dd3a91
 				  char *homehost, int *uuid,
dd3a91
 				  long long data_offset)
dd3a91
 {
dd3a91
@@ -5250,7 +5250,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
dd3a91
 	strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
dd3a91
 	array_blocks = calc_array_size(info->level, info->raid_disks,
dd3a91
 					       info->layout, info->chunk_size,
dd3a91
-					       size * 2);
dd3a91
+					       s->size * 2);
dd3a91
 	/* round array size down to closest MB */
dd3a91
 	array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
dd3a91
 
dd3a91
@@ -5264,7 +5264,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
dd3a91
 	vol->curr_migr_unit = 0;
dd3a91
 	map = get_imsm_map(dev, MAP_0);
dd3a91
 	set_pba_of_lba0(map, super->create_offset);
dd3a91
-	set_blocks_per_member(map, info_to_blocks_per_member(info, size));
dd3a91
+	set_blocks_per_member(map, info_to_blocks_per_member(info, s->size));
dd3a91
 	map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
dd3a91
 	map->failed_disk_num = ~0;
dd3a91
 	if (info->level > 0)
dd3a91
@@ -5292,7 +5292,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
dd3a91
 		map->num_domains = 1;
dd3a91
 
dd3a91
 	/* info->size is only int so use the 'size' parameter instead */
dd3a91
-	num_data_stripes = (size * 2) / info_to_blocks_per_strip(info);
dd3a91
+	num_data_stripes = (s->size * 2) / info_to_blocks_per_strip(info);
dd3a91
 	num_data_stripes /= map->num_domains;
dd3a91
 	set_num_data_stripes(map, num_data_stripes);
dd3a91
 
dd3a91
@@ -5314,7 +5314,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
dd3a91
 }
dd3a91
 
dd3a91
 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
dd3a91
-			   unsigned long long size, char *name,
dd3a91
+		           struct shape *s, char *name,
dd3a91
 			   char *homehost, int *uuid,
dd3a91
 			   unsigned long long data_offset)
dd3a91
 {
dd3a91
@@ -5337,7 +5337,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
dd3a91
 	}
dd3a91
 
dd3a91
 	if (st->sb)
dd3a91
-		return init_super_imsm_volume(st, info, size, name, homehost, uuid,
dd3a91
+		return init_super_imsm_volume(st, info, s, name, homehost, uuid,
dd3a91
 					      data_offset);
dd3a91
 
dd3a91
 	if (info)
dd3a91
@@ -6914,7 +6914,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
dd3a91
 				  int raiddisks, int *chunk, unsigned long long size,
dd3a91
 				  unsigned long long data_offset,
dd3a91
 				  char *dev, unsigned long long *freesize,
dd3a91
-				  int verbose)
dd3a91
+				  int consistency_policy, int verbose)
dd3a91
 {
dd3a91
 	int fd, cfd;
dd3a91
 	struct mdinfo *sra;
dd3a91
@@ -10953,7 +10953,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
dd3a91
 				    geo->raid_disks + devNumChange,
dd3a91
 				    &chunk,
dd3a91
 				    geo->size, INVALID_SECTORS,
dd3a91
-				    0, 0, 1))
dd3a91
+				    0, 0, info.consistency_policy, 1))
dd3a91
 		change = -1;
dd3a91
 
dd3a91
 	if (check_devs) {
dd3a91
diff --git a/super-mbr.c b/super-mbr.c
dd3a91
index f5e4cea..1bbe57a 100644
dd3a91
--- a/super-mbr.c
dd3a91
+++ b/super-mbr.c
dd3a91
@@ -193,7 +193,7 @@ static int validate_geometry(struct supertype *st, int level,
dd3a91
 			     int *chunk, unsigned long long size,
dd3a91
 			     unsigned long long data_offset,
dd3a91
 			     char *subdev, unsigned long long *freesize,
dd3a91
-			     int verbose)
dd3a91
+			     int consistency_policy, int verbose)
dd3a91
 {
dd3a91
 	pr_err("mbr metadata cannot be used this way\n");
dd3a91
 	return 0;
dd3a91
diff --git a/super0.c b/super0.c
dd3a91
index f5b4507..7a555e3 100644
dd3a91
--- a/super0.c
dd3a91
+++ b/super0.c
dd3a91
@@ -725,7 +725,7 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
dd3a91
  * We use the first 8 bytes (64bits) of the sha1 of the host name
dd3a91
  */
dd3a91
 static int init_super0(struct supertype *st, mdu_array_info_t *info,
dd3a91
-		       unsigned long long size, char *ignored_name,
dd3a91
+		       struct shape *s, char *ignored_name,
dd3a91
 		       char *homehost, int *uuid,
dd3a91
 		       unsigned long long data_offset)
dd3a91
 {
dd3a91
@@ -764,8 +764,8 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
dd3a91
 	sb->gvalid_words = 0; /* ignored */
dd3a91
 	sb->ctime = time(0);
dd3a91
 	sb->level = info->level;
dd3a91
-	sb->size = size;
dd3a91
-	if (size != (unsigned long long)sb->size)
dd3a91
+	sb->size = s->size;
dd3a91
+	if (s->size != (unsigned long long)sb->size)
dd3a91
 		return 0;
dd3a91
 	sb->nr_disks = info->nr_disks;
dd3a91
 	sb->raid_disks = info->raid_disks;
dd3a91
@@ -1267,7 +1267,7 @@ static int validate_geometry0(struct supertype *st, int level,
dd3a91
 			      int *chunk, unsigned long long size,
dd3a91
 			      unsigned long long data_offset,
dd3a91
 			      char *subdev, unsigned long long *freesize,
dd3a91
-			      int verbose)
dd3a91
+			      int consistency_policy, int verbose)
dd3a91
 {
dd3a91
 	unsigned long long ldsize;
dd3a91
 	int fd;
dd3a91
diff --git a/super1.c b/super1.c
dd3a91
index f3520ac..4a0f041 100644
dd3a91
--- a/super1.c
dd3a91
+++ b/super1.c
dd3a91
@@ -1397,7 +1397,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
dd3a91
 }
dd3a91
 
dd3a91
 static int init_super1(struct supertype *st, mdu_array_info_t *info,
dd3a91
-		       unsigned long long size, char *name, char *homehost,
dd3a91
+		       struct shape *s, char *name, char *homehost,
dd3a91
 		       int *uuid, unsigned long long data_offset)
dd3a91
 {
dd3a91
 	struct mdp_superblock_1 *sb;
dd3a91
@@ -1450,7 +1450,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
dd3a91
 	sb->ctime = __cpu_to_le64((unsigned long long)time(0));
dd3a91
 	sb->level = __cpu_to_le32(info->level);
dd3a91
 	sb->layout = __cpu_to_le32(info->layout);
dd3a91
-	sb->size = __cpu_to_le64(size*2ULL);
dd3a91
+	sb->size = __cpu_to_le64(s->size*2ULL);
dd3a91
 	sb->chunksize = __cpu_to_le32(info->chunk_size>>9);
dd3a91
 	sb->raid_disks = __cpu_to_le32(info->raid_disks);
dd3a91
 
dd3a91
@@ -2487,7 +2487,7 @@ static int validate_geometry1(struct supertype *st, int level,
dd3a91
 			      int *chunk, unsigned long long size,
dd3a91
 			      unsigned long long data_offset,
dd3a91
 			      char *subdev, unsigned long long *freesize,
dd3a91
-			      int verbose)
dd3a91
+			      int consistency_policy, int verbose)
dd3a91
 {
dd3a91
 	unsigned long long ldsize, devsize;
dd3a91
 	int bmspace;
dd3a91
diff --git a/sysfs.c b/sysfs.c
dd3a91
index b0657a0..53589a7 100644
dd3a91
--- a/sysfs.c
dd3a91
+++ b/sysfs.c
dd3a91
@@ -242,6 +242,17 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
dd3a91
 	} else
dd3a91
 		sra->sysfs_array_state[0] = 0;
dd3a91
 
dd3a91
+	if (options & GET_CONSISTENCY_POLICY) {
dd3a91
+		strcpy(base, "consistency_policy");
dd3a91
+		if (load_sys(fname, buf, sizeof(buf))) {
dd3a91
+			sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
dd3a91
+		} else {
dd3a91
+			sra->consistency_policy = map_name(consistency_policies, buf);
dd3a91
+			if (sra->consistency_policy == UnSet)
dd3a91
+				sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
dd3a91
+		}
dd3a91
+	}
dd3a91
+
dd3a91
 	if (! (options & GET_DEVS))
dd3a91
 		return sra;
dd3a91