dcavalca / rpms / mdadm

Forked from rpms/mdadm 3 years ago
Clone

Blame SOURCES/support-consistency-policy-change.patch

dd3a91
commit 860f11ed4d6a7bac6f2d698a30a13371c0aa7924
dd3a91
Author: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
dd3a91
Date:   Wed Mar 29 11:54:20 2017 +0200
dd3a91
dd3a91
    Grow: support consistency policy change
dd3a91
    
dd3a91
    Extend the --consistency-policy parameter to work also in Grow mode.
dd3a91
    Using it changes the currently active consistency policy in the kernel
dd3a91
    driver and updates the metadata to make this change permanent. Currently
dd3a91
    this supports only changing between "ppl" and "resync" policies, that is
dd3a91
    enabling or disabling PPL at runtime.
dd3a91
    
dd3a91
    Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
dd3a91
    Signed-off-by: Jes Sorensen <Jes.Sorensen@gmail.com>
dd3a91
dd3a91
diff --git a/Grow.c b/Grow.c
dd3a91
index a849012..b86b53e 100755
dd3a91
--- a/Grow.c
dd3a91
+++ b/Grow.c
dd3a91
@@ -528,6 +528,178 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
dd3a91
 	return 0;
dd3a91
 }
dd3a91
 
dd3a91
+int Grow_consistency_policy(char *devname, int fd, struct context *c, struct shape *s)
dd3a91
+{
dd3a91
+	struct supertype *st;
dd3a91
+	struct mdinfo *sra;
dd3a91
+	struct mdinfo *sd;
dd3a91
+	char *subarray = NULL;
dd3a91
+	int ret = 0;
dd3a91
+	char container_dev[PATH_MAX];
dd3a91
+
dd3a91
+	if (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
dd3a91
+	    s->consistency_policy != CONSISTENCY_POLICY_PPL) {
dd3a91
+		pr_err("Operation not supported for consistency policy %s\n",
dd3a91
+		       map_num(consistency_policies, s->consistency_policy));
dd3a91
+		return 1;
dd3a91
+	}
dd3a91
+
dd3a91
+	st = super_by_fd(fd, &subarray);
dd3a91
+	if (!st)
dd3a91
+		return 1;
dd3a91
+
dd3a91
+	sra = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY|GET_LEVEL|
dd3a91
+				   GET_DEVS|GET_STATE);
dd3a91
+	if (!sra) {
dd3a91
+		ret = 1;
dd3a91
+		goto free_st;
dd3a91
+	}
dd3a91
+
dd3a91
+	if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
dd3a91
+	    !st->ss->write_init_ppl) {
dd3a91
+		pr_err("%s metadata does not support PPL\n", st->ss->name);
dd3a91
+		ret = 1;
dd3a91
+		goto free_info;
dd3a91
+	}
dd3a91
+
dd3a91
+	if (sra->array.level != 5) {
dd3a91
+		pr_err("Operation not supported for array level %d\n",
dd3a91
+				sra->array.level);
dd3a91
+		ret = 1;
dd3a91
+		goto free_info;
dd3a91
+	}
dd3a91
+
dd3a91
+	if (sra->consistency_policy == (unsigned)s->consistency_policy) {
dd3a91
+		pr_err("Consistency policy is already %s\n",
dd3a91
+		       map_num(consistency_policies, s->consistency_policy));
dd3a91
+		ret = 1;
dd3a91
+		goto free_info;
dd3a91
+	} else if (sra->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
dd3a91
+		   sra->consistency_policy != CONSISTENCY_POLICY_PPL) {
dd3a91
+		pr_err("Current consistency policy is %s, cannot change to %s\n",
dd3a91
+		       map_num(consistency_policies, sra->consistency_policy),
dd3a91
+		       map_num(consistency_policies, s->consistency_policy));
dd3a91
+		ret = 1;
dd3a91
+		goto free_info;
dd3a91
+	}
dd3a91
+
dd3a91
+	if (subarray) {
dd3a91
+		char *update;
dd3a91
+
dd3a91
+		if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
dd3a91
+			update = "ppl";
dd3a91
+		else
dd3a91
+			update = "no-ppl";
dd3a91
+
dd3a91
+		sprintf(container_dev, "/dev/%s", st->container_devnm);
dd3a91
+
dd3a91
+		ret = Update_subarray(container_dev, subarray, update, NULL,
dd3a91
+				      c->verbose);
dd3a91
+		if (ret)
dd3a91
+			goto free_info;
dd3a91
+	}
dd3a91
+
dd3a91
+	if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
dd3a91
+		struct mdinfo info;
dd3a91
+
dd3a91
+		if (subarray) {
dd3a91
+			struct mdinfo *mdi;
dd3a91
+			int cfd;
dd3a91
+
dd3a91
+			cfd = open(container_dev, O_RDWR|O_EXCL);
dd3a91
+			if (cfd < 0) {
dd3a91
+				pr_err("Failed to open %s\n", container_dev);
dd3a91
+				ret = 1;
dd3a91
+				goto free_info;
dd3a91
+			}
dd3a91
+
dd3a91
+			ret = st->ss->load_container(st, cfd, st->container_devnm);
dd3a91
+			close(cfd);
dd3a91
+
dd3a91
+			if (ret) {
dd3a91
+				pr_err("Cannot read superblock for %s\n",
dd3a91
+				       container_dev);
dd3a91
+				goto free_info;
dd3a91
+			}
dd3a91
+
dd3a91
+			mdi = st->ss->container_content(st, subarray);
dd3a91
+			info = *mdi;
dd3a91
+			free(mdi);
dd3a91
+		}
dd3a91
+
dd3a91
+		for (sd = sra->devs; sd; sd = sd->next) {
dd3a91
+			int dfd;
dd3a91
+			char *devpath;
dd3a91
+
dd3a91
+			if ((sd->disk.state & (1 << MD_DISK_SYNC)) == 0)
dd3a91
+				continue;
dd3a91
+
dd3a91
+			devpath = map_dev(sd->disk.major, sd->disk.minor, 0);
dd3a91
+			dfd = dev_open(devpath, O_RDWR);
dd3a91
+			if (dfd < 0) {
dd3a91
+				pr_err("Failed to open %s\n", devpath);
dd3a91
+				ret = 1;
dd3a91
+				goto free_info;
dd3a91
+			}
dd3a91
+
dd3a91
+			if (!subarray) {
dd3a91
+				ret = st->ss->load_super(st, dfd, NULL);
dd3a91
+				if (ret) {
dd3a91
+					pr_err("Failed to load super-block.\n");
dd3a91
+					close(dfd);
dd3a91
+					goto free_info;
dd3a91
+				}
dd3a91
+
dd3a91
+				ret = st->ss->update_super(st, sra, "ppl", devname,
dd3a91
+							   c->verbose, 0, NULL);
dd3a91
+				if (ret) {
dd3a91
+					close(dfd);
dd3a91
+					st->ss->free_super(st);
dd3a91
+					goto free_info;
dd3a91
+				}
dd3a91
+				st->ss->getinfo_super(st, &info, NULL);
dd3a91
+			}
dd3a91
+
dd3a91
+			ret |= sysfs_set_num(sra, sd, "ppl_sector", info.ppl_sector);
dd3a91
+			ret |= sysfs_set_num(sra, sd, "ppl_size", info.ppl_size);
dd3a91
+
dd3a91
+			if (ret) {
dd3a91
+				pr_err("Failed to set PPL attributes for %s\n",
dd3a91
+				       sd->sys_name);
dd3a91
+				close(dfd);
dd3a91
+				st->ss->free_super(st);
dd3a91
+				goto free_info;
dd3a91
+			}
dd3a91
+
dd3a91
+			ret = st->ss->write_init_ppl(st, &info, dfd);
dd3a91
+			if (ret)
dd3a91
+				pr_err("Failed to write PPL\n");
dd3a91
+
dd3a91
+			close(dfd);
dd3a91
+
dd3a91
+			if (!subarray)
dd3a91
+				st->ss->free_super(st);
dd3a91
+
dd3a91
+			if (ret)
dd3a91
+				goto free_info;
dd3a91
+		}
dd3a91
+	}
dd3a91
+
dd3a91
+	ret = sysfs_set_str(sra, NULL, "consistency_policy",
dd3a91
+			    map_num(consistency_policies,
dd3a91
+				    s->consistency_policy));
dd3a91
+	if (ret)
dd3a91
+		pr_err("Failed to change array consistency policy\n");
dd3a91
+
dd3a91
+free_info:
dd3a91
+	sysfs_free(sra);
dd3a91
+free_st:
dd3a91
+	free(st);
dd3a91
+	free(subarray);
dd3a91
+
dd3a91
+	return ret;
dd3a91
+}
dd3a91
+
dd3a91
 /*
dd3a91
  * When reshaping an array we might need to backup some data.
dd3a91
  * This is written to all spares with a 'super_block' describing it.
dd3a91
diff --git a/ReadMe.c b/ReadMe.c
dd3a91
index fc04c2c..eb8fb4b 100644
dd3a91
--- a/ReadMe.c
dd3a91
+++ b/ReadMe.c
dd3a91
@@ -559,28 +559,30 @@ char Help_grow[] =
dd3a91
 "reconfiguration.\n"
dd3a91
 "\n"
dd3a91
 "Options that are valid with the grow (-G --grow) mode are:\n"
dd3a91
-"  --level=       -l   : Tell mdadm what level to convert the array to.\n"
dd3a91
-"  --layout=      -p   : For a FAULTY array, set/change the error mode.\n"
dd3a91
-"                      : for other arrays, update the layout\n"
dd3a91
-"  --size=        -z   : Change the active size of devices in an array.\n"
dd3a91
-"                      : This is useful if all devices have been replaced\n"
dd3a91
-"                      : with larger devices.   Value is in Kilobytes, or\n"
dd3a91
-"                      : the special word 'max' meaning 'as large as possible'.\n"
dd3a91
-"  --assume-clean      : When increasing the --size, this flag will avoid\n"
dd3a91
-"                      : a resync of the new space\n"
dd3a91
-"  --chunk=       -c   : Change the chunksize of the array\n"
dd3a91
-"  --raid-devices= -n  : Change the number of active devices in an array.\n"
dd3a91
-"  --add=         -a   : Add listed devices as part of reshape.  This is\n"
dd3a91
-"                      : needed for resizing a RAID0 which cannot have\n"
dd3a91
-"                      : spares already present.\n"
dd3a91
-"  --bitmap=      -b   : Add or remove a write-intent bitmap.\n"
dd3a91
-"  --backup-file= file : A file on a different device to store data for a\n"
dd3a91
-"                      : short time while increasing raid-devices on a\n"
dd3a91
-"                      : RAID4/5/6 array. Also needed throughout a reshape\n"
dd3a91
-"                      : when changing parameters other than raid-devices\n"
dd3a91
-"  --array-size=  -Z   : Change visible size of array.  This does not change\n"
dd3a91
-"                      : any data on the device, and is not stable across restarts.\n"
dd3a91
-"  --data-offset=      : Location on device to move start of data to.\n"
dd3a91
+"  --level=           -l : Tell mdadm what level to convert the array to.\n"
dd3a91
+"  --layout=          -p : For a FAULTY array, set/change the error mode.\n"
dd3a91
+"                        : for other arrays, update the layout\n"
dd3a91
+"  --size=            -z : Change the active size of devices in an array.\n"
dd3a91
+"                        : This is useful if all devices have been replaced\n"
dd3a91
+"                        : with larger devices.   Value is in Kilobytes, or\n"
dd3a91
+"                        : the special word 'max' meaning 'as large as possible'.\n"
dd3a91
+"  --assume-clean        : When increasing the --size, this flag will avoid\n"
dd3a91
+"                        : a resync of the new space\n"
dd3a91
+"  --chunk=           -c : Change the chunksize of the array\n"
dd3a91
+"  --raid-devices=    -n : Change the number of active devices in an array.\n"
dd3a91
+"  --add=             -a : Add listed devices as part of reshape.  This is\n"
dd3a91
+"                        : needed for resizing a RAID0 which cannot have\n"
dd3a91
+"                        : spares already present.\n"
dd3a91
+"  --bitmap=          -b : Add or remove a write-intent bitmap.\n"
dd3a91
+"  --backup-file= file   : A file on a different device to store data for a\n"
dd3a91
+"                        : short time while increasing raid-devices on a\n"
dd3a91
+"                        : RAID4/5/6 array. Also needed throughout a reshape\n"
dd3a91
+"                        : when changing parameters other than raid-devices\n"
dd3a91
+"  --array-size=      -Z : Change visible size of array. This does not change any\n"
dd3a91
+"                        : data on the device, and is not stable across restarts.\n"
dd3a91
+"  --data-offset=        : Location on device to move start of data to.\n"
dd3a91
+"  --consistency-policy= : Change the consistency policy of an active array.\n"
dd3a91
+"                     -k : Currently works only for PPL with RAID5.\n"
dd3a91
 ;
dd3a91
 
dd3a91
 char Help_incr[] =
dd3a91
diff --git a/mdadm.8.in b/mdadm.8.in
dd3a91
index 1178ed9..744c12b 100644
dd3a91
--- a/mdadm.8.in
dd3a91
+++ b/mdadm.8.in
dd3a91
@@ -126,7 +126,7 @@ of component devices and changing the number of active devices in
dd3a91
 Linear and RAID levels 0/1/4/5/6,
dd3a91
 changing the RAID level between 0, 1, 5, and 6, and between 0 and 10,
dd3a91
 changing the chunk size and layout for RAID 0,4,5,6,10 as well as adding or
dd3a91
-removing a write-intent bitmap.
dd3a91
+removing a write-intent bitmap and changing the array's consistency policy.
dd3a91
 
dd3a91
 .TP
dd3a91
 .B "Incremental Assembly"
dd3a91
@@ -1050,6 +1050,10 @@ after unclean shutdown. Implicitly selected when using
dd3a91
 For RAID5 only, Partial Parity Log is used to close the write hole and
dd3a91
 eliminate resync. PPL is stored in the metadata region of RAID member drives,
dd3a91
 no additional journal drive is needed.
dd3a91
+
dd3a91
+.PP
dd3a91
+Can be used with \-\-grow to change the consistency policy of an active array
dd3a91
+in some cases. See CONSISTENCY POLICY CHANGES below.
dd3a91
 .RE
dd3a91
 
dd3a91
 
dd3a91
@@ -2694,6 +2698,8 @@ RAID0, RAID4, and RAID5, and between RAID0 and RAID10 (in the near-2 mode).
dd3a91
 .IP \(bu 4
dd3a91
 add a write-intent bitmap to any array which supports these bitmaps, or
dd3a91
 remove a write-intent bitmap from such an array.
dd3a91
+.IP \(bu 4
dd3a91
+change the array's consistency policy.
dd3a91
 .PP
dd3a91
 
dd3a91
 Using GROW on containers is currently supported only for Intel's IMSM
dd3a91
@@ -2850,6 +2856,16 @@ can be added.  Note that if you add a bitmap stored in a file which is
dd3a91
 in a filesystem that is on the RAID array being affected, the system
dd3a91
 will deadlock.  The bitmap must be on a separate filesystem.
dd3a91
 
dd3a91
+.SS CONSISTENCY POLICY CHANGES
dd3a91
+
dd3a91
+The consistency policy of an active array can be changed by using the
dd3a91
+.B \-\-consistency\-policy
dd3a91
+option in Grow mode. Currently this works only for the
dd3a91
+.B ppl
dd3a91
+and
dd3a91
+.B resync
dd3a91
+policies and allows enabling or disabling the RAID5 Partial Parity Log (PPL).
dd3a91
+
dd3a91
 .SH INCREMENTAL MODE
dd3a91
 
dd3a91
 .HP 12
dd3a91
diff --git a/mdadm.c b/mdadm.c
dd3a91
index 6edf3ab..5ebf117 100644
dd3a91
--- a/mdadm.c
dd3a91
+++ b/mdadm.c
dd3a91
@@ -1221,6 +1221,7 @@ int main(int argc, char *argv[])
dd3a91
 			s.journaldisks = 1;
dd3a91
 			continue;
dd3a91
 		case O(CREATE, 'k'):
dd3a91
+		case O(GROW, 'k'):
dd3a91
 			s.consistency_policy = map_name(consistency_policies,
dd3a91
 							optarg);
dd3a91
 			if (s.consistency_policy == UnSet ||
dd3a91
@@ -1679,6 +1680,8 @@ int main(int argc, char *argv[])
dd3a91
 			rv = Grow_reshape(devlist->devname, mdfd,
dd3a91
 					  devlist->next,
dd3a91
 					  data_offset, &c, &s);
dd3a91
+		} else if (s.consistency_policy != UnSet) {
dd3a91
+			rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s);
dd3a91
 		} else if (array_size == 0)
dd3a91
 			pr_err("no changes to --grow\n");
dd3a91
 		break;
dd3a91
diff --git a/mdadm.h b/mdadm.h
dd3a91
index 2c7066d..4891acf 100644
dd3a91
--- a/mdadm.h
dd3a91
+++ b/mdadm.h
dd3a91
@@ -1331,6 +1331,8 @@ extern int Grow_restart(struct supertype *st, struct mdinfo *info,
dd3a91
 extern int Grow_continue(int mdfd, struct supertype *st,
dd3a91
 			 struct mdinfo *info, char *backup_file,
dd3a91
 			 int forked, int freeze_reshape);
dd3a91
+extern int Grow_consistency_policy(char *devname, int fd,
dd3a91
+				   struct context *c, struct shape *s);
dd3a91
 
dd3a91
 extern int restore_backup(struct supertype *st,
dd3a91
 			  struct mdinfo *content,