dcavalca / rpms / mdadm

Forked from rpms/mdadm 3 years ago
Clone

Blame SOURCES/support-consistency-policy1-change.patch

2c1b57
commit 860f11ed4d6a7bac6f2d698a30a13371c0aa7924
2c1b57
Author: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
2c1b57
Date:   Wed Mar 29 11:54:20 2017 +0200
2c1b57
2c1b57
    Grow: support consistency policy change
2c1b57
    
2c1b57
    Extend the --consistency-policy parameter to work also in Grow mode.
2c1b57
    Using it changes the currently active consistency policy in the kernel
2c1b57
    driver and updates the metadata to make this change permanent. Currently
2c1b57
    this supports only changing between "ppl" and "resync" policies, that is
2c1b57
    enabling or disabling PPL at runtime.
2c1b57
    
2c1b57
    Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
2c1b57
    Signed-off-by: Jes Sorensen <Jes.Sorensen@gmail.com>
2c1b57
2c1b57
diff --git a/Grow.c b/Grow.c
2c1b57
index a849012..b86b53e 100755
2c1b57
--- a/Grow.c
2c1b57
+++ b/Grow.c
2c1b57
@@ -528,6 +528,178 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
2c1b57
 	return 0;
2c1b57
 }
2c1b57
 
2c1b57
+int Grow_consistency_policy(char *devname, int fd, struct context *c, struct shape *s)
2c1b57
+{
2c1b57
+	struct supertype *st;
2c1b57
+	struct mdinfo *sra;
2c1b57
+	struct mdinfo *sd;
2c1b57
+	char *subarray = NULL;
2c1b57
+	int ret = 0;
2c1b57
+	char container_dev[PATH_MAX];
2c1b57
+
2c1b57
+	if (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
2c1b57
+	    s->consistency_policy != CONSISTENCY_POLICY_PPL) {
2c1b57
+		pr_err("Operation not supported for consistency policy %s\n",
2c1b57
+		       map_num(consistency_policies, s->consistency_policy));
2c1b57
+		return 1;
2c1b57
+	}
2c1b57
+
2c1b57
+	st = super_by_fd(fd, &subarray);
2c1b57
+	if (!st)
2c1b57
+		return 1;
2c1b57
+
2c1b57
+	sra = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY|GET_LEVEL|
2c1b57
+				   GET_DEVS|GET_STATE);
2c1b57
+	if (!sra) {
2c1b57
+		ret = 1;
2c1b57
+		goto free_st;
2c1b57
+	}
2c1b57
+
2c1b57
+	if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
2c1b57
+	    !st->ss->write_init_ppl) {
2c1b57
+		pr_err("%s metadata does not support PPL\n", st->ss->name);
2c1b57
+		ret = 1;
2c1b57
+		goto free_info;
2c1b57
+	}
2c1b57
+
2c1b57
+	if (sra->array.level != 5) {
2c1b57
+		pr_err("Operation not supported for array level %d\n",
2c1b57
+				sra->array.level);
2c1b57
+		ret = 1;
2c1b57
+		goto free_info;
2c1b57
+	}
2c1b57
+
2c1b57
+	if (sra->consistency_policy == (unsigned)s->consistency_policy) {
2c1b57
+		pr_err("Consistency policy is already %s\n",
2c1b57
+		       map_num(consistency_policies, s->consistency_policy));
2c1b57
+		ret = 1;
2c1b57
+		goto free_info;
2c1b57
+	} else if (sra->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
2c1b57
+		   sra->consistency_policy != CONSISTENCY_POLICY_PPL) {
2c1b57
+		pr_err("Current consistency policy is %s, cannot change to %s\n",
2c1b57
+		       map_num(consistency_policies, sra->consistency_policy),
2c1b57
+		       map_num(consistency_policies, s->consistency_policy));
2c1b57
+		ret = 1;
2c1b57
+		goto free_info;
2c1b57
+	}
2c1b57
+
2c1b57
+	if (subarray) {
2c1b57
+		char *update;
2c1b57
+
2c1b57
+		if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
2c1b57
+			update = "ppl";
2c1b57
+		else
2c1b57
+			update = "no-ppl";
2c1b57
+
2c1b57
+		sprintf(container_dev, "/dev/%s", st->container_devnm);
2c1b57
+
2c1b57
+		ret = Update_subarray(container_dev, subarray, update, NULL,
2c1b57
+				      c->verbose);
2c1b57
+		if (ret)
2c1b57
+			goto free_info;
2c1b57
+	}
2c1b57
+
2c1b57
+	if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
2c1b57
+		struct mdinfo info;
2c1b57
+
2c1b57
+		if (subarray) {
2c1b57
+			struct mdinfo *mdi;
2c1b57
+			int cfd;
2c1b57
+
2c1b57
+			cfd = open(container_dev, O_RDWR|O_EXCL);
2c1b57
+			if (cfd < 0) {
2c1b57
+				pr_err("Failed to open %s\n", container_dev);
2c1b57
+				ret = 1;
2c1b57
+				goto free_info;
2c1b57
+			}
2c1b57
+
2c1b57
+			ret = st->ss->load_container(st, cfd, st->container_devnm);
2c1b57
+			close(cfd);
2c1b57
+
2c1b57
+			if (ret) {
2c1b57
+				pr_err("Cannot read superblock for %s\n",
2c1b57
+				       container_dev);
2c1b57
+				goto free_info;
2c1b57
+			}
2c1b57
+
2c1b57
+			mdi = st->ss->container_content(st, subarray);
2c1b57
+			info = *mdi;
2c1b57
+			free(mdi);
2c1b57
+		}
2c1b57
+
2c1b57
+		for (sd = sra->devs; sd; sd = sd->next) {
2c1b57
+			int dfd;
2c1b57
+			char *devpath;
2c1b57
+
2c1b57
+			if ((sd->disk.state & (1 << MD_DISK_SYNC)) == 0)
2c1b57
+				continue;
2c1b57
+
2c1b57
+			devpath = map_dev(sd->disk.major, sd->disk.minor, 0);
2c1b57
+			dfd = dev_open(devpath, O_RDWR);
2c1b57
+			if (dfd < 0) {
2c1b57
+				pr_err("Failed to open %s\n", devpath);
2c1b57
+				ret = 1;
2c1b57
+				goto free_info;
2c1b57
+			}
2c1b57
+
2c1b57
+			if (!subarray) {
2c1b57
+				ret = st->ss->load_super(st, dfd, NULL);
2c1b57
+				if (ret) {
2c1b57
+					pr_err("Failed to load super-block.\n");
2c1b57
+					close(dfd);
2c1b57
+					goto free_info;
2c1b57
+				}
2c1b57
+
2c1b57
+				ret = st->ss->update_super(st, sra, "ppl", devname,
2c1b57
+							   c->verbose, 0, NULL);
2c1b57
+				if (ret) {
2c1b57
+					close(dfd);
2c1b57
+					st->ss->free_super(st);
2c1b57
+					goto free_info;
2c1b57
+				}
2c1b57
+				st->ss->getinfo_super(st, &info, NULL);
2c1b57
+			}
2c1b57
+
2c1b57
+			ret |= sysfs_set_num(sra, sd, "ppl_sector", info.ppl_sector);
2c1b57
+			ret |= sysfs_set_num(sra, sd, "ppl_size", info.ppl_size);
2c1b57
+
2c1b57
+			if (ret) {
2c1b57
+				pr_err("Failed to set PPL attributes for %s\n",
2c1b57
+				       sd->sys_name);
2c1b57
+				close(dfd);
2c1b57
+				st->ss->free_super(st);
2c1b57
+				goto free_info;
2c1b57
+			}
2c1b57
+
2c1b57
+			ret = st->ss->write_init_ppl(st, &info, dfd);
2c1b57
+			if (ret)
2c1b57
+				pr_err("Failed to write PPL\n");
2c1b57
+
2c1b57
+			close(dfd);
2c1b57
+
2c1b57
+			if (!subarray)
2c1b57
+				st->ss->free_super(st);
2c1b57
+
2c1b57
+			if (ret)
2c1b57
+				goto free_info;
2c1b57
+		}
2c1b57
+	}
2c1b57
+
2c1b57
+	ret = sysfs_set_str(sra, NULL, "consistency_policy",
2c1b57
+			    map_num(consistency_policies,
2c1b57
+				    s->consistency_policy));
2c1b57
+	if (ret)
2c1b57
+		pr_err("Failed to change array consistency policy\n");
2c1b57
+
2c1b57
+free_info:
2c1b57
+	sysfs_free(sra);
2c1b57
+free_st:
2c1b57
+	free(st);
2c1b57
+	free(subarray);
2c1b57
+
2c1b57
+	return ret;
2c1b57
+}
2c1b57
+
2c1b57
 /*
2c1b57
  * When reshaping an array we might need to backup some data.
2c1b57
  * This is written to all spares with a 'super_block' describing it.
2c1b57
diff --git a/ReadMe.c b/ReadMe.c
2c1b57
index fc04c2c..eb8fb4b 100644
2c1b57
--- a/ReadMe.c
2c1b57
+++ b/ReadMe.c
2c1b57
@@ -559,28 +559,30 @@ char Help_grow[] =
2c1b57
 "reconfiguration.\n"
2c1b57
 "\n"
2c1b57
 "Options that are valid with the grow (-G --grow) mode are:\n"
2c1b57
-"  --level=       -l   : Tell mdadm what level to convert the array to.\n"
2c1b57
-"  --layout=      -p   : For a FAULTY array, set/change the error mode.\n"
2c1b57
-"                      : for other arrays, update the layout\n"
2c1b57
-"  --size=        -z   : Change the active size of devices in an array.\n"
2c1b57
-"                      : This is useful if all devices have been replaced\n"
2c1b57
-"                      : with larger devices.   Value is in Kilobytes, or\n"
2c1b57
-"                      : the special word 'max' meaning 'as large as possible'.\n"
2c1b57
-"  --assume-clean      : When increasing the --size, this flag will avoid\n"
2c1b57
-"                      : a resync of the new space\n"
2c1b57
-"  --chunk=       -c   : Change the chunksize of the array\n"
2c1b57
-"  --raid-devices= -n  : Change the number of active devices in an array.\n"
2c1b57
-"  --add=         -a   : Add listed devices as part of reshape.  This is\n"
2c1b57
-"                      : needed for resizing a RAID0 which cannot have\n"
2c1b57
-"                      : spares already present.\n"
2c1b57
-"  --bitmap=      -b   : Add or remove a write-intent bitmap.\n"
2c1b57
-"  --backup-file= file : A file on a different device to store data for a\n"
2c1b57
-"                      : short time while increasing raid-devices on a\n"
2c1b57
-"                      : RAID4/5/6 array. Also needed throughout a reshape\n"
2c1b57
-"                      : when changing parameters other than raid-devices\n"
2c1b57
-"  --array-size=  -Z   : Change visible size of array.  This does not change\n"
2c1b57
-"                      : any data on the device, and is not stable across restarts.\n"
2c1b57
-"  --data-offset=      : Location on device to move start of data to.\n"
2c1b57
+"  --level=           -l : Tell mdadm what level to convert the array to.\n"
2c1b57
+"  --layout=          -p : For a FAULTY array, set/change the error mode.\n"
2c1b57
+"                        : for other arrays, update the layout\n"
2c1b57
+"  --size=            -z : Change the active size of devices in an array.\n"
2c1b57
+"                        : This is useful if all devices have been replaced\n"
2c1b57
+"                        : with larger devices.   Value is in Kilobytes, or\n"
2c1b57
+"                        : the special word 'max' meaning 'as large as possible'.\n"
2c1b57
+"  --assume-clean        : When increasing the --size, this flag will avoid\n"
2c1b57
+"                        : a resync of the new space\n"
2c1b57
+"  --chunk=           -c : Change the chunksize of the array\n"
2c1b57
+"  --raid-devices=    -n : Change the number of active devices in an array.\n"
2c1b57
+"  --add=             -a : Add listed devices as part of reshape.  This is\n"
2c1b57
+"                        : needed for resizing a RAID0 which cannot have\n"
2c1b57
+"                        : spares already present.\n"
2c1b57
+"  --bitmap=          -b : Add or remove a write-intent bitmap.\n"
2c1b57
+"  --backup-file= file   : A file on a different device to store data for a\n"
2c1b57
+"                        : short time while increasing raid-devices on a\n"
2c1b57
+"                        : RAID4/5/6 array. Also needed throughout a reshape\n"
2c1b57
+"                        : when changing parameters other than raid-devices\n"
2c1b57
+"  --array-size=      -Z : Change visible size of array. This does not change any\n"
2c1b57
+"                        : data on the device, and is not stable across restarts.\n"
2c1b57
+"  --data-offset=        : Location on device to move start of data to.\n"
2c1b57
+"  --consistency-policy= : Change the consistency policy of an active array.\n"
2c1b57
+"                     -k : Currently works only for PPL with RAID5.\n"
2c1b57
 ;
2c1b57
 
2c1b57
 char Help_incr[] =
2c1b57
diff --git a/mdadm.8.in b/mdadm.8.in
2c1b57
index 1178ed9..744c12b 100644
2c1b57
--- a/mdadm.8.in
2c1b57
+++ b/mdadm.8.in
2c1b57
@@ -126,7 +126,7 @@ of component devices and changing the number of active devices in
2c1b57
 Linear and RAID levels 0/1/4/5/6,
2c1b57
 changing the RAID level between 0, 1, 5, and 6, and between 0 and 10,
2c1b57
 changing the chunk size and layout for RAID 0,4,5,6,10 as well as adding or
2c1b57
-removing a write-intent bitmap.
2c1b57
+removing a write-intent bitmap and changing the array's consistency policy.
2c1b57
 
2c1b57
 .TP
2c1b57
 .B "Incremental Assembly"
2c1b57
@@ -1050,6 +1050,10 @@ after unclean shutdown. Implicitly selected when using
2c1b57
 For RAID5 only, Partial Parity Log is used to close the write hole and
2c1b57
 eliminate resync. PPL is stored in the metadata region of RAID member drives,
2c1b57
 no additional journal drive is needed.
2c1b57
+
2c1b57
+.PP
2c1b57
+Can be used with \-\-grow to change the consistency policy of an active array
2c1b57
+in some cases. See CONSISTENCY POLICY CHANGES below.
2c1b57
 .RE
2c1b57
 
2c1b57
 
2c1b57
@@ -2694,6 +2698,8 @@ RAID0, RAID4, and RAID5, and between RAID0 and RAID10 (in the near-2 mode).
2c1b57
 .IP \(bu 4
2c1b57
 add a write-intent bitmap to any array which supports these bitmaps, or
2c1b57
 remove a write-intent bitmap from such an array.
2c1b57
+.IP \(bu 4
2c1b57
+change the array's consistency policy.
2c1b57
 .PP
2c1b57
 
2c1b57
 Using GROW on containers is currently supported only for Intel's IMSM
2c1b57
@@ -2850,6 +2856,16 @@ can be added.  Note that if you add a bitmap stored in a file which is
2c1b57
 in a filesystem that is on the RAID array being affected, the system
2c1b57
 will deadlock.  The bitmap must be on a separate filesystem.
2c1b57
 
2c1b57
+.SS CONSISTENCY POLICY CHANGES
2c1b57
+
2c1b57
+The consistency policy of an active array can be changed by using the
2c1b57
+.B \-\-consistency\-policy
2c1b57
+option in Grow mode. Currently this works only for the
2c1b57
+.B ppl
2c1b57
+and
2c1b57
+.B resync
2c1b57
+policies and allows enabling or disabling the RAID5 Partial Parity Log (PPL).
2c1b57
+
2c1b57
 .SH INCREMENTAL MODE
2c1b57
 
2c1b57
 .HP 12
2c1b57
diff --git a/mdadm.c b/mdadm.c
2c1b57
index 6edf3ab..5ebf117 100644
2c1b57
--- a/mdadm.c
2c1b57
+++ b/mdadm.c
2c1b57
@@ -1221,6 +1221,7 @@ int main(int argc, char *argv[])
2c1b57
 			s.journaldisks = 1;
2c1b57
 			continue;
2c1b57
 		case O(CREATE, 'k'):
2c1b57
+		case O(GROW, 'k'):
2c1b57
 			s.consistency_policy = map_name(consistency_policies,
2c1b57
 							optarg);
2c1b57
 			if (s.consistency_policy == UnSet ||
2c1b57
@@ -1679,6 +1680,8 @@ int main(int argc, char *argv[])
2c1b57
 			rv = Grow_reshape(devlist->devname, mdfd,
2c1b57
 					  devlist->next,
2c1b57
 					  data_offset, &c, &s);
2c1b57
+		} else if (s.consistency_policy != UnSet) {
2c1b57
+			rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s);
2c1b57
 		} else if (array_size == 0)
2c1b57
 			pr_err("no changes to --grow\n");
2c1b57
 		break;
2c1b57
diff --git a/mdadm.h b/mdadm.h
2c1b57
index 2c7066d..4891acf 100644
2c1b57
--- a/mdadm.h
2c1b57
+++ b/mdadm.h
2c1b57
@@ -1331,6 +1331,8 @@ extern int Grow_restart(struct supertype *st, struct mdinfo *info,
2c1b57
 extern int Grow_continue(int mdfd, struct supertype *st,
2c1b57
 			 struct mdinfo *info, char *backup_file,
2c1b57
 			 int forked, int freeze_reshape);
2c1b57
+extern int Grow_consistency_policy(char *devname, int fd,
2c1b57
+				   struct context *c, struct shape *s);
2c1b57
 
2c1b57
 extern int restore_backup(struct supertype *st,
2c1b57
 			  struct mdinfo *content,