Blame SOURCES/imsmppl-support.patch

2c1b57
commit 2432ce9b3235f34d00ef6c28ef6b624a32b85530
2c1b57
Author: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
2c1b57
Date:   Wed Mar 29 11:54:17 2017 +0200
2c1b57
2c1b57
    imsm: PPL support
2c1b57
    
2c1b57
    Enable creating and assembling IMSM raid5 arrays with PPL. Update the
2c1b57
    IMSM metadata format to include new fields used for PPL.
2c1b57
    
2c1b57
    Add structures for PPL metadata. They are used also by super1 and shared
2c1b57
    with the kernel, so put them in md_p.h.
2c1b57
    
2c1b57
    Write the initial empty PPL header when creating an array. When
2c1b57
    assembling an array with PPL, validate the PPL header and in case it is
2c1b57
    not correct allow to overwrite it if --force was provided.
2c1b57
    
2c1b57
    Write the PPL location and size for a device to the new rdev sysfs
2c1b57
    attributes 'ppl_sector' and 'ppl_size'. Enable PPL in the kernel by
2c1b57
    writing to 'consistency_policy' before the array is activated.
2c1b57
    
2c1b57
    Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
2c1b57
    Signed-off-by: Jes Sorensen <Jes.Sorensen@gmail.com>
2c1b57
2c1b57
diff --git a/Assemble.c b/Assemble.c
2c1b57
index 3da0903..8e55b49 100644
2c1b57
--- a/Assemble.c
2c1b57
+++ b/Assemble.c
2c1b57
@@ -1942,6 +1942,55 @@ int assemble_container_content(struct supertype *st, int mdfd,
2c1b57
 	map_update(NULL, fd2devnm(mdfd), content->text_version,
2c1b57
 		   content->uuid, chosen_name);
2c1b57
 
2c1b57
+	if (content->consistency_policy == CONSISTENCY_POLICY_PPL &&
2c1b57
+	    st->ss->validate_ppl) {
2c1b57
+		content->array.state |= 1;
2c1b57
+		err = 0;
2c1b57
+
2c1b57
+		for (dev = content->devs; dev; dev = dev->next) {
2c1b57
+			int dfd;
2c1b57
+			char *devpath;
2c1b57
+			int ret;
2c1b57
+
2c1b57
+			ret = st->ss->validate_ppl(st, content, dev);
2c1b57
+			if (ret == 0)
2c1b57
+				continue;
2c1b57
+
2c1b57
+			if (ret < 0) {
2c1b57
+				err = 1;
2c1b57
+				break;
2c1b57
+			}
2c1b57
+
2c1b57
+			if (!c->force) {
2c1b57
+				pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n",
2c1b57
+					chosen_name);
2c1b57
+				content->array.state &= ~1;
2c1b57
+				avail[dev->disk.raid_disk] = 0;
2c1b57
+				break;
2c1b57
+			}
2c1b57
+
2c1b57
+			/* have --force - overwrite the invalid ppl */
2c1b57
+			devpath = map_dev(dev->disk.major, dev->disk.minor, 0);
2c1b57
+			dfd = dev_open(devpath, O_RDWR);
2c1b57
+			if (dfd < 0) {
2c1b57
+				pr_err("Failed to open %s\n", devpath);
2c1b57
+				err = 1;
2c1b57
+				break;
2c1b57
+			}
2c1b57
+
2c1b57
+			err = st->ss->write_init_ppl(st, content, dfd);
2c1b57
+			close(dfd);
2c1b57
+
2c1b57
+			if (err)
2c1b57
+				break;
2c1b57
+		}
2c1b57
+
2c1b57
+		if (err) {
2c1b57
+			free(avail);
2c1b57
+			return err;
2c1b57
+		}
2c1b57
+	}
2c1b57
+
2c1b57
 	if (enough(content->array.level, content->array.raid_disks,
2c1b57
 		   content->array.layout, content->array.state & 1, avail) == 0) {
2c1b57
 		if (c->export && result)
2c1b57
diff --git a/Makefile b/Makefile
2c1b57
index d1a6ac4..5ff6cc0 100644
2c1b57
--- a/Makefile
2c1b57
+++ b/Makefile
2c1b57
@@ -151,7 +151,7 @@ MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
2c1b57
 	Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \
2c1b57
 	super-mbr.o super-gpt.o \
2c1b57
 	super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \
2c1b57
-	platform-intel.o probe_roms.o
2c1b57
+	platform-intel.o probe_roms.o crc32c.o
2c1b57
 
2c1b57
 MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
2c1b57
 
2c1b57
@@ -161,7 +161,8 @@ STATICOBJS = pwgr.o
2c1b57
 ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
2c1b57
 	maps.c lib.c xmalloc.c \
2c1b57
 	super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
2c1b57
-	platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c mapfile.c
2c1b57
+	platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c mapfile.c \
2c1b57
+	crc32c.c
2c1b57
 ASSEMBLE_AUTO_SRCS := mdopen.c
2c1b57
 ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
2c1b57
 ifdef MDASSEMBLE_AUTO
2c1b57
diff --git a/md_p.h b/md_p.h
2c1b57
index dc9fec1..358a28c 100644
2c1b57
--- a/md_p.h
2c1b57
+++ b/md_p.h
2c1b57
@@ -267,4 +267,29 @@ struct r5l_meta_block {
2c1b57
 #define R5LOG_VERSION 0x1
2c1b57
 #define R5LOG_MAGIC 0x6433c509
2c1b57
 
2c1b57
+struct ppl_header_entry {
2c1b57
+	__u64 data_sector;	/* raid sector of the new data */
2c1b57
+	__u32 pp_size;		/* length of partial parity */
2c1b57
+	__u32 data_size;	/* length of data */
2c1b57
+	__u32 parity_disk;	/* member disk containing parity */
2c1b57
+	__u32 checksum;		/* checksum of this entry's partial parity */
2c1b57
+} __attribute__ ((__packed__));
2c1b57
+
2c1b57
+#define PPL_HEADER_SIZE 4096
2c1b57
+#define PPL_HDR_RESERVED 512
2c1b57
+#define PPL_HDR_ENTRY_SPACE \
2c1b57
+	(PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(__u32) - sizeof(__u64))
2c1b57
+#define PPL_HDR_MAX_ENTRIES \
2c1b57
+	(PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry))
2c1b57
+
2c1b57
+struct ppl_header {
2c1b57
+	__u8 reserved[PPL_HDR_RESERVED];/* reserved space, fill with 0xff */
2c1b57
+	__u32 signature;		/* signature (family number of volume) */
2c1b57
+	__u32 padding;			/* zero pad */
2c1b57
+	__u64 generation;		/* generation number of the header */
2c1b57
+	__u32 entries_count;		/* number of entries in entry array */
2c1b57
+	__u32 checksum;			/* checksum of the header */
2c1b57
+	struct ppl_header_entry entries[PPL_HDR_MAX_ENTRIES];
2c1b57
+} __attribute__ ((__packed__));
2c1b57
+
2c1b57
 #endif
2c1b57
diff --git a/mdadm.h b/mdadm.h
2c1b57
index b52d4d3..d222cc3 100644
2c1b57
--- a/mdadm.h
2c1b57
+++ b/mdadm.h
2c1b57
@@ -300,6 +300,8 @@ struct mdinfo {
2c1b57
 		#define MaxSector  (~0ULL) /* resync/recovery complete position */
2c1b57
 	};
2c1b57
 	long			bitmap_offset;	/* 0 == none, 1 == a file */
2c1b57
+	unsigned int		ppl_size;
2c1b57
+	unsigned long long	ppl_sector;
2c1b57
 	unsigned long		safe_mode_delay; /* ms delay to mark clean */
2c1b57
 	int			new_level, delta_disks, new_layout, new_chunk;
2c1b57
 	int			errors;
2c1b57
@@ -1074,6 +1076,10 @@ extern struct superswitch {
2c1b57
 	/* write initial empty PPL on device */
2c1b57
 	int (*write_init_ppl)(struct supertype *st, struct mdinfo *info, int fd);
2c1b57
 
2c1b57
+	/* validate ppl before assemble */
2c1b57
+	int (*validate_ppl)(struct supertype *st, struct mdinfo *info,
2c1b57
+			    struct mdinfo *disk);
2c1b57
+
2c1b57
 	/* records new bad block in metadata */
2c1b57
 	int (*record_bad_block)(struct active_array *a, int n,
2c1b57
 					unsigned long long sector, int length);
2c1b57
diff --git a/super-intel.c b/super-intel.c
2c1b57
index 2d92c8e..87fec8b 100644
2c1b57
--- a/super-intel.c
2c1b57
+++ b/super-intel.c
2c1b57
@@ -102,6 +102,7 @@ struct imsm_disk {
2c1b57
 #define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
2c1b57
 #define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
2c1b57
 #define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
2c1b57
+#define JOURNAL_DISK    __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */
2c1b57
 	__u32 status;			 /* 0xF0 - 0xF3 */
2c1b57
 	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
2c1b57
 	__u32 total_blocks_hi;		 /* 0xF4 - 0xF5 total blocks hi */
2c1b57
@@ -155,6 +156,9 @@ struct imsm_vol {
2c1b57
 #define MIGR_STATE_CHANGE 4
2c1b57
 #define MIGR_REPAIR 5
2c1b57
 	__u8  migr_type;	/* Initializing, Rebuilding, ... */
2c1b57
+#define RAIDVOL_CLEAN          0
2c1b57
+#define RAIDVOL_DIRTY          1
2c1b57
+#define RAIDVOL_DSRECORD_VALID 2
2c1b57
 	__u8  dirty;
2c1b57
 	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
2c1b57
 	__u16 verify_errors;	/* number of mismatches */
2c1b57
@@ -190,7 +194,24 @@ struct imsm_dev {
2c1b57
 	__u16 cache_policy;
2c1b57
 	__u8  cng_state;
2c1b57
 	__u8  cng_sub_state;
2c1b57
-#define IMSM_DEV_FILLERS 10
2c1b57
+	__u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */
2c1b57
+
2c1b57
+	/* NVM_EN */
2c1b57
+	__u8 nv_cache_mode;
2c1b57
+	__u8 nv_cache_flags;
2c1b57
+
2c1b57
+	/* Unique Volume Id of the NvCache Volume associated with this volume */
2c1b57
+	__u32 nvc_vol_orig_family_num;
2c1b57
+	__u16 nvc_vol_raid_dev_num;
2c1b57
+
2c1b57
+#define RWH_OFF 0
2c1b57
+#define RWH_DISTRIBUTED 1
2c1b57
+#define RWH_JOURNALING_DRIVE 2
2c1b57
+	__u8  rwh_policy; /* Raid Write Hole Policy */
2c1b57
+	__u8  jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
2c1b57
+	__u8  filler1;
2c1b57
+
2c1b57
+#define IMSM_DEV_FILLERS 3
2c1b57
 	__u32 filler[IMSM_DEV_FILLERS];
2c1b57
 	struct imsm_vol vol;
2c1b57
 } __attribute__ ((packed));
2c1b57
@@ -257,6 +278,9 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
2c1b57
 #define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
2c1b57
 				 *  already been migrated and must
2c1b57
 				 *  be recovered from checkpoint area */
2c1b57
+
2c1b57
+#define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */
2c1b57
+
2c1b57
 struct migr_record {
2c1b57
 	__u32 rec_status;	    /* Status used to determine how to restart
2c1b57
 				     * migration in case it aborts
2c1b57
@@ -1288,6 +1312,11 @@ static int is_failed(struct imsm_disk *disk)
2c1b57
 	return (disk->status & FAILED_DISK) == FAILED_DISK;
2c1b57
 }
2c1b57
 
2c1b57
+static int is_journal(struct imsm_disk *disk)
2c1b57
+{
2c1b57
+	return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
2c1b57
+}
2c1b57
+
2c1b57
 /* try to determine how much space is reserved for metadata from
2c1b57
  * the last get_extents() entry on the smallest active disk,
2c1b57
  * otherwise fallback to the default
2c1b57
@@ -1477,7 +1506,17 @@ static void print_imsm_dev(struct intel_super *super,
2c1b57
 				   blocks_per_migr_unit(super, dev));
2c1b57
 	}
2c1b57
 	printf("\n");
2c1b57
-	printf("    Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
2c1b57
+	printf("    Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
2c1b57
+					 "dirty" : "clean");
2c1b57
+	printf("     RWH Policy : ");
2c1b57
+	if (dev->rwh_policy == RWH_OFF)
2c1b57
+		printf("off\n");
2c1b57
+	else if (dev->rwh_policy == RWH_DISTRIBUTED)
2c1b57
+		printf("PPL distributed\n");
2c1b57
+	else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
2c1b57
+		printf("PPL journaling drive\n");
2c1b57
+	else
2c1b57
+		printf("<unknown:%d>\n", dev->rwh_policy);
2c1b57
 }
2c1b57
 
2c1b57
 static void print_imsm_disk(struct imsm_disk *disk,
2c1b57
@@ -1496,9 +1535,10 @@ static void print_imsm_disk(struct imsm_disk *disk,
2c1b57
 		printf("  Disk%02d Serial : %s\n", index, str);
2c1b57
 	else
2c1b57
 		printf("    Disk Serial : %s\n", str);
2c1b57
-	printf("          State :%s%s%s\n", is_spare(disk) ? " spare" : "",
2c1b57
-					    is_configured(disk) ? " active" : "",
2c1b57
-					    is_failed(disk) ? " failed" : "");
2c1b57
+	printf("          State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
2c1b57
+					      is_configured(disk) ? " active" : "",
2c1b57
+					      is_failed(disk) ? " failed" : "",
2c1b57
+					      is_journal(disk) ? " journal" : "");
2c1b57
 	printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
2c1b57
 	sz = total_blocks(disk) - reserved;
2c1b57
 	printf("    Usable Size : %llu%s\n",
2c1b57
@@ -3114,6 +3154,15 @@ static unsigned long long imsm_component_size_aligment_check(int level,
2c1b57
 	return component_size;
2c1b57
 }
2c1b57
 
2c1b57
+static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
2c1b57
+{
2c1b57
+	struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
2c1b57
+	struct imsm_map *map = get_imsm_map(dev, MAP_0);
2c1b57
+
2c1b57
+	return pba_of_lba0(map) +
2c1b57
+	       (num_data_stripes(map) * map->blocks_per_strip);
2c1b57
+}
2c1b57
+
2c1b57
 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
2c1b57
 {
2c1b57
 	struct intel_super *super = st->sb;
2c1b57
@@ -3140,7 +3189,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
2c1b57
 	info->array.utime	  = 0;
2c1b57
 	info->array.chunk_size	  =
2c1b57
 		__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
2c1b57
-	info->array.state	  = !dev->vol.dirty;
2c1b57
+	info->array.state	  = !(dev->vol.dirty & RAIDVOL_DIRTY);
2c1b57
 	info->custom_array_size   = __le32_to_cpu(dev->size_high);
2c1b57
 	info->custom_array_size   <<= 32;
2c1b57
 	info->custom_array_size   |= __le32_to_cpu(dev->size_low);
2c1b57
@@ -3221,10 +3270,20 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
2c1b57
 	memset(info->uuid, 0, sizeof(info->uuid));
2c1b57
 	info->recovery_start = MaxSector;
2c1b57
 
2c1b57
+	if (info->array.level == 5 && dev->rwh_policy == RWH_DISTRIBUTED) {
2c1b57
+		info->consistency_policy = CONSISTENCY_POLICY_PPL;
2c1b57
+		info->ppl_sector = get_ppl_sector(super, super->current_vol);
2c1b57
+		info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
2c1b57
+	} else if (info->array.level <= 0) {
2c1b57
+		info->consistency_policy = CONSISTENCY_POLICY_NONE;
2c1b57
+	} else {
2c1b57
+		info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
2c1b57
+	}
2c1b57
+
2c1b57
 	info->reshape_progress = 0;
2c1b57
 	info->resync_start = MaxSector;
2c1b57
 	if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
2c1b57
-	    dev->vol.dirty) &&
2c1b57
+	    !(info->array.state & 1)) &&
2c1b57
 	    imsm_reshape_blocks_arrays_changes(super) == 0) {
2c1b57
 		info->resync_start = 0;
2c1b57
 	}
2c1b57
@@ -3451,7 +3510,8 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
2c1b57
 		 * found the 'most fresh' version of the metadata
2c1b57
 		 */
2c1b57
 		info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2c1b57
-		info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2c1b57
+		info->disk.state |= (is_spare(disk) || is_journal(disk)) ?
2c1b57
+				    0 : (1 << MD_DISK_SYNC);
2c1b57
 	}
2c1b57
 
2c1b57
 	/* only call uuid_from_super_imsm when this disk is part of a populated container,
2c1b57
@@ -3906,7 +3966,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2c1b57
 		 */
2c1b57
 		if (is_failed(&dl->disk))
2c1b57
 			dl->index = -2;
2c1b57
-		else if (is_spare(&dl->disk))
2c1b57
+		else if (is_spare(&dl->disk) || is_journal(&dl->disk))
2c1b57
 			dl->index = -1;
2c1b57
 	}
2c1b57
 
2c1b57
@@ -5303,6 +5363,20 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
2c1b57
 	}
2c1b57
 	mpb->num_raid_devs++;
2c1b57
 
2c1b57
+	if (s->consistency_policy == UnSet ||
2c1b57
+	    s->consistency_policy == CONSISTENCY_POLICY_RESYNC ||
2c1b57
+	    s->consistency_policy == CONSISTENCY_POLICY_NONE) {
2c1b57
+		dev->rwh_policy = RWH_OFF;
2c1b57
+	} else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
2c1b57
+		dev->rwh_policy = RWH_DISTRIBUTED;
2c1b57
+	} else {
2c1b57
+		free(dev);
2c1b57
+		free(dv);
2c1b57
+		pr_err("imsm does not support consistency policy %s\n",
2c1b57
+		       map_num(consistency_policies, s->consistency_policy));
2c1b57
+		return 0;
2c1b57
+	}
2c1b57
+
2c1b57
 	dv->dev = dev;
2c1b57
 	dv->index = super->current_vol;
2c1b57
 	dv->next = super->devlist;
2c1b57
@@ -5927,11 +6001,146 @@ static int mgmt_disk(struct supertype *st)
2c1b57
 
2c1b57
 	return 0;
2c1b57
 }
2c1b57
+#endif
2c1b57
+
2c1b57
+__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
2c1b57
+
2c1b57
+static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd)
2c1b57
+{
2c1b57
+	struct intel_super *super = st->sb;
2c1b57
+	void *buf;
2c1b57
+	struct ppl_header *ppl_hdr;
2c1b57
+	int ret;
2c1b57
+
2c1b57
+	ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
2c1b57
+	if (ret) {
2c1b57
+		pr_err("Failed to allocate PPL header buffer\n");
2c1b57
+		return ret;
2c1b57
+	}
2c1b57
+
2c1b57
+	memset(buf, 0, PPL_HEADER_SIZE);
2c1b57
+	ppl_hdr = buf;
2c1b57
+	memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
2c1b57
+	ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
2c1b57
+	ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
2c1b57
+
2c1b57
+	if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
2c1b57
+		ret = errno;
2c1b57
+		perror("Failed to seek to PPL header location");
2c1b57
+	}
2c1b57
+
2c1b57
+	if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
2c1b57
+		ret = errno;
2c1b57
+		perror("Write PPL header failed");
2c1b57
+	}
2c1b57
+
2c1b57
+	if (!ret)
2c1b57
+		fsync(fd);
2c1b57
+
2c1b57
+	free(buf);
2c1b57
+	return ret;
2c1b57
+}
2c1b57
+
2c1b57
+static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
2c1b57
+			     struct mdinfo *disk)
2c1b57
+{
2c1b57
+	struct intel_super *super = st->sb;
2c1b57
+	struct dl *d;
2c1b57
+	void *buf;
2c1b57
+	int ret = 0;
2c1b57
+	struct ppl_header *ppl_hdr;
2c1b57
+	__u32 crc;
2c1b57
+	struct imsm_dev *dev;
2c1b57
+	struct imsm_map *map;
2c1b57
+	__u32 idx;
2c1b57
+
2c1b57
+	if (disk->disk.raid_disk < 0)
2c1b57
+		return 0;
2c1b57
+
2c1b57
+	if (posix_memalign(&buf, 4096, PPL_HEADER_SIZE)) {
2c1b57
+		pr_err("Failed to allocate PPL header buffer\n");
2c1b57
+		return -1;
2c1b57
+	}
2c1b57
+
2c1b57
+	dev = get_imsm_dev(super, info->container_member);
2c1b57
+	map = get_imsm_map(dev, MAP_X);
2c1b57
+	idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_X);
2c1b57
+	d = get_imsm_dl_disk(super, idx);
2c1b57
+
2c1b57
+	if (!d || d->index < 0 || is_failed(&d->disk))
2c1b57
+		goto out;
2c1b57
+
2c1b57
+	if (lseek64(d->fd, info->ppl_sector * 512, SEEK_SET) < 0) {
2c1b57
+		perror("Failed to seek to PPL header location");
2c1b57
+		ret = -1;
2c1b57
+		goto out;
2c1b57
+	}
2c1b57
+
2c1b57
+	if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
2c1b57
+		perror("Read PPL header failed");
2c1b57
+		ret = -1;
2c1b57
+		goto out;
2c1b57
+	}
2c1b57
+
2c1b57
+	ppl_hdr = buf;
2c1b57
+
2c1b57
+	crc = __le32_to_cpu(ppl_hdr->checksum);
2c1b57
+	ppl_hdr->checksum = 0;
2c1b57
+
2c1b57
+	if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
2c1b57
+		dprintf("Wrong PPL header checksum on %s\n",
2c1b57
+			d->devname);
2c1b57
+		ret = 1;
2c1b57
+	}
2c1b57
+
2c1b57
+	if (!ret && (__le32_to_cpu(ppl_hdr->signature) !=
2c1b57
+		      super->anchor->orig_family_num)) {
2c1b57
+		dprintf("Wrong PPL header signature on %s\n",
2c1b57
+			d->devname);
2c1b57
+		ret = 1;
2c1b57
+	}
2c1b57
+
2c1b57
+out:
2c1b57
+	free(buf);
2c1b57
+
2c1b57
+	if (ret == 1 && map->map_state == IMSM_T_STATE_UNINITIALIZED)
2c1b57
+		return st->ss->write_init_ppl(st, info, d->fd);
2c1b57
+
2c1b57
+	return ret;
2c1b57
+}
2c1b57
+
2c1b57
+#ifndef MDASSEMBLE
2c1b57
+
2c1b57
+static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info)
2c1b57
+{
2c1b57
+	struct intel_super *super = st->sb;
2c1b57
+	struct dl *d;
2c1b57
+	int ret = 0;
2c1b57
+
2c1b57
+	if (info->consistency_policy != CONSISTENCY_POLICY_PPL ||
2c1b57
+	    info->array.level != 5)
2c1b57
+		return 0;
2c1b57
+
2c1b57
+	for (d = super->disks; d ; d = d->next) {
2c1b57
+		if (d->index < 0 || is_failed(&d->disk))
2c1b57
+			continue;
2c1b57
+
2c1b57
+		ret = st->ss->write_init_ppl(st, info, d->fd);
2c1b57
+		if (ret)
2c1b57
+			break;
2c1b57
+	}
2c1b57
+
2c1b57
+	return ret;
2c1b57
+}
2c1b57
 
2c1b57
 static int write_init_super_imsm(struct supertype *st)
2c1b57
 {
2c1b57
 	struct intel_super *super = st->sb;
2c1b57
 	int current_vol = super->current_vol;
2c1b57
+	int rv = 0;
2c1b57
+	struct mdinfo info;
2c1b57
+
2c1b57
+	getinfo_super_imsm(st, &info, NULL);
2c1b57
 
2c1b57
 	/* we are done with current_vol reset it to point st at the container */
2c1b57
 	super->current_vol = -1;
2c1b57
@@ -5939,24 +6148,29 @@ static int write_init_super_imsm(struct supertype *st)
2c1b57
 	if (st->update_tail) {
2c1b57
 		/* queue the recently created array / added disk
2c1b57
 		 * as a metadata update */
2c1b57
-		int rv;
2c1b57
 
2c1b57
 		/* determine if we are creating a volume or adding a disk */
2c1b57
 		if (current_vol < 0) {
2c1b57
 			/* in the mgmt (add/remove) disk case we are running
2c1b57
 			 * in mdmon context, so don't close fd's
2c1b57
 			 */
2c1b57
-			return mgmt_disk(st);
2c1b57
-		} else
2c1b57
-			rv = create_array(st, current_vol);
2c1b57
-
2c1b57
-		return rv;
2c1b57
+			rv = mgmt_disk(st);
2c1b57
+		} else {
2c1b57
+			rv = write_init_ppl_imsm_all(st, &info);
2c1b57
+			if (!rv)
2c1b57
+				rv = create_array(st, current_vol);
2c1b57
+		}
2c1b57
 	} else {
2c1b57
 		struct dl *d;
2c1b57
 		for (d = super->disks; d; d = d->next)
2c1b57
 			Kill(d->devname, NULL, 0, -1, 1);
2c1b57
-		return write_super_imsm(st, 1);
2c1b57
+		if (current_vol >= 0)
2c1b57
+			rv = write_init_ppl_imsm_all(st, &info);
2c1b57
+		if (!rv)
2c1b57
+			rv = write_super_imsm(st, 1);
2c1b57
 	}
2c1b57
+
2c1b57
+	return rv;
2c1b57
 }
2c1b57
 #endif
2c1b57
 
2c1b57
@@ -7375,7 +7589,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
2c1b57
 			 *
2c1b57
 			 * FIXME handle dirty degraded
2c1b57
 			 */
2c1b57
-			if ((skip || recovery_start == 0) && !dev->vol.dirty)
2c1b57
+			if ((skip || recovery_start == 0) &&
2c1b57
+			    !(dev->vol.dirty & RAIDVOL_DIRTY))
2c1b57
 				this->resync_start = MaxSector;
2c1b57
 			if (skip)
2c1b57
 				continue;
2c1b57
@@ -7410,9 +7625,12 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
2c1b57
 				info_d->component_size =
2c1b57
 						num_data_stripes(map) *
2c1b57
 						map->blocks_per_strip;
2c1b57
+				info_d->ppl_sector = this->ppl_sector;
2c1b57
+				info_d->ppl_size = this->ppl_size;
2c1b57
 			} else {
2c1b57
 				info_d->component_size = blocks_per_member(map);
2c1b57
 			}
2c1b57
+			info_d->consistency_policy = this->consistency_policy;
2c1b57
 
2c1b57
 			info_d->bb.supported = 1;
2c1b57
 			get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
2c1b57
@@ -7928,12 +8146,16 @@ mark_checkpoint:
2c1b57
 
2c1b57
 skip_mark_checkpoint:
2c1b57
 	/* mark dirty / clean */
2c1b57
-	if (dev->vol.dirty != !consistent) {
2c1b57
+	if (((dev->vol.dirty & RAIDVOL_DIRTY) && consistent) ||
2c1b57
+	    (!(dev->vol.dirty & RAIDVOL_DIRTY) && !consistent)) {
2c1b57
 		dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
2c1b57
-		if (consistent)
2c1b57
-			dev->vol.dirty = 0;
2c1b57
-		else
2c1b57
-			dev->vol.dirty = 1;
2c1b57
+		if (consistent) {
2c1b57
+			dev->vol.dirty = RAIDVOL_CLEAN;
2c1b57
+		} else {
2c1b57
+			dev->vol.dirty = RAIDVOL_DIRTY;
2c1b57
+			if (dev->rwh_policy == RWH_DISTRIBUTED)
2c1b57
+				dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
2c1b57
+		}
2c1b57
 		super->updates_pending++;
2c1b57
 	}
2c1b57
 
2c1b57
@@ -8445,6 +8667,11 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
2c1b57
 		di->component_size = a->info.component_size;
2c1b57
 		di->container_member = inst;
2c1b57
 		di->bb.supported = 1;
2c1b57
+		if (dev->rwh_policy == RWH_DISTRIBUTED) {
2c1b57
+			di->consistency_policy = CONSISTENCY_POLICY_PPL;
2c1b57
+			di->ppl_sector = get_ppl_sector(super, inst);
2c1b57
+			di->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
2c1b57
+		}
2c1b57
 		super->random = random32();
2c1b57
 		di->next = rv;
2c1b57
 		rv = di;
2c1b57
@@ -11600,6 +11827,9 @@ struct superswitch super_imsm = {
2c1b57
 	.container_content = container_content_imsm,
2c1b57
 	.validate_container = validate_container_imsm,
2c1b57
 
2c1b57
+	.write_init_ppl = write_init_ppl_imsm,
2c1b57
+	.validate_ppl	= validate_ppl_imsm,
2c1b57
+
2c1b57
 	.external	= 1,
2c1b57
 	.name = "imsm",
2c1b57
 
2c1b57
diff --git a/sysfs.c b/sysfs.c
2c1b57
index 53589a7..2a91ba0 100644
2c1b57
--- a/sysfs.c
2c1b57
+++ b/sysfs.c
2c1b57
@@ -689,6 +689,16 @@ int sysfs_set_array(struct mdinfo *info, int vers)
2c1b57
 		 * once the reshape completes.
2c1b57
 		 */
2c1b57
 	}
2c1b57
+
2c1b57
+	if (info->consistency_policy == CONSISTENCY_POLICY_PPL) {
2c1b57
+		if (sysfs_set_str(info, NULL, "consistency_policy",
2c1b57
+				  map_num(consistency_policies,
2c1b57
+					  info->consistency_policy))) {
2c1b57
+			pr_err("This kernel does not support PPL\n");
2c1b57
+			return 1;
2c1b57
+		}
2c1b57
+	}
2c1b57
+
2c1b57
 	return rv;
2c1b57
 }
2c1b57
 
2c1b57
@@ -720,6 +730,10 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
2c1b57
 	rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
2c1b57
 	rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
2c1b57
 	if (sra->array.level != LEVEL_CONTAINER) {
2c1b57
+		if (sd->consistency_policy == CONSISTENCY_POLICY_PPL) {
2c1b57
+			rv |= sysfs_set_num(sra, sd, "ppl_sector", sd->ppl_sector);
2c1b57
+			rv |= sysfs_set_num(sra, sd, "ppl_size", sd->ppl_size);
2c1b57
+		}
2c1b57
 		if (sd->recovery_start == MaxSector)
2c1b57
 			/* This can correctly fail if array isn't started,
2c1b57
 			 * yet, so just ignore status for now.