Blame SOURCES/lvm2-2_02_188-scanning-optimize-by-checking-text-offset-and-checks.patch

3c4a43
 lib/cache/lvmcache.c          |  24 +++++++++
3c4a43
 lib/cache/lvmcache.h          |   4 ++
3c4a43
 lib/device/bcache-utils.c     |  15 ++++++
3c4a43
 lib/device/bcache.h           |   1 +
3c4a43
 lib/format_text/format-text.c |  16 +++++-
3c4a43
 lib/format_text/layout.h      |   2 +-
3c4a43
 lib/format_text/text_label.c  |   2 +-
3c4a43
 lib/label/label.c             |   5 ++
3c4a43
 lib/label/label.h             |   1 +
3c4a43
 lib/metadata/metadata.c       | 117 +++++++++++++++++++++++++++++++++++++++++-
3c4a43
 lib/metadata/metadata.h       |   4 +-
3c4a43
 11 files changed, 186 insertions(+), 5 deletions(-)
3c4a43
3c4a43
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c
3c4a43
index b6a02b0..5b8dce8 100644
3c4a43
--- a/lib/cache/lvmcache.c
3c4a43
+++ b/lib/cache/lvmcache.c
3c4a43
@@ -3068,3 +3068,27 @@ uint64_t lvmcache_max_metadata_size(void)
3c4a43
 	return _max_metadata_size;
3c4a43
 }
3c4a43
 
3c4a43
+void lvmcache_get_mdas(struct cmd_context *cmd, const char *vgname,
3c4a43
+		       const char *vgid, struct dm_list *mda_list)
3c4a43
+{
3c4a43
+	struct lvmcache_vginfo *vginfo = lvmcache_vginfo_from_vgname(vgname, vgid);
3c4a43
+	struct lvmcache_info *info;
3c4a43
+	struct metadata_area *mda, *next_mda;
3c4a43
+	struct mda_list *entry;
3c4a43
+
3c4a43
+	if (!vginfo) {
3c4a43
+		log_error(INTERNAL_ERROR "lvmcache_get_mdas no vginfo %s", vgname);
3c4a43
+		return;
3c4a43
+	}
3c4a43
+
3c4a43
+	/* Append one mda_list entry per mda; an mda is skipped if allocation fails. */
3c4a43
+	dm_list_iterate_items(info, &vginfo->infos) {
3c4a43
+		dm_list_iterate_items_safe(mda, next_mda, &info->mdas) {
3c4a43
+			if ((entry = dm_zalloc(sizeof(*entry)))) {
3c4a43
+				entry->mda = mda;
3c4a43
+				dm_list_add(mda_list, &entry->list);
3c4a43
+			}
3c4a43
+		}
3c4a43
+	}
3c4a43
+}
3c4a43
+
3c4a43
diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h
3c4a43
index f436785..541e8be 100644
3c4a43
--- a/lib/cache/lvmcache.h
3c4a43
+++ b/lib/cache/lvmcache.h
3c4a43
@@ -228,4 +228,8 @@ void lvmcache_drop_saved_vgid(const char *vgid);
3c4a43
 uint64_t lvmcache_max_metadata_size(void);
3c4a43
 void lvmcache_save_metadata_size(uint64_t val);
3c4a43
 
3c4a43
+void lvmcache_get_mdas(struct cmd_context *cmd,
3c4a43
+                       const char *vgname, const char *vgid,
3c4a43
+                       struct dm_list *mda_list);
3c4a43
+
3c4a43
 #endif
3c4a43
diff --git a/lib/device/bcache-utils.c b/lib/device/bcache-utils.c
3c4a43
index a533a66..2f0b01d 100644
3c4a43
--- a/lib/device/bcache-utils.c
3c4a43
+++ b/lib/device/bcache-utils.c
3c4a43
@@ -79,6 +79,21 @@ bool bcache_read_bytes(struct bcache *cache, int fd, uint64_t start, size_t len,
3c4a43
 	return true;
3c4a43
 }
3c4a43
 
3c4a43
+bool bcache_invalidate_bytes(struct bcache *cache, int fd, uint64_t start, size_t len)
3c4a43
+{
3c4a43
+	block_address cur, end;
3c4a43
+	bool all_ok = true;
3c4a43
+
3c4a43
+	byte_range_to_block_range(cache, start, len, &cur, &end);
3c4a43
+
3c4a43
+	/* Keep going after a failure so every block in the range is attempted. */
3c4a43
+	while (cur != end)
3c4a43
+		if (!bcache_invalidate(cache, fd, cur++))
3c4a43
+			all_ok = false;
3c4a43
+
3c4a43
+	return all_ok;
3c4a43
+}
3c4a43
+
3c4a43
 //----------------------------------------------------------------
3c4a43
 
3c4a43
 // Writing bytes and zeroing bytes are very similar, so we factor out
3c4a43
diff --git a/lib/device/bcache.h b/lib/device/bcache.h
3c4a43
index f9067f7..3e7a168 100644
3c4a43
--- a/lib/device/bcache.h
3c4a43
+++ b/lib/device/bcache.h
3c4a43
@@ -163,6 +163,7 @@ bool bcache_read_bytes(struct bcache *cache, int fd, uint64_t start, size_t len,
3c4a43
 bool bcache_write_bytes(struct bcache *cache, int fd, uint64_t start, size_t len, void *data);
3c4a43
 bool bcache_zero_bytes(struct bcache *cache, int fd, uint64_t start, size_t len);
3c4a43
 bool bcache_set_bytes(struct bcache *cache, int fd, uint64_t start, size_t len, uint8_t val);
3c4a43
+bool bcache_invalidate_bytes(struct bcache *cache, int fd, uint64_t start, size_t len);
3c4a43
 
3c4a43
 void bcache_set_last_byte(struct bcache *cache, int fd, uint64_t offset, int sector_size);
3c4a43
 void bcache_unset_last_byte(struct bcache *cache, int fd);
3c4a43
diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
3c4a43
index 6f5d739..75d7fcd 100644
3c4a43
--- a/lib/format_text/format-text.c
3c4a43
+++ b/lib/format_text/format-text.c
3c4a43
@@ -1199,6 +1199,7 @@ static int _scan_file(const struct format_type *fmt, const char *vgname)
3c4a43
 }
3c4a43
 
3c4a43
 int read_metadata_location_summary(const struct format_type *fmt,
3c4a43
+		    struct metadata_area *mda,
3c4a43
 		    struct mda_header *mdah, int primary_mda, struct device_area *dev_area,
3c4a43
 		    struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors)
3c4a43
 {
3c4a43
@@ -1251,6 +1252,19 @@ int read_metadata_location_summary(const struct format_type *fmt,
3c4a43
 		return 0;
3c4a43
 	}
3c4a43
 
3c4a43
+	/*
3c4a43
+	 * This function is used to read the vg summary during label scan.
3c4a43
+	 * Save the text start location and checksum during scan.  After the VG
3c4a43
+	 * lock is acquired in vg_read, we can reread the mda_header, and
3c4a43
+	 * compare rlocn->offset,checksum to what was saved during scan.  If
3c4a43
+	 * unchanged, it means that the metadata was not changed between scan
3c4a43
+	 * and the read.
3c4a43
+	 */
3c4a43
+	if (mda) {
3c4a43
+		mda->scan_text_offset = rlocn->offset;
3c4a43
+		mda->scan_text_checksum = rlocn->checksum;
3c4a43
+	}
3c4a43
+
3c4a43
 	/* We found a VG - now check the metadata */
3c4a43
 	if (rlocn->offset + rlocn->size > mdah->size)
3c4a43
 		wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size);
3c4a43
@@ -1374,7 +1388,7 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu
3c4a43
 			continue;
3c4a43
 		}
3c4a43
 
3c4a43
-		if (read_metadata_location_summary(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) {
3c4a43
+		if (read_metadata_location_summary(fmt, NULL, mdah, 0, &rl->dev_area, &vgsummary, NULL)) {
3c4a43
 			vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0);
3c4a43
 			if (vg) {
3c4a43
 				lvmcache_update_vg(vg, 0);
3c4a43
diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h
3c4a43
index 2671bbf..4601952 100644
3c4a43
--- a/lib/format_text/layout.h
3c4a43
+++ b/lib/format_text/layout.h
3c4a43
@@ -104,7 +104,7 @@ struct mda_context {
3c4a43
 #define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize())
3c4a43
 #define MDA_ORIGINAL_ALIGNMENT 512	/* Original alignment used for start of VG metadata content */
3c4a43
 
3c4a43
-int read_metadata_location_summary(const struct format_type *fmt, struct mda_header *mdah, int primary_mda, 
3c4a43
+int read_metadata_location_summary(const struct format_type *fmt, struct metadata_area *mda, struct mda_header *mdah, int primary_mda, 
3c4a43
 		    struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary,
3c4a43
 		    uint64_t *mda_free_sectors);
3c4a43
 
3c4a43
diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c
3c4a43
index 7d10e06..fc8294e 100644
3c4a43
--- a/lib/format_text/text_label.c
3c4a43
+++ b/lib/format_text/text_label.c
3c4a43
@@ -345,7 +345,7 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton)
3c4a43
 		return 1;
3c4a43
 	}
3c4a43
 
3c4a43
-	if (!read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area,
3c4a43
+	if (!read_metadata_location_summary(fmt, mda, mdah, mda_is_primary(mda), &mdac->area,
3c4a43
 					    &vgsummary, &mdac->free_sectors)) {
3c4a43
 		if (vgsummary.zero_offset)
3c4a43
 			return 1;
3c4a43
diff --git a/lib/label/label.c b/lib/label/label.c
3c4a43
index 70b7934..8a4b662 100644
3c4a43
--- a/lib/label/label.c
3c4a43
+++ b/lib/label/label.c
3c4a43
@@ -1418,6 +1418,11 @@ bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data)
3c4a43
 	return true;
3c4a43
 }
3c4a43
 
3c4a43
+bool dev_invalidate_bytes(struct device *dev, uint64_t start, size_t len)
3c4a43
+{
3c4a43
+	return bcache_invalidate_bytes(scan_bcache, dev->bcache_fd, start, len); /* forwards the byte range for dev->bcache_fd to scan_bcache */
3c4a43
+}
3c4a43
+
3c4a43
 bool dev_write_zeros(struct device *dev, uint64_t start, size_t len)
3c4a43
 {
3c4a43
 	if (test_mode())
3c4a43
diff --git a/lib/label/label.h b/lib/label/label.h
3c4a43
index 42c9946..ea29c84 100644
3c4a43
--- a/lib/label/label.h
3c4a43
+++ b/lib/label/label.h
3c4a43
@@ -128,6 +128,7 @@ bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data);
3c4a43
 bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data);
3c4a43
 bool dev_write_zeros(struct device *dev, uint64_t start, size_t len);
3c4a43
 bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val);
3c4a43
+bool dev_invalidate_bytes(struct device *dev, uint64_t start, size_t len);
3c4a43
 void dev_set_last_byte(struct device *dev, uint64_t offset);
3c4a43
 void dev_unset_last_byte(struct device *dev);
3c4a43
 
3c4a43
diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c
3c4a43
index d448fd9..39f10fe 100644
3c4a43
--- a/lib/metadata/metadata.c
3c4a43
+++ b/lib/metadata/metadata.c
3c4a43
@@ -33,6 +33,8 @@
3c4a43
 #include "lvmlockd.h"
3c4a43
 #include "time.h"
3c4a43
 #include "lvmnotify.h"
3c4a43
+#include "format_text/format-text.h"
3c4a43
+#include "format_text/layout.h"
3c4a43
 
3c4a43
 #include <math.h>
3c4a43
 #include <sys/param.h>
3c4a43
@@ -3760,6 +3762,118 @@ out:
3c4a43
 	return r;
3c4a43
 }
3c4a43
 
3c4a43
+/*
3c4a43
+ * Reread the mda_header of the VG's primary mdas and compare the first
3c4a43
+ * raw_locn's offset and checksum with the values saved by label scan.
3c4a43
+ *
3c4a43
+ * Returns false when they match, i.e. the metadata seen during scan still
3c4a43
+ * matches what's on disk and does not need to be reread.  Returns true
3c4a43
+ * when they differ, or when the check could not be made (no usable mda,
3c4a43
+ * no device, invalidate or mda_header read failure), so the caller rescans.
3c4a43
+ * The mda_header is invalidated in bcache first so it is reread from disk;
3c4a43
+ * the temporary list from lvmcache_get_mdas() is released before returning.
3c4a43
+ */
3c4a43
+
3c4a43
+static bool _scan_text_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid)
3c4a43
+{
3c4a43
+	struct dm_list mda_list;
3c4a43
+	struct mda_list *mdal, *safe;
3c4a43
+	struct metadata_area *mda;
3c4a43
+	struct mda_context *mdac;
3c4a43
+	struct device_area *area;
3c4a43
+	struct mda_header *mdah;
3c4a43
+	struct raw_locn *rlocn;
3c4a43
+	struct device *dev;
3c4a43
+	bool ret = true;
3c4a43
+
3c4a43
+	/*
3c4a43
+	 * if cmd->can_use_one_scan, check one mda_header is unchanged,
3c4a43
+	 * else check that all mda_headers are unchanged.
3c4a43
+	 */
3c4a43
+
3c4a43
+	dm_list_init(&mda_list);
3c4a43
+
3c4a43
+	lvmcache_get_mdas(cmd, vgname, vgid, &mda_list);
3c4a43
+
3c4a43
+	dm_list_iterate_items(mdal, &mda_list) {
3c4a43
+		mda = mdal->mda;
3c4a43
+
3c4a43
+		if (!mda->scan_text_offset)
3c4a43
+			continue;
3c4a43
+
3c4a43
+		if (!mda_is_primary(mda))
3c4a43
+			continue;
3c4a43
+		ret = true; /* an earlier match must not hide a mismatch or failure on this mda */
3c4a43
+		if (!(dev = mda_get_device(mda))) {
3c4a43
+			log_debug("rescan for text mismatch - no mda dev");
3c4a43
+			goto out;
3c4a43
+		}
3c4a43
+
3c4a43
+		mdac = mda->metadata_locn;
3c4a43
+		area = &mdac->area;
3c4a43
+
3c4a43
+		/*
3c4a43
+		 * Invalidate mda_header in bcache so it will be reread from disk.
3c4a43
+		 */
3c4a43
+		if (!dev_invalidate_bytes(dev, 4096, 512)) {
3c4a43
+			log_debug("rescan for text mismatch - cannot invalidate");
3c4a43
+			goto out;
3c4a43
+		}
3c4a43
+
3c4a43
+		if (!(mdah = raw_read_mda_header(cmd->fmt, area, 1))) {
3c4a43
+			log_debug("rescan for text mismatch - no mda header");
3c4a43
+			goto out;
3c4a43
+		}
3c4a43
+
3c4a43
+		rlocn = mdah->raw_locns;
3c4a43
+
3c4a43
+		if (rlocn->checksum != mda->scan_text_checksum) {
3c4a43
+			log_debug("rescan for text checksum mismatch on %s - now %x prev %x offset now %llu prev %llu",
3c4a43
+				  dev_name(dev),
3c4a43
+				  rlocn->checksum, mda->scan_text_checksum,
3c4a43
+				  (unsigned long long)rlocn->offset,
3c4a43
+				  (unsigned long long)mda->scan_text_offset);
3c4a43
+		} else if (rlocn->offset != mda->scan_text_offset) {
3c4a43
+			log_debug("rescan for text offset mismatch on %s - now %llu prev %llu checksum %x",
3c4a43
+				  dev_name(dev),
3c4a43
+				  (unsigned long long)rlocn->offset,
3c4a43
+				  (unsigned long long)mda->scan_text_offset,
3c4a43
+				  rlocn->checksum);
3c4a43
+		} else {
3c4a43
+			/* the common case where fields match and no rescan needed */
3c4a43
+			ret = false;
3c4a43
+		}
3c4a43
+
3c4a43
+		dm_pool_free(cmd->mem, mdah);
3c4a43
+
3c4a43
+		/* For can_use_one_scan commands, return result from checking one mda. */
3c4a43
+		if (cmd->can_use_one_scan)
3c4a43
+			goto out;
3c4a43
+
3c4a43
+		/* For other commands, return mismatch immediately. */
3c4a43
+		if (ret)
3c4a43
+			goto_out;
3c4a43
+	}
3c4a43
+
3c4a43
+	if (ret) {
3c4a43
+		/* shouldn't happen */
3c4a43
+		log_debug("rescan for text mismatch - no mdas");
3c4a43
+		goto out;
3c4a43
+	}
3c4a43
+out:
3c4a43
+	if (!ret)
3c4a43
+		log_debug("rescan skipped - unchanged offset %llu checksum %x",
3c4a43
+			  (unsigned long long)mda->scan_text_offset,
3c4a43
+			  mda->scan_text_checksum);
3c4a43
+
3c4a43
+	dm_list_iterate_items_safe(mdal, safe, &mda_list) {
3c4a43
+		dm_list_del(&mdal->list);
3c4a43
+		dm_free(mdal); /* entries come from dm_zalloc() in lvmcache_get_mdas() */
3c4a43
+	}
3c4a43
+
3c4a43
+	return ret;
3c4a43
+}
3c4a43
+
3c4a43
 /* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
3c4a43
  * inconsistent metadata on disk (i.e. the VG write lock is held).
3c4a43
  * This guarantees only consistent metadata is returned.
3c4a43
@@ -3904,7 +4018,8 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
3c4a43
 	 * lock is taken prior to the label scan, and still held here,
3c4a43
 	 * we can also skip the rescan in that case.
3c4a43
 	 */
3c4a43
-	if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) {
3c4a43
+	if (!cmd->can_use_one_scan ||
3c4a43
+	    lvmcache_scan_mismatch(cmd, vgname, vgid) || _scan_text_mismatch(cmd, vgname, vgid)) {
3c4a43
 		/* the skip rescan special case is for clvmd vg_read_by_vgid */
3c4a43
 		/* FIXME: this is not a warn flag, pass this differently */
3c4a43
 		if (warn_flags & SKIP_RESCAN)
3c4a43
diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h
3c4a43
index f8083e5..96bbb56 100644
3c4a43
--- a/lib/metadata/metadata.h
3c4a43
+++ b/lib/metadata/metadata.h
3c4a43
@@ -173,6 +173,8 @@ struct metadata_area {
3c4a43
 	struct metadata_area_ops *ops;
3c4a43
 	void *metadata_locn;
3c4a43
 	uint32_t status;
3c4a43
+	uint64_t scan_text_offset; /* rlocn->offset seen during scan */
3c4a43
+	uint32_t scan_text_checksum; /* rlocn->checksum seen during scan */
3c4a43
 };
3c4a43
 struct metadata_area *mda_copy(struct dm_pool *mem,
3c4a43
 			       struct metadata_area *mda);
3c4a43
@@ -234,7 +236,7 @@ struct name_list {
3c4a43
 
3c4a43
 struct mda_list {
3c4a43
 	struct dm_list list;
3c4a43
-	struct device_area mda;
3c4a43
+	struct metadata_area *mda;
3c4a43
 };
3c4a43
 
3c4a43
 struct peg_list {