lib/cache/lvmcache.c | 24 +++++++++ lib/cache/lvmcache.h | 4 ++ lib/device/bcache-utils.c | 15 ++++++ lib/device/bcache.h | 1 + lib/format_text/format-text.c | 16 +++++- lib/format_text/layout.h | 2 +- lib/format_text/text_label.c | 2 +- lib/label/label.c | 5 ++ lib/label/label.h | 1 + lib/metadata/metadata.c | 117 +++++++++++++++++++++++++++++++++++++++++- lib/metadata/metadata.h | 4 +- 11 files changed, 186 insertions(+), 5 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index b6a02b0..5b8dce8 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -3068,3 +3068,27 @@ uint64_t lvmcache_max_metadata_size(void) return _max_metadata_size; } +void lvmcache_get_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *mda_list) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + struct mda_list *mdal; + struct metadata_area *mda, *mda2; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_mdas no vginfo %s", vgname); + return; + } + + dm_list_iterate_items(info, &vginfo->infos) { + dm_list_iterate_items_safe(mda, mda2, &info->mdas) { + if (!(mdal = dm_zalloc(sizeof(*mdal)))) + continue; + mdal->mda = mda; + dm_list_add(mda_list, &mdal->list); + } + } +} + diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index f436785..541e8be 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -228,4 +228,8 @@ void lvmcache_drop_saved_vgid(const char *vgid); uint64_t lvmcache_max_metadata_size(void); void lvmcache_save_metadata_size(uint64_t val); +void lvmcache_get_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *mda_list); + #endif diff --git a/lib/device/bcache-utils.c b/lib/device/bcache-utils.c index a533a66..2f0b01d 100644 --- a/lib/device/bcache-utils.c +++ b/lib/device/bcache-utils.c @@ -79,6 +79,21 @@ bool bcache_read_bytes(struct bcache *cache, int fd, uint64_t start, size_t len, return true; } +bool bcache_invalidate_bytes(struct bcache *cache, int fd, uint64_t start, size_t len) +{ + block_address bb, be; + bool result = true; + + byte_range_to_block_range(cache, start, len, &bb, &be); + + for (; bb != be; bb++) { + if (!bcache_invalidate(cache, fd, bb)) + result = false; + } + + return result; +} + //---------------------------------------------------------------- // Writing bytes and zeroing bytes are very similar, so we factor out diff --git a/lib/device/bcache.h b/lib/device/bcache.h index f9067f7..3e7a168 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -163,6 +163,7 @@ bool bcache_read_bytes(struct bcache *cache, int fd, uint64_t start, size_t len, bool bcache_write_bytes(struct bcache *cache, int fd, uint64_t start, size_t len, void *data); bool bcache_zero_bytes(struct bcache *cache, int fd, uint64_t start, size_t len); bool bcache_set_bytes(struct bcache *cache, int fd, uint64_t start, size_t len, uint8_t val); +bool bcache_invalidate_bytes(struct bcache *cache, int fd, uint64_t start, size_t len); void bcache_set_last_byte(struct bcache *cache, int fd, uint64_t offset, int sector_size); void bcache_unset_last_byte(struct bcache *cache, int fd); diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 6f5d739..75d7fcd 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1199,6 +1199,7 @@ static int _scan_file(const struct format_type *fmt, const char *vgname) } int read_metadata_location_summary(const struct format_type *fmt, + struct metadata_area *mda, struct mda_header *mdah, int primary_mda, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors) { @@ -1251,6 +1252,19 @@ int read_metadata_location_summary(const struct format_type *fmt, return 0; } + /* + * This function is used to read the vg summary during label scan. + * Save the text start location and checksum during scan. After the VG + * lock is acquired in vg_read, we can reread the mda_header, and + * compare rlocn->offset,checksum to what was saved during scan. If + * unchanged, it means that the metadata was not changed between scan + * and the read. + */ + if (mda) { + mda->scan_text_offset = rlocn->offset; + mda->scan_text_checksum = rlocn->checksum; + } + /* We found a VG - now check the metadata */ if (rlocn->offset + rlocn->size > mdah->size) wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size); @@ -1374,7 +1388,7 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu continue; } - if (read_metadata_location_summary(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { + if (read_metadata_location_summary(fmt, NULL, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0); if (vg) { lvmcache_update_vg(vg, 0); diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h index 2671bbf..4601952 100644 --- a/lib/format_text/layout.h +++ b/lib/format_text/layout.h @@ -104,7 +104,7 @@ struct mda_context { #define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize()) #define MDA_ORIGINAL_ALIGNMENT 512 /* Original alignment used for start of VG metadata content */ -int read_metadata_location_summary(const struct format_type *fmt, struct mda_header *mdah, int primary_mda, +int read_metadata_location_summary(const struct format_type *fmt, struct metadata_area *mda, struct mda_header *mdah, int primary_mda, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors); diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 7d10e06..fc8294e 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -345,7 +345,7 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) return 1; } - if (!read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area, + if (!read_metadata_location_summary(fmt, mda, mdah, mda_is_primary(mda), &mdac->area, &vgsummary, &mdac->free_sectors)) { if (vgsummary.zero_offset) return 1; diff --git a/lib/label/label.c b/lib/label/label.c index 70b7934..8a4b662 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -1418,6 +1418,11 @@ bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data) return true; } +bool dev_invalidate_bytes(struct device *dev, uint64_t start, size_t len) +{ + return bcache_invalidate_bytes(scan_bcache, dev->bcache_fd, start, len); +} + bool dev_write_zeros(struct device *dev, uint64_t start, size_t len) { if (test_mode()) diff --git a/lib/label/label.h b/lib/label/label.h index 42c9946..ea29c84 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -128,6 +128,7 @@ bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data); bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data); bool dev_write_zeros(struct device *dev, uint64_t start, size_t len); bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val); +bool dev_invalidate_bytes(struct device *dev, uint64_t start, size_t len); void dev_set_last_byte(struct device *dev, uint64_t offset); void dev_unset_last_byte(struct device *dev); diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index d448fd9..39f10fe 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -33,6 +33,8 @@ #include "lvmlockd.h" #include "time.h" #include "lvmnotify.h" +#include "format_text/format-text.h" +#include "format_text/layout.h" #include #include @@ -3760,6 +3762,118 @@ out: return r; } +/* + * Reread an mda_header. If the text offset is the same as was seen and saved + * by label scan, it means the metadata is unchanged and we do not need to + * reread metadata. + * + * This is used to ensure that the metadata seen during scan still matches + * what's on disk. If the scan data still matches what's on disk we don't + * need to reread the metadata from disk. When we read the metadata from + * bcache it may come from the cache or from disk again if the cache has + * dropped it. + */ + +static bool _scan_text_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid) +{ + struct dm_list mda_list; + struct mda_list *mdal, *safe; + struct metadata_area *mda; + struct mda_context *mdac; + struct device_area *area; + struct mda_header *mdah; + struct raw_locn *rlocn; + struct device *dev; + bool ret = true; + + /* + * if cmd->can_use_one_scan, check one mda_header is unchanged, + * else check that all mda_headers are unchanged. + */ + + dm_list_init(&mda_list); + + lvmcache_get_mdas(cmd, vgname, vgid, &mda_list); + + dm_list_iterate_items(mdal, &mda_list) { + mda = mdal->mda; + + if (!mda->scan_text_offset) + continue; + + if (!mda_is_primary(mda)) + continue; + + if (!(dev = mda_get_device(mda))) { + log_debug("rescan for text mismatch - no mda dev"); + goto out; + } + + mdac = mda->metadata_locn; + area = &mdac->area; + + /* + * Invalidate mda_header in bcache so it will be reread from disk. + */ + if (!dev_invalidate_bytes(dev, 4096, 512)) { + log_debug("rescan for text mismatch - cannot invalidate"); + goto out; + } + + if (!(mdah = raw_read_mda_header(cmd->fmt, area, 1))) { + log_debug("rescan for text mismatch - no mda header"); + goto out; + } + + rlocn = mdah->raw_locns; + + if (rlocn->checksum != mda->scan_text_checksum) { + log_debug("rescan for text checksum mismatch on %s - now %x prev %x offset now %llu prev %llu", + dev_name(dev), + rlocn->checksum, mda->scan_text_checksum, + (unsigned long long)rlocn->offset, + (unsigned long long)mda->scan_text_offset); + } else if (rlocn->offset != mda->scan_text_offset) { + log_debug("rescan for text offset mismatch on %s - now %llu prev %llu checksum %x", + dev_name(dev), + (unsigned long long)rlocn->offset, + (unsigned long long)mda->scan_text_offset, + rlocn->checksum); + } else { + /* the common case where fields match and no rescan needed */ + ret = false; + } + + dm_pool_free(cmd->mem, mdah); + + /* For can_use_one_scan commands, return result from checking one mda. */ + if (cmd->can_use_one_scan) + goto out; + + /* For other commands, return mismatch immediately. */ + if (ret) + goto_out; + } + + if (ret) { + /* shouldn't happen */ + log_debug("rescan for text mismatch - no mdas"); + goto out; + } +out: + if (!ret) + log_debug("rescan skipped - unchanged offset %llu checksum %x", + (unsigned long long)mda->scan_text_offset, + mda->scan_text_checksum); + + dm_list_iterate_items_safe(mdal, safe, &mda_list) { + dm_list_del(&mdal->list); + free(mdal); + } + + return ret; +} + /* Caller sets consistent to 1 if it's safe for vg_read_internal to correct * inconsistent metadata on disk (i.e. the VG write lock is held). * This guarantees only consistent metadata is returned. @@ -3904,7 +4018,8 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, * lock is taken prior to the label scan, and still held here, * we can also skip the rescan in that case. */ - if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) { + if (!cmd->can_use_one_scan || + lvmcache_scan_mismatch(cmd, vgname, vgid) || _scan_text_mismatch(cmd, vgname, vgid)) { /* the skip rescan special case is for clvmd vg_read_by_vgid */ /* FIXME: this is not a warn flag, pass this differently */ if (warn_flags & SKIP_RESCAN) diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index f8083e5..96bbb56 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -173,6 +173,8 @@ struct metadata_area { struct metadata_area_ops *ops; void *metadata_locn; uint32_t status; + uint64_t scan_text_offset; /* rlocn->offset seen during scan */ + uint32_t scan_text_checksum; /* rlocn->checksum seen during scan */ }; struct metadata_area *mda_copy(struct dm_pool *mem, struct metadata_area *mda); @@ -234,7 +236,7 @@ struct name_list { struct mda_list { struct dm_list list; - struct device_area mda; + struct metadata_area *mda; }; struct peg_list {