diff --git a/SOURCES/lvm2-2_03_12-WHATS_NEW-update.patch b/SOURCES/lvm2-2_03_12-WHATS_NEW-update.patch new file mode 100644 index 0000000..ed16b64 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-WHATS_NEW-update.patch @@ -0,0 +1,15 @@ + WHATS_NEW | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/WHATS_NEW b/WHATS_NEW +index ffefc9d..3953c7e 100644 +--- a/WHATS_NEW ++++ b/WHATS_NEW +@@ -1,5 +1,7 @@ + Version 2.03.12 - + =================================== ++ Fix problem with wiping of converted LVs. ++ Fix memleak in scanning (2.03.11). + Fix corner case allocation for thin-pools. + + Version 2.03.11 - 08th January 2021 diff --git a/SOURCES/lvm2-2_03_12-alloc-enhance-estimation-of-sufficient_pes_free.patch b/SOURCES/lvm2-2_03_12-alloc-enhance-estimation-of-sufficient_pes_free.patch new file mode 100644 index 0000000..a60a2b8 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-alloc-enhance-estimation-of-sufficient_pes_free.patch @@ -0,0 +1,47 @@ + WHATS_NEW | 10 ++++++++-- + lib/metadata/lv_manip.c | 10 +++++++--- + 2 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/WHATS_NEW b/WHATS_NEW +index 452a631..fe347f7 100644 +--- a/WHATS_NEW ++++ b/WHATS_NEW +@@ -1,3 +1,7 @@ ++Version 2.03.12 - ++=================================== ++ Fix corner case allocation for thin-pools. ++ + Version 2.03.11 - 08th January 2021 + =================================== + Fix pvck handling MDA at offset different from 4096. +diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c +index 7046436..443d32c 100644 +--- a/lib/metadata/lv_manip.c ++++ b/lib/metadata/lv_manip.c +@@ -1850,11 +1850,13 @@ static uint32_t _mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint + + /* Is there enough total space or should we give up immediately? 
*/ + static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms, +- uint32_t allocated, uint32_t extents_still_needed) ++ uint32_t allocated, uint32_t log_still_needed, ++ uint32_t extents_still_needed) + { + uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple; + uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple; +- uint32_t metadata_extents_needed = ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * RAID_METADATA_AREA_LEN + ah->log_len; /* One each */ ++ uint32_t metadata_extents_needed = (ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * RAID_METADATA_AREA_LEN) + ++ (log_still_needed ? ah->log_len : 0); /* One each */ + uint64_t total_extents_needed = (uint64_t)area_extents_needed + parity_extents_needed + metadata_extents_needed; + uint32_t free_pes = pv_maps_size(pvms); + +@@ -3359,7 +3361,9 @@ static int _allocate(struct alloc_handle *ah, + old_allocated = alloc_state.allocated; + log_debug_alloc("Trying allocation using %s policy.", get_alloc_string(alloc)); + +- if (!ah->approx_alloc && !_sufficient_pes_free(ah, pvms, alloc_state.allocated, ah->new_extents)) ++ if (!ah->approx_alloc && !_sufficient_pes_free(ah, pvms, alloc_state.allocated, ++ alloc_state.log_area_count_still_needed, ++ ah->new_extents)) + goto_out; + + _init_alloc_parms(ah, &alloc_parms, alloc, prev_lvseg, diff --git a/SOURCES/lvm2-2_03_12-cache-reuse-code-for-metadata-min_max.patch b/SOURCES/lvm2-2_03_12-cache-reuse-code-for-metadata-min_max.patch new file mode 100644 index 0000000..ce35731 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-cache-reuse-code-for-metadata-min_max.patch @@ -0,0 +1,107 @@ + lib/metadata/cache_manip.c | 40 ++++++++++++++-------------------------- + lib/metadata/metadata-exported.h | 1 + + tools/lvconvert.c | 1 + + tools/lvcreate.c | 1 + + 4 files changed, 17 insertions(+), 26 deletions(-) + +diff --git a/lib/metadata/cache_manip.c 
b/lib/metadata/cache_manip.c +index 2c4cc92..90ebd94 100644 +--- a/lib/metadata/cache_manip.c ++++ b/lib/metadata/cache_manip.c +@@ -204,6 +204,7 @@ int update_cache_pool_params(struct cmd_context *cmd, + unsigned attr, + uint32_t pool_data_extents, + uint32_t *pool_metadata_extents, ++ struct logical_volume *metadata_lv, + int *chunk_size_calc_method, uint32_t *chunk_size) + { + uint64_t min_meta_size; +@@ -252,39 +253,26 @@ int update_cache_pool_params(struct cmd_context *cmd, + if (!validate_cache_chunk_size(cmd, *chunk_size)) + return_0; + +- min_meta_size = _cache_min_metadata_size((uint64_t) pool_data_extents * extent_size, *chunk_size); ++ if ((uint64_t) *chunk_size > (uint64_t) pool_data_extents * extent_size) { ++ log_error("Size of %s data volume cannot be smaller than chunk size %s.", ++ segtype->name, display_size(cmd, *chunk_size)); ++ return 0; ++ } + +- /* Round up to extent size */ +- if (min_meta_size % extent_size) +- min_meta_size += extent_size - min_meta_size % extent_size; ++ min_meta_size = _cache_min_metadata_size((uint64_t) pool_data_extents * extent_size, *chunk_size); ++ min_meta_size = dm_round_up(min_meta_size, extent_size); + + if (!pool_metadata_size) + pool_metadata_size = min_meta_size; + +- if (pool_metadata_size > (2 * DEFAULT_CACHE_POOL_MAX_METADATA_SIZE)) { +- pool_metadata_size = 2 * DEFAULT_CACHE_POOL_MAX_METADATA_SIZE; +- if (*pool_metadata_extents) +- log_warn("WARNING: Maximum supported pool metadata size is %s.", +- display_size(cmd, pool_metadata_size)); +- } else if (pool_metadata_size < min_meta_size) { +- if (*pool_metadata_extents) +- log_warn("WARNING: Minimum required pool metadata size is %s " +- "(needs extra %s).", +- display_size(cmd, min_meta_size), +- display_size(cmd, min_meta_size - pool_metadata_size)); +- pool_metadata_size = min_meta_size; +- } +- +- if (!(*pool_metadata_extents = +- extents_from_size(cmd, pool_metadata_size, extent_size))) ++ if (!update_pool_metadata_min_max(cmd, extent_size, ++ 
min_meta_size, ++ (2 * DEFAULT_CACHE_POOL_MAX_METADATA_SIZE), ++ &pool_metadata_size, ++ metadata_lv, ++ pool_metadata_extents)) + return_0; + +- if ((uint64_t) *chunk_size > (uint64_t) pool_data_extents * extent_size) { +- log_error("Size of %s data volume cannot be smaller than chunk size %s.", +- segtype->name, display_size(cmd, *chunk_size)); +- return 0; +- } +- + log_verbose("Preferred pool metadata size %s.", + display_size(cmd, (uint64_t)*pool_metadata_extents * extent_size)); + +diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h +index 0e57722..c0fa564 100644 +--- a/lib/metadata/metadata-exported.h ++++ b/lib/metadata/metadata-exported.h +@@ -1319,6 +1319,7 @@ int update_cache_pool_params(struct cmd_context *cmd, + unsigned attr, + uint32_t pool_data_extents, + uint32_t *pool_metadata_extents, ++ struct logical_volume *metadata_lv, + int *chunk_size_calc_method, uint32_t *chunk_size); + int validate_lv_cache_chunk_size(struct logical_volume *pool_lv, uint32_t chunk_size); + int validate_lv_cache_create_pool(const struct logical_volume *pool_lv); +diff --git a/tools/lvconvert.c b/tools/lvconvert.c +index ce90279..416e8a7 100644 +--- a/tools/lvconvert.c ++++ b/tools/lvconvert.c +@@ -3189,6 +3189,7 @@ static int _lvconvert_to_pool(struct cmd_context *cmd, + pool_segtype, target_attr, + lv->le_count, + &meta_extents, ++ metadata_lv, + &chunk_calc, + &chunk_size)) + goto_bad; +diff --git a/tools/lvcreate.c b/tools/lvcreate.c +index 1ee9e14..1ce561f 100644 +--- a/tools/lvcreate.c ++++ b/tools/lvcreate.c +@@ -403,6 +403,7 @@ static int _update_extents_params(struct volume_group *vg, + lp->segtype, lp->target_attr, + lp->extents, + &lp->pool_metadata_extents, ++ NULL, + &lp->thin_chunk_size_calc_policy, + &lp->chunk_size)) + return_0; diff --git a/SOURCES/lvm2-2_03_12-devs-remove-invalid-path-name-aliases.patch b/SOURCES/lvm2-2_03_12-devs-remove-invalid-path-name-aliases.patch new file mode 100644 index 0000000..0f00653 --- /dev/null 
+++ b/SOURCES/lvm2-2_03_12-devs-remove-invalid-path-name-aliases.patch @@ -0,0 +1,255 @@ + lib/device/dev-cache.c | 161 ++++++++++++++++++++++++++++++++++++---------- + test/shell/dev-aliases.sh | 53 +++++++++++++++ + 2 files changed, 179 insertions(+), 35 deletions(-) + create mode 100644 test/shell/dev-aliases.sh + +diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c +index d5f18ff..8082efa 100644 +--- a/lib/device/dev-cache.c ++++ b/lib/device/dev-cache.c +@@ -1428,60 +1428,151 @@ struct device *dev_hash_get(const char *name) + return (struct device *) dm_hash_lookup(_cache.names, name); + } + ++static void _remove_alias(struct device *dev, const char *name) ++{ ++ struct dm_str_list *strl; ++ ++ dm_list_iterate_items(strl, &dev->aliases) { ++ if (!strcmp(strl->str, name)) { ++ dm_list_del(&strl->list); ++ return; ++ } ++ } ++} ++ ++/* ++ * Check that paths for this dev still refer to the same dev_t. This is known ++ * to drop invalid paths in the case where lvm deactivates an LV, which causes ++ * that LV path to go away, but that LV path is not removed from dev-cache (it ++ * probably should be). Later a new path to a different LV is added to ++ * dev-cache, where the new LV has the same major:minor as the previously ++ * deactivated LV. The new LV will find the existing struct dev, and that ++ * struct dev will have dev->aliases entries that refer to the name of the old ++ * deactivated LV. Those old paths are all invalid and are dropped here. 
++ */ ++ ++static void _verify_aliases(struct device *dev, const char *newname) ++{ ++ struct dm_str_list *strl, *strl2; ++ struct stat st; ++ ++ dm_list_iterate_items_safe(strl, strl2, &dev->aliases) { ++ /* newname was just stat'd and added by caller */ ++ if (newname && !strcmp(strl->str, newname)) ++ continue; ++ ++ if (stat(strl->str, &st) || (st.st_rdev != dev->dev)) { ++ log_debug("Drop invalid path %s for %d:%d (new path %s).", ++ strl->str, (int)MAJOR(dev->dev), (int)MINOR(dev->dev), newname ?: ""); ++ dm_hash_remove(_cache.names, strl->str); ++ dm_list_del(&strl->list); ++ } ++ } ++} ++ + struct device *dev_cache_get(struct cmd_context *cmd, const char *name, struct dev_filter *f) + { +- struct stat buf; +- struct device *d = (struct device *) dm_hash_lookup(_cache.names, name); +- int info_available = 0; +- int ret = 1; ++ struct device *dev = (struct device *) dm_hash_lookup(_cache.names, name); ++ struct stat st; ++ int ret; + +- if (d && (d->flags & DEV_REGULAR)) +- return d; ++ /* ++ * DEV_REGULAR means that is "dev" is actually a file, not a device. ++ * FIXME: I don't think dev-cache is used for files any more and this ++ * can be dropped? ++ */ ++ if (dev && (dev->flags & DEV_REGULAR)) ++ return dev; ++ ++ /* ++ * The requested path is invalid, remove any dev-cache ++ * info for it. ++ */ ++ if (stat(name, &st)) { ++ if (dev) { ++ log_print("Device path %s is invalid for %d:%d %s.", ++ name, (int)MAJOR(dev->dev), (int)MINOR(dev->dev), dev_name(dev)); + +- /* If the entry's wrong, remove it */ +- if (stat(name, &buf) < 0) { +- if (d) + dm_hash_remove(_cache.names, name); +- log_sys_very_verbose("stat", name); +- d = NULL; +- } else +- info_available = 1; + +- if (d && (buf.st_rdev != d->dev)) { +- dm_hash_remove(_cache.names, name); +- d = NULL; +- } ++ _remove_alias(dev, name); + +- if (!d) { +- _insert(name, info_available ? 
&buf : NULL, 0, obtain_device_list_from_udev()); +- d = (struct device *) dm_hash_lookup(_cache.names, name); +- if (!d) { +- log_debug_devs("Device name not found in dev_cache repeat dev_cache_scan for %s", name); +- dev_cache_scan(); +- d = (struct device *) dm_hash_lookup(_cache.names, name); ++ /* Remove any other names in dev->aliases that are incorrect. */ ++ _verify_aliases(dev, NULL); + } ++ return NULL; + } + +- if (!d) ++ if (!S_ISBLK(st.st_mode)) { ++ log_debug("Not a block device %s.", name); + return NULL; ++ } + +- if (d && (d->flags & DEV_REGULAR)) +- return d; ++ /* ++ * dev-cache has incorrect info for the requested path. ++ * Remove incorrect info and then add new dev-cache entry. ++ */ ++ if (dev && (st.st_rdev != dev->dev)) { ++ log_print("Device path %s does not match %d:%d %s.", ++ name, (int)MAJOR(dev->dev), (int)MINOR(dev->dev), dev_name(dev)); ++ ++ dm_hash_remove(_cache.names, name); ++ ++ _remove_alias(dev, name); ++ ++ /* Remove any other names in dev->aliases that are incorrect. */ ++ _verify_aliases(dev, NULL); ++ ++ /* Add new dev-cache entry next. */ ++ dev = NULL; ++ } ++ ++ /* ++ * Either add a new struct dev for st_rdev and name, ++ * or add name as a new alias for an existing struct dev ++ * for st_rdev. ++ */ ++ if (!dev) { ++ _insert_dev(name, st.st_rdev); + +- if (f && !(d->flags & DEV_REGULAR)) { +- ret = f->passes_filter(cmd, f, d, NULL); ++ /* Get the struct dev that was just added. */ ++ dev = (struct device *) dm_hash_lookup(_cache.names, name); + +- if (ret == -EAGAIN) { +- log_debug_devs("get device by name defer filter %s", dev_name(d)); +- d->flags |= DEV_FILTER_AFTER_SCAN; +- ret = 1; ++ if (!dev) { ++ log_error("Failed to get device %s", name); ++ return NULL; + } ++ ++ _verify_aliases(dev, name); + } + +- if (f && !(d->flags & DEV_REGULAR) && !ret) ++ /* ++ * The caller passed a filter if they only want the dev if it ++ * passes filters. 
++ */ ++ ++ if (!f) ++ return dev; ++ ++ ret = f->passes_filter(cmd, f, dev, NULL); ++ ++ /* ++ * This might happen if this function is called before ++ * filters can do i/o. I don't think this will happen ++ * any longer and this EAGAIN case can be removed. ++ */ ++ if (ret == -EAGAIN) { ++ log_debug_devs("dev_cache_get filter deferred %s", dev_name(dev)); ++ dev->flags |= DEV_FILTER_AFTER_SCAN; ++ ret = 1; ++ } ++ ++ if (!ret) { ++ log_debug_devs("dev_cache_get filter excludes %s", dev_name(dev)); + return NULL; ++ } + +- return d; ++ return dev; + } + + static struct device *_dev_cache_seek_devt(dev_t dev) +diff --git a/test/shell/dev-aliases.sh b/test/shell/dev-aliases.sh +new file mode 100644 +index 0000000..c97cd5d +--- /dev/null ++++ b/test/shell/dev-aliases.sh +@@ -0,0 +1,53 @@ ++#!/usr/bin/env bash ++ ++# Copyright (C) 2012 Red Hat, Inc. All rights reserved. ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions ++# of the GNU General Public License v.2. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++SKIP_WITH_LVMPOLLD=1 ++ ++. lib/inittest ++ ++aux prepare_devs 3 ++ ++vgcreate $vg $dev1 $dev2 $dev3 ++ ++# ++# This lvconvert command will deactivate LV1, then internally create a new ++# lv, lvol0, as a poolmetadataspare, then activate lvol0 to zero it. ++# lvol0 will get the same major:minor that LV1 had. When the code gets ++# the struct dev for lvol0, the new path to lvol0 is added to the ++# dev-cache with its major:minor. That major:minor already exists in ++# dev-cache and has the stale LV1 as an alias. So the path to lvol0 is ++# added as an alias to the existing struct dev (with the correct ++# major:minor), but that struct dev has the stale LV1 path on its aliases ++# list. 
The code will now validate all the aliases before returning the ++# dev for lvol0, and will find that the LV1 path is stale and remove it ++# from the aliases. That will prevent the stale path from being used for ++# the dev in place of the new path. ++# ++# The preferred_name is set to /dev/mapper so that if the stale path still ++# exists, that stale path would be used as the name for the dev, and the ++# wiping code would fail to open that stale name. ++# ++ ++lvcreate -n $lv1 -L32M $vg $dev1 ++lvcreate -n $lv2 -L16M $vg $dev2 ++lvconvert -y --type cache-pool --poolmetadata $lv2 --cachemode writeback $vg/$lv1 --config='devices { preferred_names=["/dev/mapper/"] }' ++lvremove -y $vg/$lv1 ++ ++lvcreate -n $lv1 -L32M $vg $dev1 ++lvcreate -n $lv2 -L16M $vg $dev2 ++lvconvert -y --type cache-pool --poolmetadata $lv2 $vg/$lv1 ++lvremove -y $vg/$lv1 ++ ++# TODO: add more validation of dev aliases being specified as command ++# args in combination with various preferred_names settings. ++ ++vgremove -ff $vg diff --git a/SOURCES/lvm2-2_03_12-filter-mpath-work-with-nvme-devices.patch b/SOURCES/lvm2-2_03_12-filter-mpath-work-with-nvme-devices.patch new file mode 100644 index 0000000..3778134 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-filter-mpath-work-with-nvme-devices.patch @@ -0,0 +1,494 @@ + lib/device/dev-type.c | 81 +++++++++++++++++++---- + lib/device/dev-type.h | 2 + + lib/device/device.h | 1 + + lib/filters/filter-mpath.c | 156 ++++++++++++++++++++++++++++++--------------- + 4 files changed, 177 insertions(+), 63 deletions(-) + +diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c +index 896821d..379afa8 100644 +--- a/lib/device/dev-type.c ++++ b/lib/device/dev-type.c +@@ -21,6 +21,7 @@ + #include "lib/metadata/metadata.h" + #include "lib/device/bcache.h" + #include "lib/label/label.h" ++#include "lib/commands/toolcontext.h" + + #ifdef BLKID_WIPING_SUPPORT + #include +@@ -67,6 +68,31 @@ int dev_is_pmem(struct device *dev) + return is_pmem ? 
1 : 0; + } + ++/* ++ * An nvme device has major number 259 (BLKEXT), minor number <minor>, ++ * and reading /sys/dev/block/259:<minor>/device/dev shows a character ++ * device cmajor:cminor where cmajor matches the major number of the ++ * nvme character device entry in /proc/devices. Checking all of that ++ * is excessive and unnecessary compared to just comparing /dev/name*. ++ */ ++ ++int dev_is_nvme(struct dev_types *dt, struct device *dev) ++{ ++ struct dm_str_list *strl; ++ ++ if (dev->flags & DEV_IS_NVME) ++ return 1; ++ ++ dm_list_iterate_items(strl, &dev->aliases) { ++ if (!strncmp(strl->str, "/dev/nvme", 9)) { ++ log_debug("Found nvme device %s", dev_name(dev)); ++ dev->flags |= DEV_IS_NVME; ++ return 1; ++ } ++ } ++ return 0; ++} ++ + int dev_is_lv(struct device *dev) + { + FILE *fp; +@@ -302,6 +328,9 @@ int dev_subsystem_part_major(struct dev_types *dt, struct device *dev) + + const char *dev_subsystem_name(struct dev_types *dt, struct device *dev) + { ++ if (dev->flags & DEV_IS_NVME) ++ return "NVME"; ++ + if (MAJOR(dev->dev) == dt->device_mapper_major) + return "DM"; + +@@ -348,7 +377,6 @@ int major_is_scsi_device(struct dev_types *dt, int major) + return (dt->dev_type_array[major].flags & PARTITION_SCSI_DEVICE) ? 
1 : 0; + } + +- + static int _loop_is_with_partscan(struct device *dev) + { + FILE *fp; +@@ -398,6 +426,28 @@ struct partition { + uint32_t nr_sects; + } __attribute__((packed)); + ++static int _has_sys_partition(struct device *dev) ++{ ++ char path[PATH_MAX]; ++ struct stat info; ++ int major = (int) MAJOR(dev->dev); ++ int minor = (int) MINOR(dev->dev); ++ ++ /* check if dev is a partition */ ++ if (dm_snprintf(path, sizeof(path), "%s/dev/block/%d:%d/partition", ++ dm_sysfs_dir(), major, minor) < 0) { ++ log_error("dm_snprintf partition failed"); ++ return 0; ++ } ++ ++ if (stat(path, &info) == -1) { ++ if (errno != ENOENT) ++ log_sys_error("stat", path); ++ return 0; ++ } ++ return 1; ++} ++ + static int _is_partitionable(struct dev_types *dt, struct device *dev) + { + int parts = major_max_partitions(dt, MAJOR(dev->dev)); +@@ -414,6 +464,13 @@ static int _is_partitionable(struct dev_types *dt, struct device *dev) + _loop_is_with_partscan(dev)) + return 1; + ++ if (dev_is_nvme(dt, dev)) { ++ /* If this dev is already a partition then it's not partitionable. */ ++ if (_has_sys_partition(dev)) ++ return 0; ++ return 1; ++ } ++ + if ((parts <= 1) || (MINOR(dev->dev) % parts)) + return 0; + +@@ -557,11 +614,18 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result) + char path[PATH_MAX]; + char temp_path[PATH_MAX]; + char buffer[64]; +- struct stat info; + FILE *fp = NULL; + int parts, residue, size, ret = 0; + + /* ++ * /dev/nvme devs don't use the major:minor numbering like ++ * block dev types that have their own major number, so ++ * the calculation based on minor number doesn't work. ++ */ ++ if (dev_is_nvme(dt, dev)) ++ goto sys_partition; ++ ++ /* + * Try to get the primary dev out of the + * list of known device types first. 
+ */ +@@ -576,23 +640,14 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result) + goto out; + } + ++ sys_partition: + /* + * If we can't get the primary dev out of the list of known device + * types, try to look at sysfs directly then. This is more complex + * way and it also requires certain sysfs layout to be present + * which might not be there in old kernels! + */ +- +- /* check if dev is a partition */ +- if (dm_snprintf(path, sizeof(path), "%s/dev/block/%d:%d/partition", +- sysfs_dir, major, minor) < 0) { +- log_error("dm_snprintf partition failed"); +- goto out; +- } +- +- if (stat(path, &info) == -1) { +- if (errno != ENOENT) +- log_sys_error("stat", path); ++ if (!_has_sys_partition(dev)) { + *result = dev->dev; + ret = 1; + goto out; /* dev is not a partition! */ +diff --git a/lib/device/dev-type.h b/lib/device/dev-type.h +index fdf7791..8b94b79 100644 +--- a/lib/device/dev-type.h ++++ b/lib/device/dev-type.h +@@ -95,6 +95,8 @@ int dev_is_rotational(struct dev_types *dt, struct device *dev); + + int dev_is_pmem(struct device *dev); + ++int dev_is_nvme(struct dev_types *dt, struct device *dev); ++ + int dev_is_lv(struct device *dev); + + int get_fs_block_size(struct device *dev, uint32_t *fs_block_size); +diff --git a/lib/device/device.h b/lib/device/device.h +index a58bff8..816db31 100644 +--- a/lib/device/device.h ++++ b/lib/device/device.h +@@ -38,6 +38,7 @@ + #define DEV_SCAN_FOUND_LABEL 0x00010000 /* label scan read dev and found label */ + #define DEV_IS_MD_COMPONENT 0x00020000 /* device is an md component */ + #define DEV_UDEV_INFO_MISSING 0x00040000 /* we have no udev info for this device */ ++#define DEV_IS_NVME 0x00080000 /* set if dev is nvme */ + + /* + * Support for external device info. 
+diff --git a/lib/filters/filter-mpath.c b/lib/filters/filter-mpath.c +index 85d1625..40e7df6 100644 +--- a/lib/filters/filter-mpath.c ++++ b/lib/filters/filter-mpath.c +@@ -16,6 +16,7 @@ + #include "lib/misc/lib.h" + #include "lib/filters/filter.h" + #include "lib/activate/activate.h" ++#include "lib/commands/toolcontext.h" + #ifdef UDEV_SYNC_SUPPORT + #include + #include "lib/device/dev-ext-udev-constants.h" +@@ -27,7 +28,6 @@ + + #define MPATH_PREFIX "mpath-" + +- + struct mpath_priv { + struct dm_pool *mem; + struct dev_filter f; +@@ -35,6 +35,9 @@ struct mpath_priv { + struct dm_hash_table *hash; + }; + ++/* ++ * given "/dev/foo" return "foo" ++ */ + static const char *_get_sysfs_name(struct device *dev) + { + const char *name; +@@ -53,6 +56,11 @@ static const char *_get_sysfs_name(struct device *dev) + return name; + } + ++/* ++ * given major:minor ++ * readlink translates /sys/dev/block/major:minor to /sys/.../foo ++ * from /sys/.../foo return "foo" ++ */ + static const char *_get_sysfs_name_by_devt(const char *sysfs_dir, dev_t devno, + char *buf, size_t buf_size) + { +@@ -102,27 +110,28 @@ static int _get_sysfs_string(const char *path, char *buffer, int max_size) + return r; + } + +-static int _get_sysfs_get_major_minor(const char *sysfs_dir, const char *kname, int *major, int *minor) ++static int _get_sysfs_dm_mpath(struct dev_types *dt, const char *sysfs_dir, const char *holder_name) + { +- char path[PATH_MAX], buffer[64]; ++ char path[PATH_MAX]; ++ char buffer[128]; + +- if (dm_snprintf(path, sizeof(path), "%s/block/%s/dev", sysfs_dir, kname) < 0) { ++ if (dm_snprintf(path, sizeof(path), "%sblock/%s/dm/uuid", sysfs_dir, holder_name) < 0) { + log_error("Sysfs path string is too long."); + return 0; + } + ++ buffer[0] = '\0'; ++ + if (!_get_sysfs_string(path, buffer, sizeof(buffer))) + return_0; + +- if (sscanf(buffer, "%d:%d", major, minor) != 2) { +- log_error("Failed to parse major minor from %s", buffer); +- return 0; +- } ++ if (!strncmp(buffer, 
MPATH_PREFIX, 6)) ++ return 1; + +- return 1; ++ return 0; + } + +-static int _get_parent_mpath(const char *dir, char *name, int max_size) ++static int _get_holder_name(const char *dir, char *name, int max_size) + { + struct dirent *d; + DIR *dr; +@@ -155,7 +164,7 @@ static int _get_parent_mpath(const char *dir, char *name, int max_size) + } + + #ifdef UDEV_SYNC_SUPPORT +-static int _udev_dev_is_mpath(struct device *dev) ++static int _udev_dev_is_mpath_component(struct device *dev) + { + const char *value; + struct dev_ext *ext; +@@ -174,95 +183,148 @@ static int _udev_dev_is_mpath(struct device *dev) + return 0; + } + #else +-static int _udev_dev_is_mpath(struct device *dev) ++static int _udev_dev_is_mpath_component(struct device *dev) + { + return 0; + } + #endif + +-static int _native_dev_is_mpath(struct dev_filter *f, struct device *dev) ++static int _native_dev_is_mpath_component(struct cmd_context *cmd, struct dev_filter *f, struct device *dev) + { + struct mpath_priv *mp = (struct mpath_priv *) f->private; + struct dev_types *dt = mp->dt; +- const char *part_name, *name; +- struct stat info; +- char path[PATH_MAX], parent_name[PATH_MAX]; ++ const char *part_name; ++ const char *name; /* e.g. "sda" for "/dev/sda" */ ++ char link_path[PATH_MAX]; /* some obscure, unpredictable sysfs path */ ++ char holders_path[PATH_MAX]; /* e.g. "/sys/block/sda/holders/" */ ++ char dm_dev_path[PATH_MAX]; /* e.g. "/dev/dm-1" */ ++ char holder_name[128] = { 0 }; /* e.g. 
"dm-1" */ + const char *sysfs_dir = dm_sysfs_dir(); +- int major = MAJOR(dev->dev); +- int minor = MINOR(dev->dev); ++ int dev_major = MAJOR(dev->dev); ++ int dev_minor = MINOR(dev->dev); ++ int dm_dev_major; ++ int dm_dev_minor; ++ struct stat info; + dev_t primary_dev; + long look; + +- /* Limit this filter only to SCSI devices */ +- if (!major_is_scsi_device(dt, MAJOR(dev->dev))) ++ /* Limit this filter to SCSI or NVME devices */ ++ if (!major_is_scsi_device(dt, dev_major) && !dev_is_nvme(dt, dev)) + return 0; + + switch (dev_get_primary_dev(dt, dev, &primary_dev)) { ++ + case 2: /* The dev is partition. */ + part_name = dev_name(dev); /* name of original dev for log_debug msg */ +- if (!(name = _get_sysfs_name_by_devt(sysfs_dir, primary_dev, parent_name, sizeof(parent_name)))) ++ ++ /* gets "foo" for "/dev/foo" where "/dev/foo" comes from major:minor */ ++ if (!(name = _get_sysfs_name_by_devt(sysfs_dir, primary_dev, link_path, sizeof(link_path)))) + return_0; ++ + log_debug_devs("%s: Device is a partition, using primary " + "device %s for mpath component detection", + part_name, name); + break; ++ + case 1: /* The dev is already a primary dev. Just continue with the dev. */ ++ ++ /* gets "foo" for "/dev/foo" */ + if (!(name = _get_sysfs_name(dev))) + return_0; + break; ++ + default: /* 0, error. 
*/ +- log_warn("Failed to get primary device for %d:%d.", major, minor); ++ log_warn("Failed to get primary device for %d:%d.", dev_major, dev_minor); + return 0; + } + +- if (dm_snprintf(path, sizeof(path), "%s/block/%s/holders", sysfs_dir, name) < 0) { ++ if (dm_snprintf(holders_path, sizeof(holders_path), "%sblock/%s/holders", sysfs_dir, name) < 0) { + log_warn("Sysfs path to check mpath is too long."); + return 0; + } + + /* also will filter out partitions */ +- if (stat(path, &info)) ++ if (stat(holders_path, &info)) + return 0; + + if (!S_ISDIR(info.st_mode)) { +- log_warn("Path %s is not a directory.", path); ++ log_warn("Path %s is not a directory.", holders_path); + return 0; + } + +- if (!_get_parent_mpath(path, parent_name, sizeof(parent_name))) ++ /* ++ * If holders dir contains an entry such as "dm-1", then this sets ++ * holder_name to "dm-1". ++ * ++ * If holders dir is empty, return 0 (this is generally where ++ * devs that are not mpath components return.) ++ */ ++ if (!_get_holder_name(holders_path, holder_name, sizeof(holder_name))) + return 0; + +- if (!_get_sysfs_get_major_minor(sysfs_dir, parent_name, &major, &minor)) +- return_0; ++ if (dm_snprintf(dm_dev_path, sizeof(dm_dev_path), "%s/%s", cmd->dev_dir, holder_name) < 0) { ++ log_warn("dm device path to check mpath is too long."); ++ return 0; ++ } + +- if (major != dt->device_mapper_major) ++ /* ++ * stat "/dev/dm-1" which is the holder of the dev we're checking ++ * dm_dev_major:dm_dev_minor come from stat("/dev/dm-1") ++ */ ++ if (stat(dm_dev_path, &info)) { ++ log_debug("filter-mpath %s holder %s stat result %d", ++ dev_name(dev), dm_dev_path, errno); + return 0; ++ } ++ dm_dev_major = (int)MAJOR(info.st_rdev); ++ dm_dev_minor = (int)MINOR(info.st_rdev); ++ ++ if (dm_dev_major != dt->device_mapper_major) { ++ log_debug_devs("filter-mpath %s holder %s %d:%d does not have dm major", ++ dev_name(dev), dm_dev_path, dm_dev_major, dm_dev_minor); ++ return 0; ++ } + +- /* Avoid repeated 
detection of multipath device and use first checked result */ +- look = (long) dm_hash_lookup_binary(mp->hash, &minor, sizeof(minor)); ++ /* ++ * Save the result of checking that "/dev/dm-1" is an mpath device ++ * to avoid repeating it for each path component. ++ * The minor number of "/dev/dm-1" is added to the hash table with ++ * const value 2 meaning that dm minor 1 (for /dev/dm-1) is a multipath dev ++ * and const value 1 meaning that dm minor 1 is not a multipath dev. ++ */ ++ look = (long) dm_hash_lookup_binary(mp->hash, &dm_dev_minor, sizeof(dm_dev_minor)); + if (look > 0) { +- log_debug_devs("%s(%u:%u): already checked as %sbeing mpath.", +- parent_name, major, minor, (look > 1) ? "" : "not "); ++ log_debug_devs("filter-mpath %s holder %s %u:%u already checked as %sbeing mpath.", ++ dev_name(dev), holder_name, dm_dev_major, dm_dev_minor, (look > 1) ? "" : "not "); + return (look > 1) ? 1 : 0; + } + +- if (lvm_dm_prefix_check(major, minor, MPATH_PREFIX)) { +- (void) dm_hash_insert_binary(mp->hash, &minor, sizeof(minor), (void*)2); ++ /* ++ * Returns 1 if /sys/block/<holder_name>/dm/uuid indicates that ++ * <holder_name> is a dm device with dm uuid prefix mpath-. ++ * When true, <holder_name> will be something like "dm-1". ++ * ++ * (Is a hash table worth it to avoid reading one sysfs file?) 
++ */ ++ if (_get_sysfs_dm_mpath(dt, sysfs_dir, holder_name)) { ++ log_debug_devs("filter-mpath %s holder %s %u:%u ignore mpath component", ++ dev_name(dev), holder_name, dm_dev_major, dm_dev_minor); ++ (void) dm_hash_insert_binary(mp->hash, &dm_dev_minor, sizeof(dm_dev_minor), (void*)2); + return 1; + } + +- (void) dm_hash_insert_binary(mp->hash, &minor, sizeof(minor), (void*)1); ++ (void) dm_hash_insert_binary(mp->hash, &dm_dev_minor, sizeof(dm_dev_minor), (void*)1); + + return 0; + } + +-static int _dev_is_mpath(struct dev_filter *f, struct device *dev) ++static int _dev_is_mpath_component(struct cmd_context *cmd, struct dev_filter *f, struct device *dev) + { + if (dev->ext.src == DEV_EXT_NONE) +- return _native_dev_is_mpath(f, dev); ++ return _native_dev_is_mpath_component(cmd, f, dev); + + if (dev->ext.src == DEV_EXT_UDEV) +- return _udev_dev_is_mpath(dev); ++ return _udev_dev_is_mpath_component(dev); + + log_error(INTERNAL_ERROR "Missing hook for mpath recognition " + "using external device info source %s", dev_ext_name(dev)); +@@ -272,11 +334,11 @@ static int _dev_is_mpath(struct dev_filter *f, struct device *dev) + + #define MSG_SKIPPING "%s: Skipping mpath component device" + +-static int _ignore_mpath(struct cmd_context *cmd, struct dev_filter *f, struct device *dev, const char *use_filter_name) ++static int _ignore_mpath_component(struct cmd_context *cmd, struct dev_filter *f, struct device *dev, const char *use_filter_name) + { + dev->filtered_flags &= ~DEV_FILTERED_MPATH_COMPONENT; + +- if (_dev_is_mpath(f, dev) == 1) { ++ if (_dev_is_mpath_component(cmd, f, dev) == 1) { + if (dev->ext.src == DEV_EXT_NONE) + log_debug_devs(MSG_SKIPPING, dev_name(dev)); + else +@@ -303,8 +365,8 @@ static void _destroy(struct dev_filter *f) + struct dev_filter *mpath_filter_create(struct dev_types *dt) + { + const char *sysfs_dir = dm_sysfs_dir(); +- struct dm_pool *mem; + struct mpath_priv *mp; ++ struct dm_pool *mem; + struct dm_hash_table *hash; + + if (!*sysfs_dir) { 
+@@ -328,19 +390,13 @@ struct dev_filter *mpath_filter_create(struct dev_types *dt) + goto bad; + } + +- if (!(mp = dm_pool_zalloc(mem, sizeof(*mp)))) { +- log_error("mpath filter allocation failed."); +- goto bad; +- } +- +- mp->f.passes_filter = _ignore_mpath; ++ mp->f.passes_filter = _ignore_mpath_component; + mp->f.destroy = _destroy; + mp->f.use_count = 0; + mp->f.private = mp; + mp->f.name = "mpath"; +- +- mp->mem = mem; + mp->dt = dt; ++ mp->mem = mem; + mp->hash = hash; + + log_debug_devs("mpath filter initialised."); diff --git a/SOURCES/lvm2-2_03_12-integrity-fix-segfault-on-error-path-when-replacing-.patch b/SOURCES/lvm2-2_03_12-integrity-fix-segfault-on-error-path-when-replacing-.patch new file mode 100644 index 0000000..d4ece0d --- /dev/null +++ b/SOURCES/lvm2-2_03_12-integrity-fix-segfault-on-error-path-when-replacing-.patch @@ -0,0 +1,24 @@ + lib/metadata/integrity_manip.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/lib/metadata/integrity_manip.c b/lib/metadata/integrity_manip.c +index 53ab1b3..abf90d8 100644 +--- a/lib/metadata/integrity_manip.c ++++ b/lib/metadata/integrity_manip.c +@@ -773,9 +773,13 @@ int lv_add_integrity_to_raid(struct logical_volume *lv, struct integrity_setting + bad: + log_error("Failed to add integrity."); + +- for (s = 0; s < revert_meta_lvs; s++) { +- if (!lv_remove(imeta_lvs[s])) +- log_error("New integrity metadata LV may require manual removal."); ++ if (revert_meta_lvs) { ++ for (s = 0; s < DEFAULT_RAID_MAX_IMAGES; s++) { ++ if (!imeta_lvs[s]) ++ continue; ++ if (!lv_remove(imeta_lvs[s])) ++ log_error("New integrity metadata LV may require manual removal."); ++ } + } + + if (!vg_write(vg) || !vg_commit(vg)) diff --git a/SOURCES/lvm2-2_03_12-label_scan-fix-missing-free-of-filtered_devs.patch b/SOURCES/lvm2-2_03_12-label_scan-fix-missing-free-of-filtered_devs.patch new file mode 100644 index 0000000..b5bccfb --- /dev/null +++ 
b/SOURCES/lvm2-2_03_12-label_scan-fix-missing-free-of-filtered_devs.patch @@ -0,0 +1,19 @@ + lib/label/label.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/lib/label/label.c b/lib/label/label.c +index e067a6b..e6dd4a1 100644 +--- a/lib/label/label.c ++++ b/lib/label/label.c +@@ -1243,6 +1243,11 @@ int label_scan(struct cmd_context *cmd) + free(devl); + } + ++ dm_list_iterate_items_safe(devl, devl2, &filtered_devs) { ++ dm_list_del(&devl->list); ++ free(devl); ++ } ++ + /* + * If hints were not available/usable, then we scanned all devs, + * and we now know which are PVs. Save this list of PVs we've diff --git a/SOURCES/lvm2-2_03_12-lvcreate-use-lv_passes_readonly_filter.patch b/SOURCES/lvm2-2_03_12-lvcreate-use-lv_passes_readonly_filter.patch new file mode 100644 index 0000000..bb28cd7 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-lvcreate-use-lv_passes_readonly_filter.patch @@ -0,0 +1,66 @@ + WHATS_NEW | 4 ++++ + lib/activate/activate.c | 5 +++++ + lib/activate/activate.h | 2 ++ + lib/metadata/lv_manip.c | 6 ++++++ + 4 files changed, 17 insertions(+) + +diff --git a/WHATS_NEW b/WHATS_NEW +index 3953c7e..c8f869c 100644 +--- a/WHATS_NEW ++++ b/WHATS_NEW +@@ -1,5 +1,9 @@ + Version 2.03.12 - + =================================== ++ Check if lvcreate passes read_only_volume_list with tags and skips zeroing. ++ Limit pool metadata spare to 16GiB. ++ Improves conversion and allocation of pool metadata. ++ Support thin pool metadata 15.88GiB, adds 64MiB, thin_pool_crop_metadata=0. + Fix problem with wiping of converted LVs. + Fix memleak in scanning (2.03.11). + Fix corner case allocation for thin-pools. 
+diff --git a/lib/activate/activate.c b/lib/activate/activate.c +index 7ed6441..de866fb 100644 +--- a/lib/activate/activate.c ++++ b/lib/activate/activate.c +@@ -466,6 +466,11 @@ static int _passes_readonly_filter(struct cmd_context *cmd, + return _lv_passes_volumes_filter(cmd, lv, cn, activation_read_only_volume_list_CFG); + } + ++int lv_passes_readonly_filter(const struct logical_volume *lv) ++{ ++ return _passes_readonly_filter(lv->vg->cmd, lv); ++} ++ + int library_version(char *version, size_t size) + { + if (!activation()) +diff --git a/lib/activate/activate.h b/lib/activate/activate.h +index 3f4d128..53c8631 100644 +--- a/lib/activate/activate.h ++++ b/lib/activate/activate.h +@@ -208,6 +208,8 @@ int lvs_in_vg_opened(const struct volume_group *vg); + + int lv_is_active(const struct logical_volume *lv); + ++int lv_passes_readonly_filter(const struct logical_volume *lv); ++ + /* Check is any component LV is active */ + const struct logical_volume *lv_component_is_active(const struct logical_volume *lv); + const struct logical_volume *lv_holder_is_active(const struct logical_volume *lv); +diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c +index 445c4ad..5ff64a3 100644 +--- a/lib/metadata/lv_manip.c ++++ b/lib/metadata/lv_manip.c +@@ -7976,6 +7976,12 @@ static int _should_wipe_lv(struct lvcreate_params *lp, + first_seg(first_seg(lv)->pool_lv)->zero_new_blocks)) + return 0; + ++ if (warn && (lv_passes_readonly_filter(lv))) { ++ log_warn("WARNING: Read-only activated logical volume %s not zeroed.", ++ display_lvname(lv)); ++ return 0; ++ } ++ + /* Cannot zero read-only volume */ + if ((lv->status & LVM_WRITE) && + (lp->zero || lp->wipe_signatures)) diff --git a/SOURCES/lvm2-2_03_12-lvmlockd-sscanf-buffer-size-warnings.patch b/SOURCES/lvm2-2_03_12-lvmlockd-sscanf-buffer-size-warnings.patch new file mode 100644 index 0000000..9bc2f48 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-lvmlockd-sscanf-buffer-size-warnings.patch @@ -0,0 +1,29 @@ + 
daemons/lvmlockd/lvmlockd-core.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c +index fea8ee6..c4abf66 100644 +--- a/daemons/lvmlockd/lvmlockd-core.c ++++ b/daemons/lvmlockd/lvmlockd-core.c +@@ -896,8 +896,9 @@ static int read_adopt_file(struct list_head *vg_lockd) + goto fail; + + memset(vg_uuid, 0, sizeof(vg_uuid)); ++ memset(lm_type_str, 0, sizeof(lm_type_str)); + +- if (sscanf(adopt_line, "VG: %63s %64s %16s %64s", ++ if (sscanf(adopt_line, "VG: %63s %64s %15s %64s", + vg_uuid, ls->vg_name, lm_type_str, ls->vg_args) != 4) { + goto fail; + } +@@ -916,8 +917,9 @@ static int read_adopt_file(struct list_head *vg_lockd) + r->type = LD_RT_LV; + + memset(vg_uuid, 0, sizeof(vg_uuid)); ++ memset(mode, 0, sizeof(mode)); + +- if (sscanf(adopt_line, "LV: %64s %64s %s %8s %u", ++ if (sscanf(adopt_line, "LV: %64s %64s %s %7s %u", + vg_uuid, r->name, r->lv_args, mode, &r->version) != 5) { + goto fail; + } diff --git a/SOURCES/lvm2-2_03_12-make-generate.patch b/SOURCES/lvm2-2_03_12-make-generate.patch new file mode 100644 index 0000000..2f4f682 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-make-generate.patch @@ -0,0 +1,18 @@ + man/lvconvert.8_pregen | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/man/lvconvert.8_pregen b/man/lvconvert.8_pregen +index a47ccac..170eec8 100644 +--- a/man/lvconvert.8_pregen ++++ b/man/lvconvert.8_pregen +@@ -772,6 +772,10 @@ Add a cache to an LV, using a specified cache device. 
+ .br + .RS 4 + .ad l ++[ \fB-c\fP|\fB--chunksize\fP \fISize\fP[k|UNIT] ] ++.ad b ++.br ++.ad l + [ \fB--cachesize\fP \fISize\fP[m|UNIT] ] + .ad b + .br diff --git a/SOURCES/lvm2-2_03_12-man-update-lvmthin.patch b/SOURCES/lvm2-2_03_12-man-update-lvmthin.patch new file mode 100644 index 0000000..f61660f --- /dev/null +++ b/SOURCES/lvm2-2_03_12-man-update-lvmthin.patch @@ -0,0 +1,86 @@ + man/lvmthin.7_main | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/man/lvmthin.7_main b/man/lvmthin.7_main +index ce23431..e6f1d63 100644 +--- a/man/lvmthin.7_main ++++ b/man/lvmthin.7_main +@@ -394,7 +394,7 @@ the pmspare LV. + \& + + If thin pool metadata is damaged, it may be repairable. +-Checking and repairing thin pool metadata is analagous to ++Checking and repairing thin pool metadata is analogous to + running fsck/repair on a file system. + + When a thin pool LV is activated, lvm runs the thin_check command +@@ -437,14 +437,24 @@ copy to the VG's pmspare LV. + If step 1 is successful, the thin pool metadata LV is replaced + with the pmspare LV containing the corrected metadata. + The previous thin pool metadata LV, containing the damaged metadata, +-becomes visible with the new name ThinPoolLV_tmetaN (where N is 0,1,...). +- +-If the repair works, the thin pool LV and its thin LVs can be activated, +-and the LV containing the damaged thin pool metadata can be removed. +-It may be useful to move the new metadata LV (previously pmspare) to a +-better PV. +- +-If the repair does not work, the thin pool LV and its thin LVs are lost. ++becomes visible with the new name ThinPoolLV_metaN (where N is 0,1,...). ++ ++If the repair works, the thin pool LV and its thin LVs can be activated. ++User should manually check if repaired thin pool kernel metadata ++has all data for all lvm2 known LVs by individual activation of ++every thin LV. 
When all works, user should continue with fsck of ++all filesystems present these such volumes. ++Once the thin pool is considered fully functional user may remove ThinPoolLV_metaN ++(the LV containing the damaged thin pool metadata) for possible ++space reuse. ++For a better performance it may be useful to pvmove the new repaired metadata LV ++(written to previous pmspare volume) to a better PV (i.e. SSD) ++ ++If the repair operation fails, the thin pool LV and its thin LVs ++are not accessible and it may be necessary to restore their content ++from a backup. In such case the content of unmodified original damaged ++ThinPoolLV_metaN volume can be used by your support for more ++advanced recovery methods. + + If metadata is manually restored with thin_repair directly, + the pool metadata LV can be manually swapped with another LV +@@ -452,6 +462,9 @@ containing new metadata: + + .B lvconvert --thinpool VG/ThinPoolLV --poolmetadata VG/NewThinMetaLV + ++Note: Thin pool metadata is compact so even small corruptions ++in them may result in significant portions of mappings to be lost. ++It is recommended to use fast resilient storage for them. + + .SS Activation of thin snapshots + +@@ -549,7 +562,7 @@ Command to extend thin pool data space: + .fi + + Other methods of increasing free data space in a thin pool LV +-include removing a thin LV and its related snapsots, or running ++include removing a thin LV and its related snapshots, or running + fstrim on the file system using a thin LV. + + +@@ -689,7 +702,7 @@ with two configuration settings: + .B thin_pool_autoextend_threshold + .br + is a percentage full value that defines when the thin pool LV should be +-extended. Setting this to 100 disables automatic extention. The minimum ++extended. Setting this to 100 disables automatic extension. The minimum + value is 50. + + .BR lvm.conf (5) +@@ -716,7 +729,7 @@ the --ignoremonitoring option can be used. 
With this option, the command + will not ask dmeventd to monitor the thin pool LV. + + .IP \[bu] +-Setting thin_pool_autoextend_threshould to 100 disables automatic ++Setting thin_pool_autoextend_threshold to 100 disables automatic + extension of thin pool LVs, even if they are being monitored by dmeventd. + + .P diff --git a/SOURCES/lvm2-2_03_12-pool-limit-pmspare-to-16GiB.patch b/SOURCES/lvm2-2_03_12-pool-limit-pmspare-to-16GiB.patch new file mode 100644 index 0000000..b41b370 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-pool-limit-pmspare-to-16GiB.patch @@ -0,0 +1,39 @@ + lib/metadata/pool_manip.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/lib/metadata/pool_manip.c b/lib/metadata/pool_manip.c +index b67882e..1975cb4 100644 +--- a/lib/metadata/pool_manip.c ++++ b/lib/metadata/pool_manip.c +@@ -697,6 +697,8 @@ static struct logical_volume *_alloc_pool_metadata_spare(struct volume_group *vg + int handle_pool_metadata_spare(struct volume_group *vg, uint32_t extents, + struct dm_list *pvh, int poolmetadataspare) + { ++ /* Max usable size of any spare volume is currently 16GiB rouned to extent size */ ++ const uint64_t MAX_SIZE = (UINT64_C(2 * 16) * 1024 * 1024 + vg->extent_size - 1) / vg->extent_size; + struct logical_volume *lv = vg->pool_metadata_spare_lv; + uint32_t seg_mirrors; + struct lv_segment *seg; +@@ -706,8 +708,11 @@ int handle_pool_metadata_spare(struct volume_group *vg, uint32_t extents, + /* Find maximal size of metadata LV */ + dm_list_iterate_items(lvl, &vg->lvs) + if (lv_is_pool_metadata(lvl->lv) && +- (lvl->lv->le_count > extents)) ++ (lvl->lv->le_count > extents)) { + extents = lvl->lv->le_count; ++ if (extents >= MAX_SIZE) ++ break; ++ } + + if (!poolmetadataspare) { + /* TODO: Not showing when lvm.conf would define 'n' ? 
*/ +@@ -718,6 +723,9 @@ int handle_pool_metadata_spare(struct volume_group *vg, uint32_t extents, + return 1; + } + ++ if (extents > MAX_SIZE) ++ extents = MAX_SIZE; ++ + if (!lv) { + if (!_alloc_pool_metadata_spare(vg, extents, pvh)) + return_0; diff --git a/SOURCES/lvm2-2_03_12-pvck-fix-warning-and-exit-code-for-non-4k-mda1-offse.patch b/SOURCES/lvm2-2_03_12-pvck-fix-warning-and-exit-code-for-non-4k-mda1-offse.patch new file mode 100644 index 0000000..63020bd --- /dev/null +++ b/SOURCES/lvm2-2_03_12-pvck-fix-warning-and-exit-code-for-non-4k-mda1-offse.patch @@ -0,0 +1,24 @@ + tools/pvck.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/tools/pvck.c b/tools/pvck.c +index c36e182..88350de 100644 +--- a/tools/pvck.c ++++ b/tools/pvck.c +@@ -1140,9 +1140,13 @@ static int _dump_label_and_pv_header(struct cmd_context *cmd, uint64_t labelsect + *mda1_offset = xlate64(dlocn->offset); + *mda1_size = xlate64(dlocn->size); + +- if (*mda1_offset != 4096) { +- log_print("CHECK: pv_header.disk_locn[%d].offset expected 4096 # for first mda", di); +- bad++; ++ /* ++ * mda1 offset is page size from machine that created it, ++ * warn if it's not one of the expected page sizes. 
++ */ ++ if ((*mda1_offset != 4096) && (*mda1_offset != 8192) && (*mda1_offset != 65536)) { ++ log_print("WARNING: pv_header.disk_locn[%d].offset %llu is unexpected # for first mda", ++ di, (unsigned long long)*mda1_offset); + } + } else { + *mda2_offset = xlate64(dlocn->offset); diff --git a/SOURCES/lvm2-2_03_12-test-check-read_only_volume_list-tagging-works.patch b/SOURCES/lvm2-2_03_12-test-check-read_only_volume_list-tagging-works.patch new file mode 100644 index 0000000..bdcc15b --- /dev/null +++ b/SOURCES/lvm2-2_03_12-test-check-read_only_volume_list-tagging-works.patch @@ -0,0 +1,19 @@ + test/shell/tags.sh | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/test/shell/tags.sh b/test/shell/tags.sh +index fd1b332..5b636a8 100644 +--- a/test/shell/tags.sh ++++ b/test/shell/tags.sh +@@ -52,6 +52,11 @@ check lv_field @firstlvtag1 tags "firstlvtag1" + not check lv_field @secondlvtag1 tags "firstlvtag1" + check lv_field $vg1/$lv2 tags "secondlvtag1" + not check lv_field $vg1/$lv1 tags "secondlvtag1" ++ ++# LV is not zeroed when tag matches read only volume list ++lvcreate -l1 $vg1 --addtag "RO" --config "activation/read_only_volume_list = [ \"@RO\" ]" 2>&1 | tee out ++grep "not zeroed" out ++ + vgremove -f $vg1 + + # lvchange with --addtag and --deltag diff --git a/SOURCES/lvm2-2_03_12-tests-check-16G-thin-pool-metadata-size.patch b/SOURCES/lvm2-2_03_12-tests-check-16G-thin-pool-metadata-size.patch new file mode 100644 index 0000000..ae05b76 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-tests-check-16G-thin-pool-metadata-size.patch @@ -0,0 +1,98 @@ + test/shell/thin-16g.sh | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 88 insertions(+) + create mode 100644 test/shell/thin-16g.sh + +diff --git a/test/shell/thin-16g.sh b/test/shell/thin-16g.sh +new file mode 100644 +index 0000000..ee7e22e +--- /dev/null ++++ b/test/shell/thin-16g.sh +@@ -0,0 +1,88 @@ ++#!/usr/bin/env bash ++ ++# Copyright (C) 2021 Red Hat, Inc. All rights reserved. 
++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions ++# of the GNU General Public License v.2. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++# Test usability of 16g thin pool metadata LV ++ ++ ++SKIP_WITH_LVMPOLLD=1 ++ ++. lib/inittest ++ ++aux have_thin 1 0 0 || skip ++ ++aux prepare_vg 1 50000 ++ ++lvcreate -T -L10 --poolmetadatasize 16g $vg/pool ++check lv_field $vg/pool_tmeta size "<15.88g" ++lvremove -f $vg ++ ++# Cropped way ++lvcreate -T -L10 --poolmetadatasize 16g --config 'allocation/thin_pool_crop_metadata=1' $vg/pool ++check lv_field $vg/pool_tmeta size "15.81g" ++lvremove -f $vg ++ ++lvcreate -L16G -n meta $vg ++lvcreate -L10 -n pool $vg ++lvconvert --yes --thinpool $vg/pool --poolmetadata meta ++# Uncropped size 33554432 sectors - 16GiB ++dmsetup table ${vg}-pool_tmeta | grep 33554432 ++lvremove -f $vg ++ ++# Uses 20G metadata volume, but crops the size in DM table ++lvcreate -L20G -n meta $vg ++lvcreate -L10 -n pool $vg ++lvconvert --yes --thinpool $vg/pool --poolmetadata meta --config 'allocation/thin_pool_crop_metadata=1' ++check lv_field $vg/lvol0_pmspare size "16.00g" ++# Size should be cropped to 33161216 sectors ~15.81GiB ++dmsetup table ${vg}-pool_tmeta | grep 33161216 ++ ++# Also size remains unchanged with activation has no cropping, ++# but metadata have no CROP_METADATA flag set ++lvchange -an $vg ++lvchange -ay $vg ++# Size still stays cropped to 33161216 sectors ~15.81GiB ++dmsetup table ${vg}-pool_tmeta | grep 33161216 ++lvremove -f $vg ++ ++# Minimal size is 2M ++lvcreate -L1M -n meta $vg ++lvcreate -L10 -n pool $vg ++not lvconvert --yes --thinpool $vg/pool --poolmetadata meta ++lvremove -f $vg ++ ++# Uses 20G metadata volume, but crops the size in DM table 
++lvcreate -L1 --poolmetadatasize 10G -T $vg/pool ++lvresize -L+10G $vg/pool_tmeta --config 'allocation/thin_pool_crop_metadata=1' ++check lv_field $vg/lvol0_pmspare size "15.81g" ++# Size should be cropped to 33161216 sectors ~15.81GiB ++dmsetup table ${vg}-pool_tmeta | grep 33161216 ++ ++# Without cropping we can grop to ~15.88GiB ++lvresize -L+10G $vg/pool_tmeta ++check lv_field $vg/lvol0_pmspare size "<15.88g" ++lvremove -f $vg ++ ++# User has already 'bigger' metadata and wants them uncropped ++lvcreate -L16G -n meta $vg ++lvcreate -L10 -n pool $vg ++lvconvert --yes --thinpool $vg/pool --poolmetadata meta --config 'allocation/thin_pool_crop_metadata=1' ++ ++# No change with cropping ++lvresize -l+1 $vg/pool_tmeta --config 'allocation/thin_pool_crop_metadata=1' ++dmsetup table ${vg}-pool_tmeta | grep 33161216 ++ ++# Resizes to 'uncropped' size 16GiB with ANY size ++lvresize -l+1 $vg/pool_tmeta ++dmsetup table ${vg}-pool_tmeta | grep 33554432 ++check lv_field $vg/pool_tmeta size "16.00g" ++ ++vgremove -ff $vg diff --git a/SOURCES/lvm2-2_03_12-tests-check-full-zeroing-of-thin-pool-metadata.patch b/SOURCES/lvm2-2_03_12-tests-check-full-zeroing-of-thin-pool-metadata.patch new file mode 100644 index 0000000..d7edcc7 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-tests-check-full-zeroing-of-thin-pool-metadata.patch @@ -0,0 +1,78 @@ + test/shell/thin-zero-meta.sh | 68 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 68 insertions(+) + create mode 100644 test/shell/thin-zero-meta.sh + +diff --git a/test/shell/thin-zero-meta.sh b/test/shell/thin-zero-meta.sh +new file mode 100644 +index 0000000..6a15a73 +--- /dev/null ++++ b/test/shell/thin-zero-meta.sh +@@ -0,0 +1,68 @@ ++#!/usr/bin/env bash ++ ++# Copyright (C) 2021 Red Hat, Inc. All rights reserved. ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions ++# of the GNU General Public License v.2. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++# Test how zeroing of thin-pool metadata works ++ ++SKIP_WITH_LVMLOCKD=1 ++SKIP_WITH_LVMPOLLD=1 ++ ++export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false} ++ ++. lib/inittest ++ ++# ++# Main ++# ++aux have_thin 1 3 0 || skip ++aux have_cache 1 3 0 || skip ++ ++aux prepare_vg 3 40000 ++ ++# Create mostly-zero devs only front of it has some 'real' back-end ++aux zero_dev "$dev1" "$(( $(get first_extent_sector "$dev1") + 8192 )):" ++aux zero_dev "$dev2" "$(( $(get first_extent_sector "$dev2") + 8192 )):" ++aux zero_dev "$dev3" "$(( $(get first_extent_sector "$dev3") + 8192 )):" ++ ++# Prepare randomly filled 4M LV on dev2 ++lvcreate -L16G -n $lv1 $vg "$dev2" ++dd if=/dev/urandom of="$DM_DEV_DIR/$vg/$lv1" bs=1M count=4 oflag=direct || true ++lvremove -f $vg ++ ++for i in 0 1 ++do ++ aux lvmconf "allocation/zero_metadata = $i" ++ ++ # Lvm2 should allocate metadata on dev2 ++ lvcreate -T -L10G --poolmetadatasize 16G $vg/pool "$dev1" "$dev2" ++ lvchange -an $vg ++ ++ lvs -ao+seg_pe_ranges $vg ++ lvchange -ay $vg/pool_tmeta --yes ++ ++ # Skip past 1.2M which is 'created' by thin-pool initialization ++ hexdump -C -n 200 -s 2000000 "$DM_DEV_DIR/$vg/pool_tmeta" | tee out ++ ++ # When fully zeroed, it should be zero - so almost no output from hexdump ++ case "$i" in ++ 0) test $(wc -l < out) -ge 10 ;; # should not be zeroed ++ 1) test $(wc -l < out) -le 10 ;; # should be zeroed ++ esac ++ ++ lvremove -f $vg/pool ++done ++ ++# Check lvm2 spots error during full zeroing of metadata device ++aux error_dev "$dev2" "$(( $(get first_extent_sector "$dev2") + 32 )):" ++not lvcreate -T -L10G --poolmetadatasize 16G $vg/pool "$dev1" "$dev2" |& tee err ++grep "Failed to initialize logical volume" err ++ ++vgremove -ff $vg diff --git 
a/SOURCES/lvm2-2_03_12-tests-check-thin-pool-corner-case-allocs.patch b/SOURCES/lvm2-2_03_12-tests-check-thin-pool-corner-case-allocs.patch new file mode 100644 index 0000000..04962b8 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-tests-check-thin-pool-corner-case-allocs.patch @@ -0,0 +1,47 @@ + test/shell/lvcreate-thin-limits.sh | 30 ++++++++++++++++++++++++++---- + 1 file changed, 26 insertions(+), 4 deletions(-) + +diff --git a/test/shell/lvcreate-thin-limits.sh b/test/shell/lvcreate-thin-limits.sh +index 6a9c33d..5dcc160 100644 +--- a/test/shell/lvcreate-thin-limits.sh ++++ b/test/shell/lvcreate-thin-limits.sh +@@ -27,13 +27,35 @@ aux can_use_16T || skip + aux have_thin 1 0 0 || skip + which mkfs.ext4 || skip + +-aux prepare_pvs 1 16777216 ++# 16T device ++aux prepare_pvs 2 8388608 + get_devs + +-vgcreate $SHARED -s 4K "$vg" "${DEVICES[@]}" ++# gives 16777215M device ++vgcreate $SHARED -s 4M "$vg" "${DEVICES[@]}" + +-not lvcreate -T -L15.995T --poolmetadatasize 5G $vg/pool ++# For 1st. pass only single PV ++lvcreate -l100%PV --name $lv1 $vg "$dev2" + +-lvs -ao+seg_pe_ranges $vg ++for i in 1 0 ++do ++ SIZE=$(get vg_field "$vg" vg_free --units m) ++ SIZE=${SIZE%%\.*} ++ ++ # ~16T - 2 * 5G + something -> should not fit ++ not lvcreate -Zn -T -L$(( SIZE - 2 * 5 * 1024 + 1 )) --poolmetadatasize 5G $vg/pool ++ ++ check vg_field "$vg" lv_count "$i" ++ ++ # Should fit data + metadata + pmspare ++ lvcreate -Zn -T -L$(( SIZE - 2 * 5 * 1024 )) --poolmetadatasize 5G $vg/pool ++ ++ check vg_field "$vg" vg_free "0" ++ ++ lvs -ao+seg_pe_ranges $vg ++ ++ # Remove everything for 2nd. 
pass ++ lvremove -ff $vg ++done + + vgremove -ff $vg diff --git a/SOURCES/lvm2-2_03_12-tests-update-thin-and-cache-checked-messages.patch b/SOURCES/lvm2-2_03_12-tests-update-thin-and-cache-checked-messages.patch new file mode 100644 index 0000000..f4ccd37 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-tests-update-thin-and-cache-checked-messages.patch @@ -0,0 +1,77 @@ + test/shell/lvconvert-thin.sh | 2 +- + test/shell/lvcreate-cache.sh | 12 +++++------- + test/shell/lvcreate-thin-big.sh | 10 +++++----- + 3 files changed, 11 insertions(+), 13 deletions(-) + +diff --git a/test/shell/lvconvert-thin.sh b/test/shell/lvconvert-thin.sh +index 1319655..ee85691 100644 +--- a/test/shell/lvconvert-thin.sh ++++ b/test/shell/lvconvert-thin.sh +@@ -128,7 +128,7 @@ lvcreate -L1T -n $lv1 $vg + lvcreate -L32G -n $lv2 $vg + # Warning about bigger then needed + lvconvert --yes --thinpool $vg/$lv1 --poolmetadata $vg/$lv2 2>&1 | tee err +-grep "WARNING: Maximum" err ++grep -i "maximum" err + lvremove -f $vg + + +diff --git a/test/shell/lvcreate-cache.sh b/test/shell/lvcreate-cache.sh +index 2c46e21..4d9d75e 100644 +--- a/test/shell/lvcreate-cache.sh ++++ b/test/shell/lvcreate-cache.sh +@@ -27,7 +27,6 @@ aux prepare_vg 5 80000 + + aux lvmconf 'global/cache_disabled_features = [ "policy_smq" ]' + +- + ####################### + # Cache_Pool creation # + ####################### +@@ -173,17 +172,16 @@ dmsetup table ${vg}-$lv1 | grep cache # ensure it is loaded in kernel + + lvremove -f $vg + +- + # Check minimum cache pool metadata size +-lvcreate -l 1 --type cache-pool --poolmetadatasize 1 $vg 2>out +-grep "WARNING: Minimum" out ++lvcreate -l 1 --type cache-pool --poolmetadatasize 1 $vg 2>&1 | tee out ++grep -i "minimal" out ++ + + # FIXME: This test is failing in allocator with smaller VG sizes +-lvcreate -l 1 --type cache-pool --poolmetadatasize 17G $vg 2>out +-grep "WARNING: Maximum" out ++lvcreate -l 1 --type cache-pool --poolmetadatasize 17G $vg 2>&1 | tee out ++grep -i "maximum" out + + 
lvremove -f $vg +- + ######################################## + # Cache conversion and r/w permissions # + ######################################## +diff --git a/test/shell/lvcreate-thin-big.sh b/test/shell/lvcreate-thin-big.sh +index 0b622b7..2549035 100644 +--- a/test/shell/lvcreate-thin-big.sh ++++ b/test/shell/lvcreate-thin-big.sh +@@ -31,14 +31,14 @@ vgcreate $SHARED -s 64K "$vg" "${DEVICES[@]}" + + # Size 0 is not valid + invalid lvcreate -L4M --chunksize 128 --poolmetadatasize 0 -T $vg/pool1 2>out +-lvcreate -Zn -L4M --chunksize 128 --poolmetadatasize 16k -T $vg/pool1 2>out +-grep "WARNING: Minimum" out ++lvcreate -Zn -L4M --chunksize 128 --poolmetadatasize 16k -T $vg/pool1 2>&1 >out ++grep -i "minimal" out + # FIXME: metadata allocation fails, if PV doesn't have at least 16GB + # i.e. pool metadata device cannot be multisegment +-lvcreate -Zn -L4M --chunksize 64k --poolmetadatasize 17G -T $vg/pool2 2>out +-grep "WARNING: Maximum" out ++lvcreate -Zn -L4M --chunksize 64k --poolmetadatasize 17G -T $vg/pool2 2>&1 >out ++grep "maximum" out + check lv_field $vg/pool1_tmeta size "2.00m" +-check lv_field $vg/pool2_tmeta size "15.81g" ++check lv_field $vg/pool2_tmeta size "<15.88g" + + # Check we do report correct percent values. 
+ lvcreate --type zero -L3G $vg -n pool3 diff --git a/SOURCES/lvm2-2_03_12-thin-improve-16g-support-for-thin-pool-metadata.patch b/SOURCES/lvm2-2_03_12-thin-improve-16g-support-for-thin-pool-metadata.patch new file mode 100644 index 0000000..8347ad4 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-thin-improve-16g-support-for-thin-pool-metadata.patch @@ -0,0 +1,694 @@ + conf/example.conf.in | 7 +++ + device_mapper/all.h | 16 +++++-- + device_mapper/libdm-deptree.c | 39 ++++++++++++----- + lib/activate/dev_manager.c | 8 ++-- + lib/config/config_settings.h | 5 +++ + lib/config/defaults.h | 2 + + lib/format_text/flags.c | 1 + + lib/metadata/lv_manip.c | 31 ++++++++++++++ + lib/metadata/merge.c | 2 + + lib/metadata/metadata-exported.h | 11 +++++ + lib/metadata/metadata.h | 13 ++++++ + lib/metadata/pool_manip.c | 46 ++++++++++++++++++++ + lib/metadata/thin_manip.c | 92 ++++++++++++++++++++++++++-------------- + lib/thin/thin.c | 22 +++++++--- + man/lvmthin.7_main | 10 ++++- + tools/lvconvert.c | 4 ++ + tools/lvcreate.c | 2 + + 17 files changed, 256 insertions(+), 55 deletions(-) + +diff --git a/conf/example.conf.in b/conf/example.conf.in +index d149ed9..107a071 100644 +--- a/conf/example.conf.in ++++ b/conf/example.conf.in +@@ -494,6 +494,13 @@ allocation { + # This configuration option has an automatic default value. + # thin_pool_metadata_require_separate_pvs = 0 + ++ # Configuration option allocation/thin_pool_crop_metadata. ++ # Older version of lvm2 cropped pool's metadata size to 15.81 GiB. ++ # This is slightly less then the actual maximum 15.88 GiB. ++ # For compatibility with older version and use of cropped size set to 1. ++ # This configuration option has an automatic default value. ++ # thin_pool_crop_metadata = 0 ++ + # Configuration option allocation/thin_pool_zero. + # Thin pool data chunks are zeroed before they are first used. + # Zeroing with a larger thin pool chunk size reduces performance. 
+diff --git a/device_mapper/all.h b/device_mapper/all.h +index 1080d25..489ca1c 100644 +--- a/device_mapper/all.h ++++ b/device_mapper/all.h +@@ -1072,10 +1072,10 @@ int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node, + #define DM_THIN_MIN_DATA_BLOCK_SIZE (UINT32_C(128)) + #define DM_THIN_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152)) + /* +- * Max supported size for thin pool metadata device (17112760320 bytes) +- * Limitation is hardcoded into the kernel and bigger device size +- * is not accepted. ++ * Max supported size for thin pool metadata device (17045913600 bytes) + * drivers/md/dm-thin-metadata.h THIN_METADATA_MAX_SECTORS ++ * But here DM_THIN_MAX_METADATA_SIZE got defined incorrectly ++ * Correct size is (UINT64_C(255) * ((1 << 14) - 64) * (4096 / (1 << 9))) + */ + #define DM_THIN_MAX_METADATA_SIZE (UINT64_C(255) * (1 << 14) * (4096 / (1 << 9)) - 256 * 1024) + +@@ -1088,6 +1088,16 @@ int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node, + uint64_t low_water_mark, + unsigned skip_block_zeroing); + ++int dm_tree_node_add_thin_pool_target_v1(struct dm_tree_node *node, ++ uint64_t size, ++ uint64_t transaction_id, ++ const char *metadata_uuid, ++ const char *pool_uuid, ++ uint32_t data_block_size, ++ uint64_t low_water_mark, ++ unsigned skip_block_zeroing, ++ unsigned crop_metadata); ++ + /* Supported messages for thin provision target */ + typedef enum { + DM_THIN_MESSAGE_CREATE_SNAP, /* device_id, origin_id */ +diff --git a/device_mapper/libdm-deptree.c b/device_mapper/libdm-deptree.c +index 6ce956f..5b60dc9 100644 +--- a/device_mapper/libdm-deptree.c ++++ b/device_mapper/libdm-deptree.c +@@ -3979,6 +3979,24 @@ int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node, + uint64_t low_water_mark, + unsigned skip_block_zeroing) + { ++ return dm_tree_node_add_thin_pool_target_v1(node, size, transaction_id, ++ metadata_uuid, pool_uuid, ++ data_block_size, ++ low_water_mark, ++ skip_block_zeroing, ++ 1); ++} ++ ++int 
dm_tree_node_add_thin_pool_target_v1(struct dm_tree_node *node, ++ uint64_t size, ++ uint64_t transaction_id, ++ const char *metadata_uuid, ++ const char *pool_uuid, ++ uint32_t data_block_size, ++ uint64_t low_water_mark, ++ unsigned skip_block_zeroing, ++ unsigned crop_metadata) ++{ + struct load_segment *seg, *mseg; + uint64_t devsize = 0; + +@@ -4005,17 +4023,18 @@ int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node, + if (!_link_tree_nodes(node, seg->metadata)) + return_0; + +- /* FIXME: more complex target may need more tweaks */ +- dm_list_iterate_items(mseg, &seg->metadata->props.segs) { +- devsize += mseg->size; +- if (devsize > DM_THIN_MAX_METADATA_SIZE) { +- log_debug_activation("Ignoring %" PRIu64 " of device.", +- devsize - DM_THIN_MAX_METADATA_SIZE); +- mseg->size -= (devsize - DM_THIN_MAX_METADATA_SIZE); +- devsize = DM_THIN_MAX_METADATA_SIZE; +- /* FIXME: drop remaining segs */ ++ if (crop_metadata) ++ /* FIXME: more complex target may need more tweaks */ ++ dm_list_iterate_items(mseg, &seg->metadata->props.segs) { ++ devsize += mseg->size; ++ if (devsize > DM_THIN_MAX_METADATA_SIZE) { ++ log_debug_activation("Ignoring %" PRIu64 " of device.", ++ devsize - DM_THIN_MAX_METADATA_SIZE); ++ mseg->size -= (devsize - DM_THIN_MAX_METADATA_SIZE); ++ devsize = DM_THIN_MAX_METADATA_SIZE; ++ /* FIXME: drop remaining segs */ ++ } + } +- } + + if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) { + log_error("Missing pool uuid %s.", pool_uuid); +diff --git a/lib/activate/dev_manager.c b/lib/activate/dev_manager.c +index 8d27bd3..9a25482 100644 +--- a/lib/activate/dev_manager.c ++++ b/lib/activate/dev_manager.c +@@ -261,7 +261,7 @@ static int _info_run(const char *dlid, struct dm_info *dminfo, + int dmtask; + int with_flush; /* TODO: arg for _info_run */ + void *target = NULL; +- uint64_t target_start, target_length, start, length; ++ uint64_t target_start, target_length, start, length, length_crop = 0; + char *target_name, 
*target_params; + const char *devname; + +@@ -297,7 +297,7 @@ static int _info_run(const char *dlid, struct dm_info *dminfo, + /* Uses max DM_THIN_MAX_METADATA_SIZE sectors for metadata device */ + if (lv_is_thin_pool_metadata(seg_status->seg->lv) && + (length > DM_THIN_MAX_METADATA_SIZE)) +- length = DM_THIN_MAX_METADATA_SIZE; ++ length_crop = DM_THIN_MAX_METADATA_SIZE; + + /* Uses virtual size with headers for VDO pool device */ + if (lv_is_vdo_pool(seg_status->seg->lv)) +@@ -310,7 +310,9 @@ static int _info_run(const char *dlid, struct dm_info *dminfo, + target = dm_get_next_target(dmt, target, &target_start, + &target_length, &target_name, &target_params); + +- if ((start == target_start) && (length == target_length)) ++ if ((start == target_start) && ++ ((length == target_length) || ++ (length_crop && (length_crop == target_length)))) + break; /* Keep target_params when matching segment is found */ + + target_params = NULL; /* Marking this target_params unusable */ +diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h +index 3c4032e..cb4e23a 100644 +--- a/lib/config/config_settings.h ++++ b/lib/config/config_settings.h +@@ -628,6 +628,11 @@ cfg(allocation_cache_pool_max_chunks_CFG, "cache_pool_max_chunks", allocation_CF + cfg(allocation_thin_pool_metadata_require_separate_pvs_CFG, "thin_pool_metadata_require_separate_pvs", allocation_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS, vsn(2, 2, 89), NULL, 0, NULL, + "Thin pool metadata and data will always use different PVs.\n") + ++cfg(allocation_thin_pool_crop_metadata_CFG, "thin_pool_crop_metadata", allocation_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_CROP_METADATA, vsn(2, 3, 12), NULL, 0, NULL, ++ "Older version of lvm2 cropped pool's metadata size to 15.81 GiB.\n" ++ "This is slightly less then the actual maximum 15.88 GiB.\n" ++ "For compatibility with older version and use of cropped size set to 1.\n") ++ + 
cfg(allocation_thin_pool_zero_CFG, "thin_pool_zero", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_ZERO, vsn(2, 2, 99), NULL, 0, NULL, + "Thin pool data chunks are zeroed before they are first used.\n" + "Zeroing with a larger thin pool chunk size reduces performance.\n") +diff --git a/lib/config/defaults.h b/lib/config/defaults.h +index 708a575..bcc20cc 100644 +--- a/lib/config/defaults.h ++++ b/lib/config/defaults.h +@@ -118,6 +118,8 @@ + #define DEFAULT_THIN_REPAIR_OPTION1 "" + #define DEFAULT_THIN_REPAIR_OPTIONS_CONFIG "#S" DEFAULT_THIN_REPAIR_OPTION1 + #define DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS 0 ++#define DEFAULT_THIN_POOL_CROP_METADATA 0 ++#define DEFAULT_THIN_POOL_MAX_METADATA_SIZE_V1_KB (UINT64_C(255) * ((1 << 14) - 64) * 4) /* KB */ /* 0x3f8040 blocks */ + #define DEFAULT_THIN_POOL_MAX_METADATA_SIZE (DM_THIN_MAX_METADATA_SIZE / 2) /* KB */ + #define DEFAULT_THIN_POOL_MIN_METADATA_SIZE 2048 /* KB */ + #define DEFAULT_THIN_POOL_OPTIMAL_METADATA_SIZE (128 * 1024) /* KB */ +diff --git a/lib/format_text/flags.c b/lib/format_text/flags.c +index bc93a5d..4cee14a 100644 +--- a/lib/format_text/flags.c ++++ b/lib/format_text/flags.c +@@ -72,6 +72,7 @@ static const struct flag _lv_flags[] = { + {LV_ACTIVATION_SKIP, "ACTIVATION_SKIP", COMPATIBLE_FLAG}, + {LV_ERROR_WHEN_FULL, "ERROR_WHEN_FULL", COMPATIBLE_FLAG}, + {LV_METADATA_FORMAT, "METADATA_FORMAT", SEGTYPE_FLAG}, ++ {LV_CROP_METADATA, "CROP_METADATA", SEGTYPE_FLAG}, + {LV_CACHE_VOL, "CACHE_VOL", COMPATIBLE_FLAG}, + {LV_CACHE_USES_CACHEVOL, "CACHE_USES_CACHEVOL", SEGTYPE_FLAG}, + {LV_NOSCAN, NULL, 0}, +diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c +index 443d32c..445c4ad 100644 +--- a/lib/metadata/lv_manip.c ++++ b/lib/metadata/lv_manip.c +@@ -5384,6 +5384,8 @@ static int _lvresize_adjust_extents(struct logical_volume *lv, + uint32_t existing_extents; + uint32_t seg_size = 0; + uint32_t new_extents; ++ 
uint64_t max_metadata_size; ++ thin_crop_metadata_t crop; + int reducing = 0; + + seg_last = last_seg(lv); +@@ -5544,6 +5546,33 @@ static int _lvresize_adjust_extents(struct logical_volume *lv, + return 1; + } + } ++ } else if (lv_is_thin_pool_metadata(lv)) { ++ if (!(seg = get_only_segment_using_this_lv(lv))) ++ return_0; ++ ++ max_metadata_size = get_thin_pool_max_metadata_size(cmd, vg->profile, &crop); ++ ++ if (((uint64_t)lp->extents * vg->extent_size) > max_metadata_size) { ++ lp->extents = (max_metadata_size + vg->extent_size - 1) / vg->extent_size; ++ log_print_unless_silent("Reached maximum pool metadata size %s (%" PRIu32 " extents).", ++ display_size(vg->cmd, max_metadata_size), lp->extents); ++ } ++ ++ if (existing_logical_extents >= lp->extents) ++ lp->extents = existing_logical_extents; ++ ++ crop = get_thin_pool_crop_metadata(cmd, crop, (uint64_t)lp->extents * vg->extent_size); ++ ++ if (seg->crop_metadata != crop) { ++ seg->crop_metadata = crop; ++ seg->lv->status |= LV_CROP_METADATA; ++ /* Crop change require reload even if there no size change */ ++ lp->size_changed = 1; ++ log_print_unless_silent("Thin pool will use metadata without cropping."); ++ } ++ ++ if (!(seg_size = lp->extents - existing_logical_extents)) ++ return 1; /* No change in metadata size */ + } + } else { /* If reducing, find stripes, stripesize & size of last segment */ + if (lp->stripes || lp->stripe_size || lp->mirrors) +@@ -8388,6 +8417,8 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, + first_seg(lv)->chunk_size = lp->chunk_size; + first_seg(lv)->zero_new_blocks = lp->zero_new_blocks; + first_seg(lv)->discards = lp->discards; ++ if ((first_seg(lv)->crop_metadata = lp->crop_metadata) == THIN_CROP_METADATA_NO) ++ lv->status |= LV_CROP_METADATA; + if (!recalculate_pool_chunk_size_with_dev_hints(lv, lp->thin_chunk_size_calc_policy)) { + stack; + goto revert_new_lv; +diff --git a/lib/metadata/merge.c b/lib/metadata/merge.c +index 0aa2293..eff59ae 100644 
+--- a/lib/metadata/merge.c ++++ b/lib/metadata/merge.c +@@ -495,6 +495,8 @@ static void _check_lv_segment(struct logical_volume *lv, struct lv_segment *seg, + seg_error("sets discards"); + if (!dm_list_empty(&seg->thin_messages)) + seg_error("sets thin_messages list"); ++ if (seg->lv->status & LV_CROP_METADATA) ++ seg_error("sets CROP_METADATA flag"); + } + + if (seg_is_thin_volume(seg)) { +diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h +index 54dc29f..0e57722 100644 +--- a/lib/metadata/metadata-exported.h ++++ b/lib/metadata/metadata-exported.h +@@ -143,6 +143,7 @@ + + #define LV_REMOVE_AFTER_RESHAPE UINT64_C(0x0400000000000000) /* LV needs to be removed after a shrinking reshape */ + #define LV_METADATA_FORMAT UINT64_C(0x0800000000000000) /* LV has segments with metadata format */ ++#define LV_CROP_METADATA UINT64_C(0x0000000000000400) /* LV - also VG CLUSTERED */ + + #define LV_RESHAPE UINT64_C(0x1000000000000000) /* Ongoing reshape (number of stripes, stripesize or raid algorithm change): + used as SEGTYPE_FLAG to prevent activation on old runtime */ +@@ -326,6 +327,12 @@ typedef enum { + } thin_discards_t; + + typedef enum { ++ THIN_CROP_METADATA_UNSELECTED = 0, /* 'auto' selects */ ++ THIN_CROP_METADATA_NO, ++ THIN_CROP_METADATA_YES, ++} thin_crop_metadata_t; ++ ++typedef enum { + CACHE_MODE_UNSELECTED = 0, + CACHE_MODE_WRITETHROUGH, + CACHE_MODE_WRITEBACK, +@@ -502,6 +509,7 @@ struct lv_segment { + uint64_t transaction_id; /* For thin_pool, thin */ + thin_zero_t zero_new_blocks; /* For thin_pool */ + thin_discards_t discards; /* For thin_pool */ ++ thin_crop_metadata_t crop_metadata; /* For thin_pool */ + struct dm_list thin_messages; /* For thin_pool */ + struct logical_volume *external_lv; /* For thin */ + struct logical_volume *pool_lv; /* For thin, cache */ +@@ -885,6 +893,8 @@ int update_thin_pool_params(struct cmd_context *cmd, + unsigned attr, + uint32_t pool_data_extents, + uint32_t *pool_metadata_extents, ++ struct 
logical_volume *metadata_lv, ++ unsigned *crop_metadata, + int *chunk_size_calc_method, uint32_t *chunk_size, + thin_discards_t *discards, thin_zero_t *zero_new_blocks); + +@@ -1011,6 +1021,7 @@ struct lvcreate_params { + + uint64_t permission; /* all */ + unsigned error_when_full; /* when segment supports it */ ++ thin_crop_metadata_t crop_metadata; + uint32_t read_ahead; /* all */ + int approx_alloc; /* all */ + alloc_policy_t alloc; /* all */ +diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h +index 2c22450..0f230e4 100644 +--- a/lib/metadata/metadata.h ++++ b/lib/metadata/metadata.h +@@ -512,8 +512,21 @@ int pool_below_threshold(const struct lv_segment *pool_seg); + int pool_check_overprovisioning(const struct logical_volume *lv); + int create_pool(struct logical_volume *pool_lv, const struct segment_type *segtype, + struct alloc_handle *ah, uint32_t stripes, uint32_t stripe_size); ++uint64_t get_thin_pool_max_metadata_size(struct cmd_context *cmd, struct profile *profile, ++ thin_crop_metadata_t *crop); ++thin_crop_metadata_t get_thin_pool_crop_metadata(struct cmd_context *cmd, ++ thin_crop_metadata_t crop, ++ uint64_t metadata_size); + uint64_t estimate_thin_pool_metadata_size(uint32_t data_extents, uint32_t extent_size, uint32_t chunk_size); + ++int update_pool_metadata_min_max(struct cmd_context *cmd, ++ uint32_t extent_size, ++ uint64_t min_metadata_size, /* required min */ ++ uint64_t max_metadata_size, /* writable max */ ++ uint64_t *metadata_size, /* current calculated */ ++ struct logical_volume *metadata_lv, /* name of converted LV or NULL */ ++ uint32_t *metadata_extents); /* resulting extent count */ ++ + /* + * Begin skeleton for external LVM library + */ +diff --git a/lib/metadata/pool_manip.c b/lib/metadata/pool_manip.c +index a9dc611..b67882e 100644 +--- a/lib/metadata/pool_manip.c ++++ b/lib/metadata/pool_manip.c +@@ -742,6 +742,52 @@ int handle_pool_metadata_spare(struct volume_group *vg, uint32_t extents, + return 1; + } + ++int 
update_pool_metadata_min_max(struct cmd_context *cmd, ++ uint32_t extent_size, ++ uint64_t min_metadata_size, /* required min */ ++ uint64_t max_metadata_size, /* writable max */ ++ uint64_t *metadata_size, /* current calculated */ ++ struct logical_volume *metadata_lv, /* name of converted LV or NULL */ ++ uint32_t *metadata_extents) /* resulting extent count */ ++{ ++ max_metadata_size = dm_round_up(max_metadata_size, extent_size); ++ min_metadata_size = dm_round_up(min_metadata_size, extent_size); ++ ++ if (*metadata_size > max_metadata_size) { ++ if (metadata_lv) { ++ log_print_unless_silent("Size %s of pool metadata volume %s is bigger then maximum usable size %s.", ++ display_size(cmd, *metadata_size), ++ display_lvname(metadata_lv), ++ display_size(cmd, max_metadata_size)); ++ } else { ++ if (*metadata_extents) ++ log_print_unless_silent("Reducing pool metadata size %s to maximum usable size %s.", ++ display_size(cmd, *metadata_size), ++ display_size(cmd, max_metadata_size)); ++ *metadata_size = max_metadata_size; ++ } ++ } else if (*metadata_size < min_metadata_size) { ++ if (metadata_lv) { ++ log_error("Can't use volume %s with size %s as pool metadata. 
Minimal required size is %s.", ++ display_lvname(metadata_lv), ++ display_size(cmd, *metadata_size), ++ display_size(cmd, min_metadata_size)); ++ return 0; ++ } else { ++ if (*metadata_extents) ++ log_print_unless_silent("Extending pool metadata size %s to required minimal size %s.", ++ display_size(cmd, *metadata_size), ++ display_size(cmd, min_metadata_size)); ++ *metadata_size = min_metadata_size; ++ } ++ } ++ ++ if (!(*metadata_extents = extents_from_size(cmd, *metadata_size, extent_size))) ++ return_0; ++ ++ return 1; ++} ++ + int vg_set_pool_metadata_spare(struct logical_volume *lv) + { + char new_name[NAME_LEN]; +diff --git a/lib/metadata/thin_manip.c b/lib/metadata/thin_manip.c +index 4591dd7..451c382 100644 +--- a/lib/metadata/thin_manip.c ++++ b/lib/metadata/thin_manip.c +@@ -610,9 +610,9 @@ static uint64_t _estimate_metadata_size(uint32_t data_extents, uint32_t extent_s + } + + /* Estimate maximal supportable thin pool data size for given chunk_size */ +-static uint64_t _estimate_max_data_size(uint32_t chunk_size) ++static uint64_t _estimate_max_data_size(uint64_t max_metadata_size, uint32_t chunk_size) + { +- return chunk_size * (DEFAULT_THIN_POOL_MAX_METADATA_SIZE * 2) * SECTOR_SIZE / UINT64_C(64); ++ return max_metadata_size * chunk_size * SECTOR_SIZE / UINT64_C(64); + } + + /* Estimate thin pool chunk size from data and metadata size (in sector units) */ +@@ -662,6 +662,38 @@ int get_default_allocation_thin_pool_chunk_size(struct cmd_context *cmd, struct + return 1; + } + ++/* Return max supported metadata size with selected cropping */ ++uint64_t get_thin_pool_max_metadata_size(struct cmd_context *cmd, struct profile *profile, ++ thin_crop_metadata_t *crop) ++{ ++ *crop = find_config_tree_bool(cmd, allocation_thin_pool_crop_metadata_CFG, profile) ? ++ THIN_CROP_METADATA_YES : THIN_CROP_METADATA_NO; ++ ++ return (*crop == THIN_CROP_METADATA_NO) ? 
++ (2 * DEFAULT_THIN_POOL_MAX_METADATA_SIZE_V1_KB) : (2 * DEFAULT_THIN_POOL_MAX_METADATA_SIZE); ++} ++ ++/* ++ * With existing crop method, check if the metadata_size would need cropping. ++ * If not, set UNSELECTED, otherwise print some verbose info about selected cropping ++ */ ++thin_crop_metadata_t get_thin_pool_crop_metadata(struct cmd_context *cmd, ++ thin_crop_metadata_t crop, ++ uint64_t metadata_size) ++{ ++ const uint64_t crop_size = (2 * DEFAULT_THIN_POOL_MAX_METADATA_SIZE); ++ ++ if (metadata_size > crop_size) { ++ if (crop == THIN_CROP_METADATA_NO) ++ log_verbose("Using metadata size without cropping."); ++ else ++ log_verbose("Cropping metadata size to %s.", display_size(cmd, crop_size)); ++ } else ++ crop = THIN_CROP_METADATA_UNSELECTED; ++ ++ return crop; ++} ++ + int update_thin_pool_params(struct cmd_context *cmd, + struct profile *profile, + uint32_t extent_size, +@@ -669,10 +701,13 @@ int update_thin_pool_params(struct cmd_context *cmd, + unsigned attr, + uint32_t pool_data_extents, + uint32_t *pool_metadata_extents, ++ struct logical_volume *metadata_lv, ++ thin_crop_metadata_t *crop_metadata, + int *chunk_size_calc_method, uint32_t *chunk_size, + thin_discards_t *discards, thin_zero_t *zero_new_blocks) + { +- uint64_t pool_metadata_size = (uint64_t) *pool_metadata_extents * extent_size; ++ uint64_t pool_metadata_size; ++ uint64_t max_metadata_size; + uint32_t estimate_chunk_size; + uint64_t max_pool_data_size; + const char *str; +@@ -702,7 +737,9 @@ int update_thin_pool_params(struct cmd_context *cmd, + *zero_new_blocks = find_config_tree_bool(cmd, allocation_thin_pool_zero_CFG, profile) + ? 
THIN_ZERO_YES : THIN_ZERO_NO; + +- if (!pool_metadata_size) { ++ max_metadata_size = get_thin_pool_max_metadata_size(cmd, profile, crop_metadata); ++ ++ if (!*pool_metadata_extents) { + if (!*chunk_size) { + if (!get_default_allocation_thin_pool_chunk_size(cmd, profile, + chunk_size, +@@ -723,20 +760,20 @@ int update_thin_pool_params(struct cmd_context *cmd, + } else { + pool_metadata_size = _estimate_metadata_size(pool_data_extents, extent_size, *chunk_size); + +- if (pool_metadata_size > (DEFAULT_THIN_POOL_MAX_METADATA_SIZE * 2)) { ++ if (pool_metadata_size > max_metadata_size) { + /* Suggest bigger chunk size */ + estimate_chunk_size = + _estimate_chunk_size(pool_data_extents, extent_size, +- (DEFAULT_THIN_POOL_MAX_METADATA_SIZE * 2), attr); ++ max_metadata_size, attr); + log_warn("WARNING: Chunk size is too small for pool, suggested minimum is %s.", + display_size(cmd, estimate_chunk_size)); + } + } + + /* Round up to extent size silently */ +- if (pool_metadata_size % extent_size) +- pool_metadata_size += extent_size - pool_metadata_size % extent_size; ++ pool_metadata_size = dm_round_up(pool_metadata_size, extent_size); + } else { ++ pool_metadata_size = (uint64_t) *pool_metadata_extents * extent_size; + estimate_chunk_size = _estimate_chunk_size(pool_data_extents, extent_size, + pool_metadata_size, attr); + +@@ -751,7 +788,19 @@ int update_thin_pool_params(struct cmd_context *cmd, + } + } + +- max_pool_data_size = _estimate_max_data_size(*chunk_size); ++ /* Use not rounded max for data size */ ++ max_pool_data_size = _estimate_max_data_size(max_metadata_size, *chunk_size); ++ ++ if (!update_pool_metadata_min_max(cmd, extent_size, ++ 2 * DEFAULT_THIN_POOL_MIN_METADATA_SIZE, ++ max_metadata_size, ++ &pool_metadata_size, ++ metadata_lv, ++ pool_metadata_extents)) ++ return_0; ++ ++ *crop_metadata = get_thin_pool_crop_metadata(cmd, *crop_metadata, pool_metadata_size); ++ + if ((max_pool_data_size / extent_size) < pool_data_extents) { + log_error("Selected chunk 
size %s cannot address more then %s of thin pool data space.", + display_size(cmd, *chunk_size), display_size(cmd, max_pool_data_size)); +@@ -764,22 +813,6 @@ int update_thin_pool_params(struct cmd_context *cmd, + if (!validate_thin_pool_chunk_size(cmd, *chunk_size)) + return_0; + +- if (pool_metadata_size > (2 * DEFAULT_THIN_POOL_MAX_METADATA_SIZE)) { +- pool_metadata_size = 2 * DEFAULT_THIN_POOL_MAX_METADATA_SIZE; +- if (*pool_metadata_extents) +- log_warn("WARNING: Maximum supported pool metadata size is %s.", +- display_size(cmd, pool_metadata_size)); +- } else if (pool_metadata_size < (2 * DEFAULT_THIN_POOL_MIN_METADATA_SIZE)) { +- pool_metadata_size = 2 * DEFAULT_THIN_POOL_MIN_METADATA_SIZE; +- if (*pool_metadata_extents) +- log_warn("WARNING: Minimum supported pool metadata size is %s.", +- display_size(cmd, pool_metadata_size)); +- } +- +- if (!(*pool_metadata_extents = +- extents_from_size(cmd, pool_metadata_size, extent_size))) +- return_0; +- + if ((uint64_t) *chunk_size > (uint64_t) pool_data_extents * extent_size) { + log_error("Size of %s data volume cannot be smaller than chunk size %s.", + segtype->name, display_size(cmd, *chunk_size)); +@@ -958,12 +991,5 @@ int validate_thin_pool_chunk_size(struct cmd_context *cmd, uint32_t chunk_size) + + uint64_t estimate_thin_pool_metadata_size(uint32_t data_extents, uint32_t extent_size, uint32_t chunk_size) + { +- uint64_t sz = _estimate_metadata_size(data_extents, extent_size, chunk_size); +- +- if (sz > (2 * DEFAULT_THIN_POOL_MAX_METADATA_SIZE)) +- sz = 2 * DEFAULT_THIN_POOL_MAX_METADATA_SIZE; +- else if (sz < (2 * DEFAULT_THIN_POOL_MIN_METADATA_SIZE)) +- sz = 2 * DEFAULT_THIN_POOL_MIN_METADATA_SIZE; +- +- return sz; ++ return _estimate_metadata_size(data_extents, extent_size, chunk_size); + } +diff --git a/lib/thin/thin.c b/lib/thin/thin.c +index ba0da71..51bc269 100644 +--- a/lib/thin/thin.c ++++ b/lib/thin/thin.c +@@ -86,6 +86,7 @@ static int _thin_pool_text_import(struct lv_segment *seg, + struct 
logical_volume *pool_data_lv, *pool_metadata_lv; + const char *discards_str = NULL; + uint32_t zero = 0; ++ uint32_t crop = 0; + + if (!dm_config_get_str(sn, "metadata", &lv_name)) + return SEG_LOG_ERROR("Metadata must be a string in"); +@@ -131,6 +132,13 @@ static int _thin_pool_text_import(struct lv_segment *seg, + + seg->zero_new_blocks = (zero) ? THIN_ZERO_YES : THIN_ZERO_NO; + ++ if (dm_config_has_node(sn, "crop_metadata")) { ++ if (!dm_config_get_uint32(sn, "crop_metadata", &crop)) ++ return SEG_LOG_ERROR("Could not read crop_metadata for"); ++ seg->crop_metadata = (crop) ? THIN_CROP_METADATA_YES : THIN_CROP_METADATA_NO; ++ seg->lv->status |= LV_CROP_METADATA; ++ } ++ + /* Read messages */ + for (; sn; sn = sn->sib) + if (!(sn->v) && !_thin_pool_add_message(seg, sn->key, sn->child)) +@@ -177,6 +185,9 @@ static int _thin_pool_text_export(const struct lv_segment *seg, struct formatter + return 0; + } + ++ if (seg->crop_metadata != THIN_CROP_METADATA_UNSELECTED) ++ outf(f, "crop_metadata = %u", (seg->crop_metadata == THIN_CROP_METADATA_YES) ? 1 : 0); ++ + dm_list_iterate_items(tmsg, &seg->thin_messages) { + /* Extra validation */ + switch (tmsg->type) { +@@ -307,11 +318,12 @@ static int _thin_pool_add_target_line(struct dev_manager *dm, + else + low_water_mark = 0; + +- if (!dm_tree_node_add_thin_pool_target(node, len, +- seg->transaction_id, +- metadata_dlid, pool_dlid, +- seg->chunk_size, low_water_mark, +- (seg->zero_new_blocks == THIN_ZERO_YES) ? 0 : 1)) ++ if (!dm_tree_node_add_thin_pool_target_v1(node, len, ++ seg->transaction_id, ++ metadata_dlid, pool_dlid, ++ seg->chunk_size, low_water_mark, ++ (seg->zero_new_blocks == THIN_ZERO_YES) ? 0 : 1, ++ (seg->crop_metadata == THIN_CROP_METADATA_YES) ? 
1 : 0)) + return_0; + + if (attr & THIN_FEATURE_DISCARDS) { +diff --git a/man/lvmthin.7_main b/man/lvmthin.7_main +index e6f1d63..3ce34a5 100644 +--- a/man/lvmthin.7_main ++++ b/man/lvmthin.7_main +@@ -1104,7 +1104,7 @@ The default value is shown by: + The amount of thin metadata depends on how many blocks are shared between + thin LVs (i.e. through snapshots). A thin pool with many snapshots may + need a larger metadata LV. Thin pool metadata LV sizes can be from 2MiB +-to 16GiB. ++to approximately 16GiB. + + When using lvcreate to create what will become a thin metadata LV, the + size is specified with the -L|--size option. +@@ -1119,6 +1119,14 @@ needed, so it is recommended to start with a size of 1GiB which should be + enough for all practical purposes. A thin pool metadata LV can later be + manually or automatically extended if needed. + ++Configurable setting ++.BR lvm.conf (5) ++.BR allocation / thin_pool_crop_metadata ++gives control over cropping to 15.81GiB to stay backward compatible with older ++versions of lvm2. With cropping enabled, problems may be observed when ++using volumes above this size with thin tools (e.g. thin_repair). ++Cropping should be enabled only when compatibility is required. 
++ + + .SS Create a thin snapshot of an external, read only LV + +diff --git a/tools/lvconvert.c b/tools/lvconvert.c +index 7b74afb..ce90279 100644 +--- a/tools/lvconvert.c ++++ b/tools/lvconvert.c +@@ -3032,6 +3032,7 @@ static int _lvconvert_to_pool(struct cmd_context *cmd, + const char *policy_name; + struct dm_config_tree *policy_settings = NULL; + int pool_metadata_spare; ++ thin_crop_metadata_t crop_metadata; + thin_discards_t discards; + thin_zero_t zero_new_blocks; + int r = 0; +@@ -3196,6 +3197,8 @@ static int _lvconvert_to_pool(struct cmd_context *cmd, + pool_segtype, target_attr, + lv->le_count, + &meta_extents, ++ metadata_lv, ++ &crop_metadata, + &chunk_calc, + &chunk_size, + &discards, &zero_new_blocks)) +@@ -3401,6 +3404,7 @@ static int _lvconvert_to_pool(struct cmd_context *cmd, + goto_bad; + } else { + seg->transaction_id = 0; ++ seg->crop_metadata = crop_metadata; + seg->chunk_size = chunk_size; + seg->discards = discards; + seg->zero_new_blocks = zero_new_blocks; +diff --git a/tools/lvcreate.c b/tools/lvcreate.c +index e384291..1ee9e14 100644 +--- a/tools/lvcreate.c ++++ b/tools/lvcreate.c +@@ -391,6 +391,8 @@ static int _update_extents_params(struct volume_group *vg, + lp->segtype, lp->target_attr, + lp->extents, + &lp->pool_metadata_extents, ++ NULL, ++ &lp->crop_metadata, + &lp->thin_chunk_size_calc_policy, + &lp->chunk_size, + &lp->discards, diff --git a/SOURCES/lvm2-2_03_12-writecache-use-cleaner-message-instead-of-table-relo.patch b/SOURCES/lvm2-2_03_12-writecache-use-cleaner-message-instead-of-table-relo.patch new file mode 100644 index 0000000..9f08576 --- /dev/null +++ b/SOURCES/lvm2-2_03_12-writecache-use-cleaner-message-instead-of-table-relo.patch @@ -0,0 +1,45 @@ + lib/metadata/writecache_manip.c | 10 +++++++--- + tools/lvconvert.c | 2 ++ + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/lib/metadata/writecache_manip.c b/lib/metadata/writecache_manip.c +index 5004aa9..8150d07 100644 +--- 
a/lib/metadata/writecache_manip.c ++++ b/lib/metadata/writecache_manip.c +@@ -75,7 +75,7 @@ static int _get_writecache_kernel_status(struct cmd_context *cmd, + return 0; + } + +- if (!lv_info_with_seg_status(cmd, first_seg(lv), &status, 1, 1)) { ++ if (!lv_info_with_seg_status(cmd, first_seg(lv), &status, 0, 0)) { + log_error("Failed to get device mapper status for %s", display_lvname(lv)); + goto fail; + } +@@ -434,8 +434,12 @@ int lv_writecache_set_cleaner(struct logical_volume *lv) + seg->writecache_settings.cleaner_set = 1; + + if (lv_is_active(lv)) { +- if (!lv_update_and_reload(lv)) { +- log_error("Failed to update VG and reload LV."); ++ if (!vg_write(lv->vg) || !vg_commit(lv->vg)) { ++ log_error("Failed to update VG."); ++ return 0; ++ } ++ if (!lv_writecache_message(lv, "cleaner")) { ++ log_error("Failed to set writecache cleaner for %s.", display_lvname(lv)); + return 0; + } + } else { +diff --git a/tools/lvconvert.c b/tools/lvconvert.c +index 4323965..7b74afb 100644 +--- a/tools/lvconvert.c ++++ b/tools/lvconvert.c +@@ -5720,6 +5720,8 @@ static int _lvconvert_detach_writecache_when_clean(struct cmd_context *cmd, + return 0; + } + ++ log_debug("detach writecache check clean reading vg %s", id->vg_name); ++ + vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state, &error_flags, NULL); + + if (!vg) { diff --git a/SOURCES/lvm2-rhel8.patch b/SOURCES/lvm2-rhel8.patch index 9ad48c0..874c350 100644 --- a/SOURCES/lvm2-rhel8.patch +++ b/SOURCES/lvm2-rhel8.patch @@ -8,11 +8,11 @@ index a6ba8f6..2a15962 100644 +++ b/VERSION @@ -1 +1 @@ -2.03.11(2) (2021-01-08) -+2.03.11(2)-RHEL8 (2021-01-08) ++2.03.11(2)-RHEL8 (2021-01-28) diff --git a/VERSION_DM b/VERSION_DM index f44bc5f..2475a11 100644 --- a/VERSION_DM +++ b/VERSION_DM @@ -1 +1 @@ -1.02.175 (2021-01-08) -+1.02.175-RHEL8 (2021-01-08) ++1.02.175-RHEL8 (2021-01-28) diff --git a/SPECS/lvm2.spec b/SPECS/lvm2.spec index d5c51d7..6d97f19 100644 --- a/SPECS/lvm2.spec +++ b/SPECS/lvm2.spec @@ -57,16 +57,41 @@ 
Name: lvm2 Epoch: %{rhel} %endif Version: 2.03.11 -Release: 1%{?dist} +Release: 3%{?dist} License: GPLv2 URL: http://sourceware.org/lvm2 Source0: ftp://sourceware.org/pub/lvm2/releases/LVM2.%{version}.tgz Patch0: lvm2-rhel8.patch Patch1: lvm2-set-default-preferred_names.patch Patch2: lvm2-test-skip-problematic-tests.patch -# TODO: What shall we do with this? -# BZ 1868169: -#Patch15: 0004-Revert-wipe_lv-changes.patch +Patch3: lvm2-2_03_12-lvmlockd-sscanf-buffer-size-warnings.patch +# BZ 1915497: +Patch4: lvm2-2_03_12-alloc-enhance-estimation-of-sufficient_pes_free.patch +Patch5: lvm2-2_03_12-tests-check-thin-pool-corner-case-allocs.patch +Patch6: lvm2-2_03_12-tests-check-full-zeroing-of-thin-pool-metadata.patch +# BZ 1915580: +Patch7: lvm2-2_03_12-integrity-fix-segfault-on-error-path-when-replacing-.patch +# BZ 1872695: +Patch8: lvm2-2_03_12-devs-remove-invalid-path-name-aliases.patch +Patch9: lvm2-2_03_12-make-generate.patch +Patch10: lvm2-2_03_12-label_scan-fix-missing-free-of-filtered_devs.patch +# BZ 1917920: +Patch11: lvm2-2_03_12-pvck-fix-warning-and-exit-code-for-non-4k-mda1-offse.patch +Patch12: lvm2-2_03_12-WHATS_NEW-update.patch +# BZ 1921214: +Patch13: lvm2-2_03_12-writecache-use-cleaner-message-instead-of-table-relo.patch +# BZ 1909699: +Patch14: lvm2-2_03_12-man-update-lvmthin.patch +Patch15: lvm2-2_03_12-thin-improve-16g-support-for-thin-pool-metadata.patch +Patch16: lvm2-2_03_12-pool-limit-pmspare-to-16GiB.patch +Patch17: lvm2-2_03_12-cache-reuse-code-for-metadata-min_max.patch +Patch18: lvm2-2_03_12-tests-check-16G-thin-pool-metadata-size.patch +Patch19: lvm2-2_03_12-tests-update-thin-and-cache-checked-messages.patch +# BZ 1914389: +Patch20: lvm2-2_03_12-lvcreate-use-lv_passes_readonly_filter.patch +Patch21: lvm2-2_03_12-test-check-read_only_volume_list-tagging-works.patch +# BZ 1859659: +Patch22: lvm2-2_03_12-filter-mpath-work-with-nvme-devices.patch BuildRequires: gcc %if %{enable_testsuite} @@ -125,7 +150,26 @@ or more physical volumes and 
creating one or more logical volumes %patch0 -p1 -b .backup0 %patch1 -p1 -b .backup1 %patch2 -p1 -b .backup2 -#%%patch15 -p1 -b .backup15 +%patch3 -p1 -b .backup3 +%patch4 -p1 -b .backup4 +%patch5 -p1 -b .backup5 +%patch6 -p1 -b .backup6 +%patch7 -p1 -b .backup7 +%patch8 -p1 -b .backup8 +%patch9 -p1 -b .backup9 +%patch10 -p1 -b .backup10 +%patch11 -p1 -b .backup11 +%patch12 -p1 -b .backup12 +%patch13 -p1 -b .backup13 +%patch14 -p1 -b .backup14 +%patch15 -p1 -b .backup15 +%patch16 -p1 -b .backup16 +%patch17 -p1 -b .backup17 +%patch18 -p1 -b .backup18 +%patch19 -p1 -b .backup19 +%patch20 -p1 -b .backup20 +%patch21 -p1 -b .backup21 +%patch22 -p1 -b .backup22 %build %global _default_pid_dir /run @@ -730,6 +774,19 @@ An extensive functional testsuite for LVM2. %endif %changelog +* Wed Feb 03 2021 Marian Csontos - 2.03.11-3 +- Fix mpath filtering of NVMe devices. +- Check if lvcreate passes read_only_volume_list with tags and skips zeroing. +- Limit pool metadata spare to 16GiB. +- Improve conversion and allocation of pool metadata. +- Fix different limits used for metadata by lvm2 and thin-tools. +- Fix interrupting lvconvert --splitcache command with striped origin volumes. + +* Thu Jan 28 2021 Marian Csontos - 2.03.11-2 +- Fix problem with wiping of converted LVs. +- Fix memleak in scanning. +- Fix corner case allocation for thin-pools. + * Fri Jan 08 2021 Marian Csontos - 2.03.11-1 - Fix pvck handling MDA at offset different from 4096. - Partial or degraded activation of writecache is not allowed.