mrc0mmand / rpms / lvm2

Forked from rpms/lvm2 2 years ago
Clone

Blame SOURCES/lvm2-2_02_182-scan-use-full-md-filter-when-md-1.0-devices-are-pres.patch

3a5d46
 lib/cache/lvmcache.c               |  2 +-
3a5d46
 lib/device/dev-md.c                | 27 ++++++++++----
3a5d46
 lib/device/dev-type.h              |  1 +
3a5d46
 lib/filters/filter-md.c            | 74 +++++++++++++++++++-------------------
3a5d46
 lib/label/label.c                  | 14 ++++++++
3a5d46
 test/shell/pvcreate-md-fake-hdr.sh |  3 +-
3a5d46
 6 files changed, 75 insertions(+), 46 deletions(-)
3a5d46
3a5d46
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c
3a5d46
index 2fba3ff..f55a14c 100644
3a5d46
--- a/lib/cache/lvmcache.c
3a5d46
+++ b/lib/cache/lvmcache.c
3a5d46
@@ -1002,7 +1002,7 @@ int lvmcache_dev_is_unchosen_duplicate(struct device *dev)
3a5d46
  * unused_duplicate_devs list, and restrict what we allow done with it.
3a5d46
  *
3a5d46
  * In the case of md components, we usually filter these out in filter-md,
3a5d46
- * but in the special case of md superblocks <= 1.0 where the superblock
3a5d46
+ * but in the special case of md superblock version 1.0 where the superblock
3a5d46
  * is at the end of the device, filter-md doesn't always eliminate them
3a5d46
  * first, so we eliminate them here.
3a5d46
  *
3a5d46
diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c
3a5d46
index f5a736f..7196dc0 100644
3a5d46
--- a/lib/device/dev-md.c
3a5d46
+++ b/lib/device/dev-md.c
3a5d46
@@ -142,13 +142,6 @@ static int _native_dev_is_md(struct device *dev, uint64_t *offset_found, int ful
3a5d46
 	 * command if it should do a full check (cmd->use_full_md_check),
3a5d46
 	 * and set it for commands that could possibly write to an md dev
3a5d46
 	 * (pvcreate/vgcreate/vgextend).
3a5d46
-	 *
3a5d46
-	 * For old md versions with magic numbers at the end of devices,
3a5d46
-	 * the md dev components won't be filtered out here when full is 0,
3a5d46
-	 * so they will be scanned, and appear as duplicate PVs in lvmcache.
3a5d46
-	 * The md device itself will be chosen as the primary duplicate,
3a5d46
-	 * and the components are dropped from the list of duplicates in,
3a5d46
-	 * i.e. a kind of post-scan filtering.
3a5d46
 	 */
3a5d46
 	if (!full) {
3a5d46
 		sb_offset = 0;
3a5d46
@@ -414,6 +407,26 @@ unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev)
3a5d46
 	return stripe_width_sectors;
3a5d46
 }
3a5d46
 
3a5d46
+int dev_is_md_with_end_superblock(struct dev_types *dt, struct device *dev)
3a5d46
+{
3a5d46
+	char version_string[MD_MAX_SYSFS_SIZE];
3a5d46
+	const char *attribute = "metadata_version";
3a5d46
+
3a5d46
+	if (MAJOR(dev->dev) != dt->md_major)
3a5d46
+		return 0;
3a5d46
+
3a5d46
+	if (_md_sysfs_attribute_scanf(dt, dev, attribute,
3a5d46
+				      "%s", &version_string) != 1)
3a5d46
+		return -1;
3a5d46
+
3a5d46
+	log_very_verbose("Device %s %s is %s.",
3a5d46
+			 dev_name(dev), attribute, version_string);
3a5d46
+
3a5d46
+	if (!strcmp(version_string, "1.0"))
3a5d46
+		return 1;
3a5d46
+	return 0;
3a5d46
+}
3a5d46
+
3a5d46
 #else
3a5d46
 
3a5d46
 int dev_is_md(struct device *dev __attribute__((unused)),
3a5d46
diff --git a/lib/device/dev-type.h b/lib/device/dev-type.h
3a5d46
index 843e254..f629a02 100644
3a5d46
--- a/lib/device/dev-type.h
3a5d46
+++ b/lib/device/dev-type.h
3a5d46
@@ -76,6 +76,7 @@ int wipe_known_signatures(struct cmd_context *cmd, struct device *dev, const cha
3a5d46
 
3a5d46
 /* Type-specific device properties */
3a5d46
 unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev);
3a5d46
+int dev_is_md_with_end_superblock(struct dev_types *dt, struct device *dev);
3a5d46
 
3a5d46
 /* Partitioning */
3a5d46
 int major_max_partitions(struct dev_types *dt, int major);
3a5d46
diff --git a/lib/filters/filter-md.c b/lib/filters/filter-md.c
3a5d46
index ab97b59..ad5b8e4 100644
3a5d46
--- a/lib/filters/filter-md.c
3a5d46
+++ b/lib/filters/filter-md.c
3a5d46
@@ -29,43 +29,43 @@
3a5d46
  *
3a5d46
  * (This is assuming lvm.conf md_component_detection=1.)
3a5d46
  *
3a5d46
- * If lvm does *not* ignore the components, then lvm will read lvm
3a5d46
- * labels from the md dev and from the component devs, and will see
3a5d46
- * them all as duplicates of each other.  LVM duplicate resolution
3a5d46
- * will then kick in and keep the md dev around to use and ignore
3a5d46
- * the components.
3a5d46
- *
3a5d46
- * It is better to exclude the components as early as possible during
3a5d46
- * lvm processing, ideally before lvm even looks for labels on the
3a5d46
- * components, so that duplicate resolution can be avoided.  There are
3a5d46
- * a number of ways that md components can be excluded earlier than
3a5d46
- * the duplicate resolution phase:
3a5d46
- *
3a5d46
- * - When external_device_info_source="udev", lvm discovers a device is
3a5d46
- *   an md component by asking udev during the initial filtering phase.
3a5d46
- *   However, lvm's default is to not use udev for this.  The
3a5d46
- *   alternative is "native" detection in which lvm tries to detect
3a5d46
- *   md components itself.
3a5d46
- *
3a5d46
- * - When using native detection, lvm's md filter looks for the md
3a5d46
- *   superblock at the start of devices.  It will see the md superblock
3a5d46
- *   on the components, exclude them in the md filter, and avoid
3a5d46
- *   handling them later in duplicate resolution.
3a5d46
- *
3a5d46
- * - When using native detection, lvm's md filter will not detect
3a5d46
- *   components when the md device has an older superblock version that
3a5d46
- *   places the superblock at the end of the device.  This case will
3a5d46
- *   fall back to duplicate resolution to exclude components.
3a5d46
- *
3a5d46
- * A variation of the description above occurs for lvm commands that
3a5d46
- * intend to create new PVs on devices (pvcreate, vgcreate, vgextend).
3a5d46
- * For these commands, the native md filter also reads the end of all
3a5d46
- * devices to check for the odd md superblocks.
3a5d46
- *
3a5d46
- * (The reason that external_device_info_source is not set to udev by
3a5d46
- * default is that there have be issues with udev not being promptly
3a5d46
- * or reliably updated about md state changes, causing the udev info
3a5d46
- * that lvm uses to be occasionally wrong.)
3a5d46
+ * If lvm does *not* ignore the components, then lvm may read lvm
3a5d46
+ * labels from the component devs and potentially the md dev,
3a5d46
+ * which can trigger duplicate detection, and/or cause lvm to display
3a5d46
+ * md components as PVs rather than ignoring them.
3a5d46
+ *
3a5d46
+ * If scanning md components causes duplicates to be seen, then
3a5d46
+ * the lvm duplicate resolution will exclude the components.
3a5d46
+ *
3a5d46
+ * The lvm md filter has three modes:
3a5d46
+ *
3a5d46
+ * 1. look for md superblock at the start of the device
3a5d46
+ * 2. look for md superblock at the start and end of the device
3a5d46
+ * 3. use udev to detect components
3a5d46
+ *
3a5d46
+ * mode 1 will not detect and exclude components of md devices
3a5d46
+ * that use superblock version 1.0 which is at the end of the device.
3a5d46
+ *
3a5d46
+ * mode 2 will detect these, but mode 2 doubles the i/o done by label
3a5d46
+ * scan, since there's a read at both the start and end of every device.
3a5d46
+ *
3a5d46
+ * mode 3 is used when external_device_info_source="udev".  It does
3a5d46
+ * not require any io from lvm, but this mode is not used by default
3a5d46
+ * because there have been problems getting reliable info from udev.
3a5d46
+ *
3a5d46
+ * lvm uses mode 2 when:
3a5d46
+ *
3a5d46
+ * - the command is pvcreate/vgcreate/vgextend, which format new
3a5d46
+ *   devices, and if the user ran these commands on a component
3a5d46
+ *   device of an md device 1.0, then it would cause problems.
3a5d46
+ *   FIXME: this would only really need to scan the end of the
3a5d46
+ *   devices being formatted, not all devices.
3a5d46
+ *
3a5d46
+ * - it sees an md device on the system using version 1.0.
3a5d46
+ *   The point of this is just to avoid displaying md components
3a5d46
+ *   in the 'pvs' command output.
3a5d46
+ *   FIXME: the cost (double i/o) may not be worth the benefit
3a5d46
+ *   (not showing md components).
3a5d46
  */
3a5d46
 
3a5d46
 /*
3a5d46
diff --git a/lib/label/label.c b/lib/label/label.c
3a5d46
index e7e3997..6fb35ff 100644
3a5d46
--- a/lib/label/label.c
3a5d46
+++ b/lib/label/label.c
3a5d46
@@ -872,6 +872,20 @@ int label_scan(struct cmd_context *cmd)
3a5d46
 			bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
3a5d46
 			_scan_dev_close(dev);
3a5d46
 		}
3a5d46
+
3a5d46
+		/*
3a5d46
+		 * When md devices exist that use the old superblock at the
3a5d46
+		 * end of the device, then in order to detect and filter out
3a5d46
+		 * the component devices of those md devs, we need to enable
3a5d46
+		 * the full md filter which scans both the start and the end
3a5d46
+		 * of every device.  This doubles the amount of scanning i/o,
3a5d46
+		 * which we want to avoid.  FIXME: it may not be worth the
3a5d46
+		 * cost of double i/o just to avoid displaying md component
3a5d46
+		 * devs in 'pvs', which is a pretty harmless effect from a
3a5d46
+		 * pretty uncommon situation.
3a5d46
+		 */
3a5d46
+		if (dev_is_md_with_end_superblock(cmd->dev_types, dev))
3a5d46
+			cmd->use_full_md_check = 1;
3a5d46
 	};
3a5d46
 	dev_iter_destroy(iter);
3a5d46
 
3a5d46
diff --git a/test/shell/pvcreate-md-fake-hdr.sh b/test/shell/pvcreate-md-fake-hdr.sh
3a5d46
index b89fe43..4c9ac7c 100644
3a5d46
--- a/test/shell/pvcreate-md-fake-hdr.sh
3a5d46
+++ b/test/shell/pvcreate-md-fake-hdr.sh
3a5d46
@@ -89,6 +89,7 @@ sleep 1
3a5d46
 # (when mdadm supports repair)
3a5d46
 if mdadm --action=repair "$mddev" ; then
3a5d46
 	sleep 1
3a5d46
+	pvscan -vvvv
3a5d46
 	# should be showing correctly PV3 & PV4
3a5d46
-	pvs
3a5d46
+	pvs -vvvv "$dev3" "$dev4"
3a5d46
 fi