Blame SOURCES/0032-devices-file-do-not-clear-PVID-of-unread-devices.patch

ab86b0
From 08a5619a1d7a5a8dd6e0df6e4dedec47ce2533b7 Mon Sep 17 00:00:00 2001
ab86b0
From: David Teigland <teigland@redhat.com>
ab86b0
Date: Thu, 10 Feb 2022 14:00:25 -0600
ab86b0
Subject: [PATCH 32/54] devices file: do not clear PVID of unread devices
ab86b0
ab86b0
In a certain disconnected state, a block device is present on
ab86b0
the system, can be opened, reports a valid size, reports the
ab86b0
correct device id (wwid), and matches a devices file entry.
ab86b0
But reading the device can still fail.  In this case,
ab86b0
device_ids_validate() was misinterpreting the read error as
ab86b0
the device having no data/label on it (and no PVID).
ab86b0
The validate function would then clear the PVID from the
ab86b0
devices file entry for the device, thinking that it was
ab86b0
fixing the devices file (making it consistent with the on-disk
ab86b0
state).  Fix this by not attempting to check and correct a
ab86b0
devices file entry that cannot be read.  Also make this case
ab86b0
explicit in the hints validation code (which was doing the
ab86b0
right thing, but indirectly).
ab86b0
---
ab86b0
 lib/device/device.h    |  1 +
ab86b0
 lib/device/device_id.c | 14 ++++++++++++++
ab86b0
 lib/label/hints.c      | 14 ++++++++++++++
ab86b0
 lib/label/label.c      |  8 ++++++++
ab86b0
 4 files changed, 37 insertions(+)
ab86b0
ab86b0
diff --git a/lib/device/device.h b/lib/device/device.h
ab86b0
index 9e471a9b5..8c3a8c30e 100644
ab86b0
--- a/lib/device/device.h
ab86b0
+++ b/lib/device/device.h
ab86b0
@@ -40,6 +40,7 @@
ab86b0
 #define DEV_IS_NVME		0x00040000	/* set if dev is nvme */
ab86b0
 #define DEV_MATCHED_USE_ID	0x00080000	/* matched an entry from cmd->use_devices */
ab86b0
 #define DEV_SCAN_FOUND_NOLABEL	0x00100000	/* label_scan read, passed filters, but no lvm label */
ab86b0
+#define DEV_SCAN_NOT_READ	0x00200000	/* label_scan not able to read dev */
ab86b0
 
ab86b0
 /*
ab86b0
  * Support for external device info.
ab86b0
diff --git a/lib/device/device_id.c b/lib/device/device_id.c
ab86b0
index aeaa1ffc6..7fe581571 100644
ab86b0
--- a/lib/device/device_id.c
ab86b0
+++ b/lib/device/device_id.c
ab86b0
@@ -1724,6 +1724,13 @@ void device_ids_validate(struct cmd_context *cmd, struct dm_list *scanned_devs,
ab86b0
 		if (scanned_devs && !dev_in_device_list(dev, scanned_devs))
ab86b0
 			continue;
ab86b0
 
ab86b0
+		/*
ab86b0
+		 * The matched device could not be read so we do not have
ab86b0
+		 * the PVID from disk and cannot verify the devices file entry.
ab86b0
+		 */
ab86b0
+		if (dev->flags & DEV_SCAN_NOT_READ)
ab86b0
+			continue;
ab86b0
+
ab86b0
 		/*
ab86b0
 		 * du and dev may have been matched, but the dev could still
ab86b0
 		 * have been excluded by other filters during label scan.
ab86b0
@@ -1806,6 +1813,13 @@ void device_ids_validate(struct cmd_context *cmd, struct dm_list *scanned_devs,
ab86b0
 		if (scanned_devs && !dev_in_device_list(dev, scanned_devs))
ab86b0
 			continue;
ab86b0
 
ab86b0
+		/*
ab86b0
+		 * The matched device could not be read so we do not have
ab86b0
+		 * the PVID from disk and cannot verify the devices file entry.
ab86b0
+		 */
ab86b0
+		if (dev->flags & DEV_SCAN_NOT_READ)
ab86b0
+			continue;
ab86b0
+
ab86b0
 		if (!cmd->filter->passes_filter(cmd, cmd->filter, dev, "persistent")) {
ab86b0
 			log_warn("Devices file %s is excluded by filter: %s.",
ab86b0
 				 dev_name(dev), dev_filtered_reason(dev));
ab86b0
diff --git a/lib/label/hints.c b/lib/label/hints.c
ab86b0
index 3ce9634f2..95d5d77b8 100644
ab86b0
--- a/lib/label/hints.c
ab86b0
+++ b/lib/label/hints.c
ab86b0
@@ -234,6 +234,7 @@ static int _touch_newhints(void)
ab86b0
 		return_0;
ab86b0
 	if (fclose(fp))
ab86b0
 		stack;
ab86b0
+	log_debug("newhints created");
ab86b0
 	return 1;
ab86b0
 }
ab86b0
 
ab86b0
@@ -504,6 +505,19 @@ int validate_hints(struct cmd_context *cmd, struct dm_list *hints)
ab86b0
 		if (!hint->chosen)
ab86b0
 			continue;
ab86b0
 
ab86b0
+		/* 
ab86b0
+		 * label_scan was unable to read the dev so we don't know its pvid.
ab86b0
+		 * Since we are unable to verify the hint is correct, it's possible
ab86b0
+		 * that the PVID is actually found on a different device, so don't
ab86b0
+		 * depend on hints. (This would also fail the following pvid check.)
ab86b0
+		 */
ab86b0
+		if (dev->flags & DEV_SCAN_NOT_READ) {
ab86b0
+			log_debug("Uncertain hint for unread device %d:%d %s",
ab86b0
+				  major(hint->devt), minor(hint->devt), dev_name(dev));
ab86b0
+			ret = 0;
ab86b0
+			continue;
ab86b0
+		}
ab86b0
+
ab86b0
 		if (strcmp(dev->pvid, hint->pvid)) {
ab86b0
 			log_debug("Invalid hint device %d:%d %s pvid %s had hint pvid %s",
ab86b0
 				  major(hint->devt), minor(hint->devt), dev_name(dev),
ab86b0
diff --git a/lib/label/label.c b/lib/label/label.c
ab86b0
index 9fac3e464..354ab35e2 100644
ab86b0
--- a/lib/label/label.c
ab86b0
+++ b/lib/label/label.c
ab86b0
@@ -686,6 +686,8 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
ab86b0
 
ab86b0
 	dm_list_iterate_items_safe(devl, devl2, devs) {
ab86b0
 
ab86b0
+		devl->dev->flags &= ~DEV_SCAN_NOT_READ;
ab86b0
+
ab86b0
 		/*
ab86b0
 		 * If we prefetch more devs than blocks in the cache, then the
ab86b0
 		 * cache will wait for earlier reads to complete, toss the
ab86b0
@@ -701,6 +703,7 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
ab86b0
 				log_debug_devs("Scan failed to open %s.", dev_name(devl->dev));
ab86b0
 				dm_list_del(&devl->list);
ab86b0
 				dm_list_add(&reopen_devs, &devl->list);
ab86b0
+				devl->dev->flags |= DEV_SCAN_NOT_READ;
ab86b0
 				continue;
ab86b0
 			}
ab86b0
 		}
ab86b0
@@ -724,6 +727,7 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
ab86b0
 			log_debug_devs("Scan failed to read %s.", dev_name(devl->dev));
ab86b0
 			scan_read_errors++;
ab86b0
 			scan_failed_count++;
ab86b0
+			devl->dev->flags |= DEV_SCAN_NOT_READ;
ab86b0
 			lvmcache_del_dev(devl->dev);
ab86b0
 			if (bb)
ab86b0
 				bcache_put(bb);
ab86b0
@@ -1113,6 +1117,10 @@ int label_scan(struct cmd_context *cmd)
ab86b0
 	 * filter", and this result needs to be cleared (wiped) so that the
ab86b0
 	 * complete set of filters (including those that require data) can be
ab86b0
 	 * checked in _process_block, where headers have been read.
ab86b0
+	 *
ab86b0
+	 * FIXME: devs that are filtered with data in _process_block
ab86b0
+	 * are not moved to the filtered_devs list like devs filtered
ab86b0
+	 * here without data.  Does that have any effect?
ab86b0
 	 */
ab86b0
 	log_debug_devs("Filtering devices to scan (nodata)");
ab86b0
 
ab86b0
-- 
ab86b0
2.34.3
ab86b0