mrc0mmand / rpms / lvm2

Forked from rpms/lvm2 2 years ago
Clone

Blame SOURCES/lvm2-2_02_186-Fix-rounding-writes-up-to-sector-size.patch

04bcc6
From 748d1076bf2ab590eb8cff68dba88d78f1f67e9f Mon Sep 17 00:00:00 2001
04bcc6
From: David Teigland <teigland@redhat.com>
04bcc6
Date: Wed, 24 Jul 2019 11:32:13 -0500
04bcc6
Subject: [PATCH 2/2] Fix rounding writes up to sector size
04bcc6
04bcc6
Do this at two levels, although one would be enough to
04bcc6
fix the problem seen recently:
04bcc6
04bcc6
- Ignore any reported sector size other than 512 of 4096.
04bcc6
  If either sector size (physical or logical) is reported
04bcc6
  as 512, then use 512.  If neither are reported as 512,
04bcc6
  and one or the other is reported as 4096, then use 4096.
04bcc6
  If neither is reported as either 512 or 4096, then use 512.
04bcc6
04bcc6
- When rounding up a limited write in bcache to be a multiple
04bcc6
  of the sector size, check that the resulting write size is
04bcc6
  not larger than the bcache block itself.  (This shouldn't
04bcc6
  happen if the sector size is 512 or 4096.)
04bcc6
04bcc6
(cherry picked from commit 7550665ba49ac7d497d5b212e14b69298ef01361)
04bcc6
04bcc6
Conflicts:
04bcc6
	lib/label/label.c
04bcc6
---
04bcc6
 lib/device/bcache.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++--
04bcc6
 lib/device/dev-io.c | 52 +++++++++++++++++++++++++++++++
04bcc6
 lib/device/device.h |  8 +++--
04bcc6
 lib/label/label.c   | 30 +++++++++++++++---
04bcc6
 4 files changed, 171 insertions(+), 8 deletions(-)
04bcc6
04bcc6
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
04bcc6
index f64931f..423eeb7 100644
04bcc6
--- a/lib/device/bcache.c
04bcc6
+++ b/lib/device/bcache.c
04bcc6
@@ -170,6 +170,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
04bcc6
 	sector_t offset;
04bcc6
 	sector_t nbytes;
04bcc6
 	sector_t limit_nbytes;
04bcc6
+	sector_t orig_nbytes;
04bcc6
 	sector_t extra_nbytes = 0;
04bcc6
 
04bcc6
 	if (((uintptr_t) data) & e->page_mask) {
04bcc6
@@ -192,11 +193,41 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
04bcc6
 			return false;
04bcc6
 		}
04bcc6
 
04bcc6
+		/*
04bcc6
+		 * If the bcache block offset+len goes beyond where lvm is
04bcc6
+		 * intending to write, then reduce the len being written
04bcc6
+		 * (which is the bcache block size) so we don't write past
04bcc6
+		 * the limit set by lvm.  If after applying the limit, the
04bcc6
+		 * resulting size is not a multiple of the sector size (512
04bcc6
+		 * or 4096) then extend the reduced size to be a multiple of
04bcc6
+		 * the sector size (we don't want to write partial sectors.)
04bcc6
+		 */
04bcc6
 		if (offset + nbytes > _last_byte_offset) {
04bcc6
 			limit_nbytes = _last_byte_offset - offset;
04bcc6
-			if (limit_nbytes % _last_byte_sector_size)
04bcc6
+
04bcc6
+			if (limit_nbytes % _last_byte_sector_size) {
04bcc6
 				extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
04bcc6
 
04bcc6
+				/*
04bcc6
+				 * adding extra_nbytes to the reduced nbytes (limit_nbytes)
04bcc6
+				 * should make the final write size a multiple of the
04bcc6
+				 * sector size.  This should never result in a final size
04bcc6
+				 * larger than the bcache block size (as long as the bcache
04bcc6
+				 * block size is a multiple of the sector size).
04bcc6
+				 */
04bcc6
+				if (limit_nbytes + extra_nbytes > nbytes) {
04bcc6
+					log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
04bcc6
+						 (unsigned long long)offset,
04bcc6
+						 (unsigned long long)nbytes,
04bcc6
+						 (unsigned long long)limit_nbytes,
04bcc6
+						 (unsigned long long)extra_nbytes,
04bcc6
+						 (unsigned long long)_last_byte_sector_size);
04bcc6
+					extra_nbytes = 0;
04bcc6
+				}
04bcc6
+			}
04bcc6
+
04bcc6
+			orig_nbytes = nbytes;
04bcc6
+
04bcc6
 			if (extra_nbytes) {
04bcc6
 				log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
04bcc6
 					  (unsigned long long)offset,
04bcc6
@@ -211,6 +242,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
04bcc6
 					  (unsigned long long)limit_nbytes);
04bcc6
 				nbytes = limit_nbytes;
04bcc6
 			}
04bcc6
+
04bcc6
+			/*
04bcc6
+			 * This shouldn't happen, the reduced+extended
04bcc6
+			 * nbytes value should never be larger than the
04bcc6
+			 * bcache block size.
04bcc6
+			 */
04bcc6
+			if (nbytes > orig_nbytes) {
04bcc6
+				log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
04bcc6
+					  (unsigned long long)offset,
04bcc6
+					  (unsigned long long)orig_nbytes,
04bcc6
+					  (unsigned long long)nbytes,
04bcc6
+					  (unsigned long long)limit_nbytes,
04bcc6
+					  (unsigned long long)extra_nbytes,
04bcc6
+					  (unsigned long long)_last_byte_sector_size);
04bcc6
+				return false;
04bcc6
+			}
04bcc6
 		}
04bcc6
 	}
04bcc6
 
04bcc6
@@ -405,6 +452,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
04bcc6
 		uint64_t nbytes = len;
04bcc6
 		sector_t limit_nbytes = 0;
04bcc6
 		sector_t extra_nbytes = 0;
04bcc6
+		sector_t orig_nbytes = 0;
04bcc6
 
04bcc6
 		if (offset > _last_byte_offset) {
04bcc6
 			log_error("Limit write at %llu len %llu beyond last byte %llu",
04bcc6
@@ -417,9 +465,30 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
04bcc6
 
04bcc6
 		if (offset + nbytes > _last_byte_offset) {
04bcc6
 			limit_nbytes = _last_byte_offset - offset;
04bcc6
-			if (limit_nbytes % _last_byte_sector_size)
04bcc6
+
04bcc6
+			if (limit_nbytes % _last_byte_sector_size) {
04bcc6
 				extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
04bcc6
 
04bcc6
+				/*
04bcc6
+				 * adding extra_nbytes to the reduced nbytes (limit_nbytes)
04bcc6
+				 * should make the final write size a multiple of the
04bcc6
+				 * sector size.  This should never result in a final size
04bcc6
+				 * larger than the bcache block size (as long as the bcache
04bcc6
+				 * block size is a multiple of the sector size).
04bcc6
+				 */
04bcc6
+				if (limit_nbytes + extra_nbytes > nbytes) {
04bcc6
+					log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
04bcc6
+						 (unsigned long long)offset,
04bcc6
+						 (unsigned long long)nbytes,
04bcc6
+						 (unsigned long long)limit_nbytes,
04bcc6
+						 (unsigned long long)extra_nbytes,
04bcc6
+						 (unsigned long long)_last_byte_sector_size);
04bcc6
+					extra_nbytes = 0;
04bcc6
+				}
04bcc6
+			}
04bcc6
+
04bcc6
+			orig_nbytes = nbytes;
04bcc6
+
04bcc6
 			if (extra_nbytes) {
04bcc6
 				log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
04bcc6
 					  (unsigned long long)offset,
04bcc6
@@ -434,6 +503,22 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
04bcc6
 					  (unsigned long long)limit_nbytes);
04bcc6
 				nbytes = limit_nbytes;
04bcc6
 			}
04bcc6
+
04bcc6
+			/*
04bcc6
+			 * This shouldn't happen, the reduced+extended
04bcc6
+			 * nbytes value should never be larger than the
04bcc6
+			 * bcache block size.
04bcc6
+			 */
04bcc6
+			if (nbytes > orig_nbytes) {
04bcc6
+				log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
04bcc6
+					  (unsigned long long)offset,
04bcc6
+					  (unsigned long long)orig_nbytes,
04bcc6
+					  (unsigned long long)nbytes,
04bcc6
+					  (unsigned long long)limit_nbytes,
04bcc6
+					  (unsigned long long)extra_nbytes,
04bcc6
+					  (unsigned long long)_last_byte_sector_size);
04bcc6
+				return false;
04bcc6
+			}
04bcc6
 		}
04bcc6
 
04bcc6
 		where = offset;
04bcc6
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
04bcc6
index 6996a44..90c54ce 100644
04bcc6
--- a/lib/device/dev-io.c
04bcc6
+++ b/lib/device/dev-io.c
04bcc6
@@ -136,6 +136,58 @@ static int _io(struct device_area *where, char *buffer, int should_write, dev_io
04bcc6
 	return (total == (size_t) where->size);
04bcc6
 }
04bcc6
 
04bcc6
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
04bcc6
+				unsigned int *logical_block_size)
04bcc6
+{
04bcc6
+	int fd = dev->bcache_fd;
04bcc6
+	int do_close = 0;
04bcc6
+	unsigned int pbs = 0;
04bcc6
+	unsigned int lbs = 0;
04bcc6
+
04bcc6
+	if (dev->physical_block_size || dev->logical_block_size) {
04bcc6
+		*physical_block_size = dev->physical_block_size;
04bcc6
+		*logical_block_size = dev->logical_block_size;
04bcc6
+		return 1;
04bcc6
+	}
04bcc6
+
04bcc6
+	if (fd <= 0) {
04bcc6
+		if (!dev_open_readonly(dev))
04bcc6
+			return 0;
04bcc6
+		fd = dev_fd(dev);
04bcc6
+		do_close = 1;
04bcc6
+	}
04bcc6
+
04bcc6
+	/*
04bcc6
+	 * BLKPBSZGET from kernel comment for blk_queue_physical_block_size:
04bcc6
+	 * "the lowest possible sector size that the hardware can operate on
04bcc6
+	 * without reverting to read-modify-write operations"
04bcc6
+	 */
04bcc6
+	if (ioctl(fd, BLKPBSZGET, &pbs)) {
04bcc6
+		stack;
04bcc6
+		pbs = 0;
04bcc6
+	}
04bcc6
+
04bcc6
+	/*
04bcc6
+	 * BLKSSZGET from kernel comment for blk_queue_logical_block_size:
04bcc6
+	 * "the lowest possible block size that the storage device can address."
04bcc6
+	 */
04bcc6
+	if (ioctl(fd, BLKSSZGET, &lbs)) {
04bcc6
+		stack;
04bcc6
+		lbs = 0;
04bcc6
+	}
04bcc6
+
04bcc6
+	dev->physical_block_size = pbs;
04bcc6
+	dev->logical_block_size = lbs;
04bcc6
+
04bcc6
+	*physical_block_size = pbs;
04bcc6
+	*logical_block_size = lbs;
04bcc6
+
04bcc6
+	if (do_close && !dev_close_immediate(dev))
04bcc6
+		stack;
04bcc6
+
04bcc6
+	return 1;
04bcc6
+}
04bcc6
+
04bcc6
 /*-----------------------------------------------------------------
04bcc6
  * LVM2 uses O_DIRECT when performing metadata io, which requires
04bcc6
  * block size aligned accesses.  If any io is not aligned we have
04bcc6
diff --git a/lib/device/device.h b/lib/device/device.h
04bcc6
index bbd965a..aafc401 100644
04bcc6
--- a/lib/device/device.h
04bcc6
+++ b/lib/device/device.h
04bcc6
@@ -67,8 +67,10 @@ struct device {
04bcc6
 	int open_count;
04bcc6
 	int error_count;
04bcc6
 	int max_error_count;
04bcc6
-	int phys_block_size;
04bcc6
-	int block_size;
04bcc6
+	int phys_block_size;     /* From either BLKPBSZGET or BLKSSZGET, don't use */
04bcc6
+	int block_size;          /* From BLKBSZGET, returns bdev->bd_block_size, likely set by fs, probably don't use */
04bcc6
+	int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */
04bcc6
+	int logical_block_size;  /* From BLKSSZGET: lowest possible block size that the storage device can address */
04bcc6
 	int read_ahead;
04bcc6
 	int bcache_fd;
04bcc6
 	uint32_t flags;
04bcc6
@@ -132,6 +134,8 @@ void dev_size_seqno_inc(void);
04bcc6
  * All io should use these routines.
04bcc6
  */
04bcc6
 int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size);
04bcc6
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
04bcc6
+                               unsigned int *logical_block_size);
04bcc6
 int dev_get_size(struct device *dev, uint64_t *size);
04bcc6
 int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
04bcc6
 int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
04bcc6
diff --git a/lib/label/label.c b/lib/label/label.c
04bcc6
index 4f8e135..66fb564 100644
04bcc6
--- a/lib/label/label.c
04bcc6
+++ b/lib/label/label.c
04bcc6
@@ -1492,12 +1492,34 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
04bcc6
 
04bcc6
 void dev_set_last_byte(struct device *dev, uint64_t offset)
04bcc6
 {
04bcc6
-	unsigned int phys_block_size = 0;
04bcc6
-	unsigned int block_size = 0;
04bcc6
+	unsigned int physical_block_size = 0;
04bcc6
+	unsigned int logical_block_size = 0;
04bcc6
+	unsigned int bs;
04bcc6
 
04bcc6
-	dev_get_block_size(dev, &phys_block_size, &block_size);
04bcc6
+	if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) {
04bcc6
+		stack;
04bcc6
+		return; /* FIXME: error path ? */
04bcc6
+	}
04bcc6
+
04bcc6
+	if ((physical_block_size == 512) && (logical_block_size == 512))
04bcc6
+		bs = 512;
04bcc6
+	else if ((physical_block_size == 4096) && (logical_block_size == 4096))
04bcc6
+		bs = 4096;
04bcc6
+	else if ((physical_block_size == 512) || (logical_block_size == 512)) {
04bcc6
+		log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
04bcc6
+			  physical_block_size, logical_block_size);
04bcc6
+		bs = 512;
04bcc6
+	} else if ((physical_block_size == 4096) || (logical_block_size == 4096)) {
04bcc6
+		log_debug("Set last byte mixed block sizes physical %u logical %u using 4096",
04bcc6
+			  physical_block_size, logical_block_size);
04bcc6
+		bs = 4096;
04bcc6
+	} else {
04bcc6
+		log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
04bcc6
+			  physical_block_size, logical_block_size);
04bcc6
+		bs = 512;
04bcc6
+	}
04bcc6
 
04bcc6
-	bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size);
04bcc6
+	bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs);
04bcc6
 }
04bcc6
 
04bcc6
 void dev_unset_last_byte(struct device *dev)
04bcc6
-- 
04bcc6
1.8.3.1
04bcc6