|
|
04bcc6 |
From 748d1076bf2ab590eb8cff68dba88d78f1f67e9f Mon Sep 17 00:00:00 2001
|
|
|
04bcc6 |
From: David Teigland <teigland@redhat.com>
|
|
|
04bcc6 |
Date: Wed, 24 Jul 2019 11:32:13 -0500
|
|
|
04bcc6 |
Subject: [PATCH 2/2] Fix rounding writes up to sector size
|
|
|
04bcc6 |
|
|
|
04bcc6 |
Do this at two levels, although one would be enough to
|
|
|
04bcc6 |
fix the problem seen recently:
|
|
|
04bcc6 |
|
|
|
04bcc6 |
- Ignore any reported sector size other than 512 of 4096.
|
|
|
04bcc6 |
If either sector size (physical or logical) is reported
|
|
|
04bcc6 |
as 512, then use 512. If neither are reported as 512,
|
|
|
04bcc6 |
and one or the other is reported as 4096, then use 4096.
|
|
|
04bcc6 |
If neither is reported as either 512 or 4096, then use 512.
|
|
|
04bcc6 |
|
|
|
04bcc6 |
- When rounding up a limited write in bcache to be a multiple
|
|
|
04bcc6 |
of the sector size, check that the resulting write size is
|
|
|
04bcc6 |
not larger than the bcache block itself. (This shouldn't
|
|
|
04bcc6 |
happen if the sector size is 512 or 4096.)
|
|
|
04bcc6 |
|
|
|
04bcc6 |
(cherry picked from commit 7550665ba49ac7d497d5b212e14b69298ef01361)
|
|
|
04bcc6 |
|
|
|
04bcc6 |
Conflicts:
|
|
|
04bcc6 |
lib/label/label.c
|
|
|
04bcc6 |
---
|
|
|
04bcc6 |
lib/device/bcache.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++--
|
|
|
04bcc6 |
lib/device/dev-io.c | 52 +++++++++++++++++++++++++++++++
|
|
|
04bcc6 |
lib/device/device.h | 8 +++--
|
|
|
04bcc6 |
lib/label/label.c | 30 +++++++++++++++---
|
|
|
04bcc6 |
4 files changed, 171 insertions(+), 8 deletions(-)
|
|
|
04bcc6 |
|
|
|
04bcc6 |
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
|
|
|
04bcc6 |
index f64931f..423eeb7 100644
|
|
|
04bcc6 |
--- a/lib/device/bcache.c
|
|
|
04bcc6 |
+++ b/lib/device/bcache.c
|
|
|
04bcc6 |
@@ -170,6 +170,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
04bcc6 |
sector_t offset;
|
|
|
04bcc6 |
sector_t nbytes;
|
|
|
04bcc6 |
sector_t limit_nbytes;
|
|
|
04bcc6 |
+ sector_t orig_nbytes;
|
|
|
04bcc6 |
sector_t extra_nbytes = 0;
|
|
|
04bcc6 |
|
|
|
04bcc6 |
if (((uintptr_t) data) & e->page_mask) {
|
|
|
04bcc6 |
@@ -192,11 +193,41 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
04bcc6 |
return false;
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
|
|
|
04bcc6 |
+ /*
|
|
|
04bcc6 |
+ * If the bcache block offset+len goes beyond where lvm is
|
|
|
04bcc6 |
+ * intending to write, then reduce the len being written
|
|
|
04bcc6 |
+ * (which is the bcache block size) so we don't write past
|
|
|
04bcc6 |
+ * the limit set by lvm. If after applying the limit, the
|
|
|
04bcc6 |
+ * resulting size is not a multiple of the sector size (512
|
|
|
04bcc6 |
+ * or 4096) then extend the reduced size to be a multiple of
|
|
|
04bcc6 |
+ * the sector size (we don't want to write partial sectors.)
|
|
|
04bcc6 |
+ */
|
|
|
04bcc6 |
if (offset + nbytes > _last_byte_offset) {
|
|
|
04bcc6 |
limit_nbytes = _last_byte_offset - offset;
|
|
|
04bcc6 |
- if (limit_nbytes % _last_byte_sector_size)
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ if (limit_nbytes % _last_byte_sector_size) {
|
|
|
04bcc6 |
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
|
|
|
04bcc6 |
|
|
|
04bcc6 |
+ /*
|
|
|
04bcc6 |
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
|
|
|
04bcc6 |
+ * should make the final write size a multiple of the
|
|
|
04bcc6 |
+ * sector size. This should never result in a final size
|
|
|
04bcc6 |
+ * larger than the bcache block size (as long as the bcache
|
|
|
04bcc6 |
+ * block size is a multiple of the sector size).
|
|
|
04bcc6 |
+ */
|
|
|
04bcc6 |
+ if (limit_nbytes + extra_nbytes > nbytes) {
|
|
|
04bcc6 |
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
|
|
|
04bcc6 |
+ (unsigned long long)offset,
|
|
|
04bcc6 |
+ (unsigned long long)nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)limit_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)extra_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
04bcc6 |
+ extra_nbytes = 0;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ orig_nbytes = nbytes;
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
if (extra_nbytes) {
|
|
|
04bcc6 |
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
|
|
|
04bcc6 |
(unsigned long long)offset,
|
|
|
04bcc6 |
@@ -211,6 +242,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
04bcc6 |
(unsigned long long)limit_nbytes);
|
|
|
04bcc6 |
nbytes = limit_nbytes;
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ /*
|
|
|
04bcc6 |
+ * This shouldn't happen, the reduced+extended
|
|
|
04bcc6 |
+ * nbytes value should never be larger than the
|
|
|
04bcc6 |
+ * bcache block size.
|
|
|
04bcc6 |
+ */
|
|
|
04bcc6 |
+ if (nbytes > orig_nbytes) {
|
|
|
04bcc6 |
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
|
|
|
04bcc6 |
+ (unsigned long long)offset,
|
|
|
04bcc6 |
+ (unsigned long long)orig_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)limit_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)extra_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
04bcc6 |
+ return false;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
|
|
|
04bcc6 |
@@ -405,6 +452,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
04bcc6 |
uint64_t nbytes = len;
|
|
|
04bcc6 |
sector_t limit_nbytes = 0;
|
|
|
04bcc6 |
sector_t extra_nbytes = 0;
|
|
|
04bcc6 |
+ sector_t orig_nbytes = 0;
|
|
|
04bcc6 |
|
|
|
04bcc6 |
if (offset > _last_byte_offset) {
|
|
|
04bcc6 |
log_error("Limit write at %llu len %llu beyond last byte %llu",
|
|
|
04bcc6 |
@@ -417,9 +465,30 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
04bcc6 |
|
|
|
04bcc6 |
if (offset + nbytes > _last_byte_offset) {
|
|
|
04bcc6 |
limit_nbytes = _last_byte_offset - offset;
|
|
|
04bcc6 |
- if (limit_nbytes % _last_byte_sector_size)
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ if (limit_nbytes % _last_byte_sector_size) {
|
|
|
04bcc6 |
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
|
|
|
04bcc6 |
|
|
|
04bcc6 |
+ /*
|
|
|
04bcc6 |
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
|
|
|
04bcc6 |
+ * should make the final write size a multiple of the
|
|
|
04bcc6 |
+ * sector size. This should never result in a final size
|
|
|
04bcc6 |
+ * larger than the bcache block size (as long as the bcache
|
|
|
04bcc6 |
+ * block size is a multiple of the sector size).
|
|
|
04bcc6 |
+ */
|
|
|
04bcc6 |
+ if (limit_nbytes + extra_nbytes > nbytes) {
|
|
|
04bcc6 |
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
|
|
|
04bcc6 |
+ (unsigned long long)offset,
|
|
|
04bcc6 |
+ (unsigned long long)nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)limit_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)extra_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
04bcc6 |
+ extra_nbytes = 0;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ orig_nbytes = nbytes;
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
if (extra_nbytes) {
|
|
|
04bcc6 |
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
|
|
|
04bcc6 |
(unsigned long long)offset,
|
|
|
04bcc6 |
@@ -434,6 +503,22 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
04bcc6 |
(unsigned long long)limit_nbytes);
|
|
|
04bcc6 |
nbytes = limit_nbytes;
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ /*
|
|
|
04bcc6 |
+ * This shouldn't happen, the reduced+extended
|
|
|
04bcc6 |
+ * nbytes value should never be larger than the
|
|
|
04bcc6 |
+ * bcache block size.
|
|
|
04bcc6 |
+ */
|
|
|
04bcc6 |
+ if (nbytes > orig_nbytes) {
|
|
|
04bcc6 |
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
|
|
|
04bcc6 |
+ (unsigned long long)offset,
|
|
|
04bcc6 |
+ (unsigned long long)orig_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)limit_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)extra_nbytes,
|
|
|
04bcc6 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
04bcc6 |
+ return false;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
|
|
|
04bcc6 |
where = offset;
|
|
|
04bcc6 |
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
|
|
|
04bcc6 |
index 6996a44..90c54ce 100644
|
|
|
04bcc6 |
--- a/lib/device/dev-io.c
|
|
|
04bcc6 |
+++ b/lib/device/dev-io.c
|
|
|
04bcc6 |
@@ -136,6 +136,58 @@ static int _io(struct device_area *where, char *buffer, int should_write, dev_io
|
|
|
04bcc6 |
return (total == (size_t) where->size);
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
|
|
|
04bcc6 |
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
|
|
|
04bcc6 |
+ unsigned int *logical_block_size)
|
|
|
04bcc6 |
+{
|
|
|
04bcc6 |
+ int fd = dev->bcache_fd;
|
|
|
04bcc6 |
+ int do_close = 0;
|
|
|
04bcc6 |
+ unsigned int pbs = 0;
|
|
|
04bcc6 |
+ unsigned int lbs = 0;
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ if (dev->physical_block_size || dev->logical_block_size) {
|
|
|
04bcc6 |
+ *physical_block_size = dev->physical_block_size;
|
|
|
04bcc6 |
+ *logical_block_size = dev->logical_block_size;
|
|
|
04bcc6 |
+ return 1;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ if (fd <= 0) {
|
|
|
04bcc6 |
+ if (!dev_open_readonly(dev))
|
|
|
04bcc6 |
+ return 0;
|
|
|
04bcc6 |
+ fd = dev_fd(dev);
|
|
|
04bcc6 |
+ do_close = 1;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ /*
|
|
|
04bcc6 |
+ * BLKPBSZGET from kernel comment for blk_queue_physical_block_size:
|
|
|
04bcc6 |
+ * "the lowest possible sector size that the hardware can operate on
|
|
|
04bcc6 |
+ * without reverting to read-modify-write operations"
|
|
|
04bcc6 |
+ */
|
|
|
04bcc6 |
+ if (ioctl(fd, BLKPBSZGET, &pbs)) {
|
|
|
04bcc6 |
+ stack;
|
|
|
04bcc6 |
+ pbs = 0;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ /*
|
|
|
04bcc6 |
+ * BLKSSZGET from kernel comment for blk_queue_logical_block_size:
|
|
|
04bcc6 |
+ * "the lowest possible block size that the storage device can address."
|
|
|
04bcc6 |
+ */
|
|
|
04bcc6 |
+ if (ioctl(fd, BLKSSZGET, &lbs)) {
|
|
|
04bcc6 |
+ stack;
|
|
|
04bcc6 |
+ lbs = 0;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ dev->physical_block_size = pbs;
|
|
|
04bcc6 |
+ dev->logical_block_size = lbs;
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ *physical_block_size = pbs;
|
|
|
04bcc6 |
+ *logical_block_size = lbs;
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ if (do_close && !dev_close_immediate(dev))
|
|
|
04bcc6 |
+ stack;
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ return 1;
|
|
|
04bcc6 |
+}
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
/*-----------------------------------------------------------------
|
|
|
04bcc6 |
* LVM2 uses O_DIRECT when performing metadata io, which requires
|
|
|
04bcc6 |
* block size aligned accesses. If any io is not aligned we have
|
|
|
04bcc6 |
diff --git a/lib/device/device.h b/lib/device/device.h
|
|
|
04bcc6 |
index bbd965a..aafc401 100644
|
|
|
04bcc6 |
--- a/lib/device/device.h
|
|
|
04bcc6 |
+++ b/lib/device/device.h
|
|
|
04bcc6 |
@@ -67,8 +67,10 @@ struct device {
|
|
|
04bcc6 |
int open_count;
|
|
|
04bcc6 |
int error_count;
|
|
|
04bcc6 |
int max_error_count;
|
|
|
04bcc6 |
- int phys_block_size;
|
|
|
04bcc6 |
- int block_size;
|
|
|
04bcc6 |
+ int phys_block_size; /* From either BLKPBSZGET or BLKSSZGET, don't use */
|
|
|
04bcc6 |
+ int block_size; /* From BLKBSZGET, returns bdev->bd_block_size, likely set by fs, probably don't use */
|
|
|
04bcc6 |
+ int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */
|
|
|
04bcc6 |
+ int logical_block_size; /* From BLKSSZGET: lowest possible block size that the storage device can address */
|
|
|
04bcc6 |
int read_ahead;
|
|
|
04bcc6 |
int bcache_fd;
|
|
|
04bcc6 |
uint32_t flags;
|
|
|
04bcc6 |
@@ -132,6 +134,8 @@ void dev_size_seqno_inc(void);
|
|
|
04bcc6 |
* All io should use these routines.
|
|
|
04bcc6 |
*/
|
|
|
04bcc6 |
int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size);
|
|
|
04bcc6 |
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
|
|
|
04bcc6 |
+ unsigned int *logical_block_size);
|
|
|
04bcc6 |
int dev_get_size(struct device *dev, uint64_t *size);
|
|
|
04bcc6 |
int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
|
|
|
04bcc6 |
int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
|
|
|
04bcc6 |
diff --git a/lib/label/label.c b/lib/label/label.c
|
|
|
04bcc6 |
index 4f8e135..66fb564 100644
|
|
|
04bcc6 |
--- a/lib/label/label.c
|
|
|
04bcc6 |
+++ b/lib/label/label.c
|
|
|
04bcc6 |
@@ -1492,12 +1492,34 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
|
|
|
04bcc6 |
|
|
|
04bcc6 |
void dev_set_last_byte(struct device *dev, uint64_t offset)
|
|
|
04bcc6 |
{
|
|
|
04bcc6 |
- unsigned int phys_block_size = 0;
|
|
|
04bcc6 |
- unsigned int block_size = 0;
|
|
|
04bcc6 |
+ unsigned int physical_block_size = 0;
|
|
|
04bcc6 |
+ unsigned int logical_block_size = 0;
|
|
|
04bcc6 |
+ unsigned int bs;
|
|
|
04bcc6 |
|
|
|
04bcc6 |
- dev_get_block_size(dev, &phys_block_size, &block_size);
|
|
|
04bcc6 |
+ if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) {
|
|
|
04bcc6 |
+ stack;
|
|
|
04bcc6 |
+ return; /* FIXME: error path ? */
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
+
|
|
|
04bcc6 |
+ if ((physical_block_size == 512) && (logical_block_size == 512))
|
|
|
04bcc6 |
+ bs = 512;
|
|
|
04bcc6 |
+ else if ((physical_block_size == 4096) && (logical_block_size == 4096))
|
|
|
04bcc6 |
+ bs = 4096;
|
|
|
04bcc6 |
+ else if ((physical_block_size == 512) || (logical_block_size == 512)) {
|
|
|
04bcc6 |
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
|
|
|
04bcc6 |
+ physical_block_size, logical_block_size);
|
|
|
04bcc6 |
+ bs = 512;
|
|
|
04bcc6 |
+ } else if ((physical_block_size == 4096) || (logical_block_size == 4096)) {
|
|
|
04bcc6 |
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 4096",
|
|
|
04bcc6 |
+ physical_block_size, logical_block_size);
|
|
|
04bcc6 |
+ bs = 4096;
|
|
|
04bcc6 |
+ } else {
|
|
|
04bcc6 |
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
|
|
|
04bcc6 |
+ physical_block_size, logical_block_size);
|
|
|
04bcc6 |
+ bs = 512;
|
|
|
04bcc6 |
+ }
|
|
|
04bcc6 |
|
|
|
04bcc6 |
- bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size);
|
|
|
04bcc6 |
+ bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs);
|
|
|
04bcc6 |
}
|
|
|
04bcc6 |
|
|
|
04bcc6 |
void dev_unset_last_byte(struct device *dev)
|
|
|
04bcc6 |
--
|
|
|
04bcc6 |
1.8.3.1
|
|
|
04bcc6 |
|