lib/device/bcache.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++-- lib/device/dev-io.c | 52 +++++++++++++++++++++++++++++++ lib/device/device.h | 8 +++-- lib/label/label.c | 30 ++++++++++++++---- 4 files changed, 169 insertions(+), 10 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 7b09353..04fbf35 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -169,6 +169,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd, sector_t offset; sector_t nbytes; sector_t limit_nbytes; + sector_t orig_nbytes; sector_t extra_nbytes = 0; if (((uintptr_t) data) & e->page_mask) { @@ -191,11 +192,41 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd, return false; } + /* + * If the bcache block offset+len goes beyond where lvm is + * intending to write, then reduce the len being written + * (which is the bcache block size) so we don't write past + * the limit set by lvm. If after applying the limit, the + * resulting size is not a multiple of the sector size (512 + * or 4096) then extend the reduced size to be a multiple of + * the sector size (we don't want to write partial sectors.) + */ if (offset + nbytes > _last_byte_offset) { limit_nbytes = _last_byte_offset - offset; - if (limit_nbytes % _last_byte_sector_size) + + if (limit_nbytes % _last_byte_sector_size) { extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size); + /* + * adding extra_nbytes to the reduced nbytes (limit_nbytes) + * should make the final write size a multiple of the + * sector size. This should never result in a final size + * larger than the bcache block size (as long as the bcache + * block size is a multiple of the sector size). + */ + if (limit_nbytes + extra_nbytes > nbytes) { + log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu", + (unsigned long long)offset, + (unsigned long long)nbytes, + (unsigned long long)limit_nbytes, + (unsigned long long)extra_nbytes, + (unsigned long long)_last_byte_sector_size); + extra_nbytes = 0; + } + } + + orig_nbytes = nbytes; + if (extra_nbytes) { log_debug("Limit write at %llu len %llu to len %llu rounded to %llu", (unsigned long long)offset, @@ -210,6 +241,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd, (unsigned long long)limit_nbytes); nbytes = limit_nbytes; } + + /* + * This shouldn't happen, the reduced+extended + * nbytes value should never be larger than the + * bcache block size. + */ + if (nbytes > orig_nbytes) { + log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu", + (unsigned long long)offset, + (unsigned long long)orig_nbytes, + (unsigned long long)nbytes, + (unsigned long long)limit_nbytes, + (unsigned long long)extra_nbytes, + (unsigned long long)_last_byte_sector_size); + return false; + } } } @@ -403,6 +450,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd, uint64_t nbytes = len; sector_t limit_nbytes = 0; sector_t extra_nbytes = 0; + sector_t orig_nbytes = 0; if (offset > _last_byte_offset) { log_error("Limit write at %llu len %llu beyond last byte %llu", @@ -415,9 +463,30 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd, if (offset + nbytes > _last_byte_offset) { limit_nbytes = _last_byte_offset - offset; - if (limit_nbytes % _last_byte_sector_size) + + if (limit_nbytes % _last_byte_sector_size) { extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size); + /* + * adding extra_nbytes to the reduced nbytes (limit_nbytes) + * should make the final write size a multiple of the + * sector size. This should never result in a final size + * larger than the bcache block size (as long as the bcache + * block size is a multiple of the sector size). + */ + if (limit_nbytes + extra_nbytes > nbytes) { + log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu", + (unsigned long long)offset, + (unsigned long long)nbytes, + (unsigned long long)limit_nbytes, + (unsigned long long)extra_nbytes, + (unsigned long long)_last_byte_sector_size); + extra_nbytes = 0; + } + } + + orig_nbytes = nbytes; + if (extra_nbytes) { log_debug("Limit write at %llu len %llu to len %llu rounded to %llu", (unsigned long long)offset, @@ -432,6 +501,22 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd, (unsigned long long)limit_nbytes); nbytes = limit_nbytes; } + + /* + * This shouldn't happen, the reduced+extended + * nbytes value should never be larger than the + * bcache block size. + */ + if (nbytes > orig_nbytes) { + log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu", + (unsigned long long)offset, + (unsigned long long)orig_nbytes, + (unsigned long long)nbytes, + (unsigned long long)limit_nbytes, + (unsigned long long)extra_nbytes, + (unsigned long long)_last_byte_sector_size); + return false; + } } where = offset; diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c index 3fe2647..5fa0b7a 100644 --- a/lib/device/dev-io.c +++ b/lib/device/dev-io.c @@ -250,6 +250,58 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64 return 1; } +int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size, + unsigned int *logical_block_size) +{ + int fd = dev->bcache_fd; + int do_close = 0; + unsigned int pbs = 0; + unsigned int lbs = 0; + + if (dev->physical_block_size || dev->logical_block_size) { + *physical_block_size = dev->physical_block_size; + *logical_block_size = dev->logical_block_size; + return 1; + } + + if (fd <= 0) { + if (!dev_open_readonly(dev)) + return 0; + fd = dev_fd(dev); + do_close = 1; + } + + /* + * BLKPBSZGET from kernel comment for blk_queue_physical_block_size: + * "the lowest possible sector size that the hardware can operate on + * without reverting to read-modify-write operations" + */ + if (ioctl(fd, BLKPBSZGET, &pbs)) { + stack; + pbs = 0; + } + + /* + * BLKSSZGET from kernel comment for blk_queue_logical_block_size: + * "the lowest possible block size that the storage device can address." + */ + if (ioctl(fd, BLKSSZGET, &lbs)) { + stack; + lbs = 0; + } + + dev->physical_block_size = pbs; + dev->logical_block_size = lbs; + + *physical_block_size = pbs; + *logical_block_size = lbs; + + if (do_close && !dev_close_immediate(dev)) + stack; + + return 1; +} + /*----------------------------------------------------------------- * Public functions *---------------------------------------------------------------*/ diff --git a/lib/device/device.h b/lib/device/device.h index 30e1e79..bb65f84 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -67,8 +67,10 @@ struct device { /* private */ int fd; int open_count; - int phys_block_size; - int block_size; + int phys_block_size; /* From either BLKPBSZGET or BLKSSZGET, don't use */ + int block_size; /* From BLKBSZGET, returns bdev->bd_block_size, likely set by fs, probably don't use */ + int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */ + int logical_block_size; /* From BLKSSZGET: lowest possible block size that the storage device can address */ int read_ahead; int bcache_fd; uint32_t flags; @@ -132,6 +134,8 @@ void dev_size_seqno_inc(void); * All io should use these routines. */ int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size); +int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size, + unsigned int *logical_block_size); int dev_get_size(struct device *dev, uint64_t *size); int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead); int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes); diff --git a/lib/label/label.c b/lib/label/label.c index 4c21d97..72be5ec 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -1472,16 +1472,34 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val) void dev_set_last_byte(struct device *dev, uint64_t offset) { - unsigned int phys_block_size = 0; - unsigned int block_size = 0; + unsigned int physical_block_size = 0; + unsigned int logical_block_size = 0; + unsigned int bs; - if (!dev_get_block_size(dev, &phys_block_size, &block_size)) { + if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) { stack; - /* FIXME ASSERT or regular error testing is missing */ - return; + return; /* FIXME: error path ? */ + } + + if ((physical_block_size == 512) && (logical_block_size == 512)) + bs = 512; + else if ((physical_block_size == 4096) && (logical_block_size == 4096)) + bs = 4096; + else if ((physical_block_size == 512) || (logical_block_size == 512)) { + log_debug("Set last byte mixed block sizes physical %u logical %u using 512", + physical_block_size, logical_block_size); + bs = 512; + } else if ((physical_block_size == 4096) || (logical_block_size == 4096)) { + log_debug("Set last byte mixed block sizes physical %u logical %u using 4096", + physical_block_size, logical_block_size); + bs = 4096; + } else { + log_debug("Set last byte mixed block sizes physical %u logical %u using 512", + physical_block_size, logical_block_size); + bs = 512; } - bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size); + bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs); } void dev_unset_last_byte(struct device *dev)