|
|
26e710 |
lib/device/bcache.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++--
|
|
|
26e710 |
lib/device/dev-io.c | 52 +++++++++++++++++++++++++++++++
|
|
|
26e710 |
lib/device/device.h | 8 +++--
|
|
|
26e710 |
lib/label/label.c | 30 ++++++++++++++----
|
|
|
26e710 |
4 files changed, 169 insertions(+), 10 deletions(-)
|
|
|
26e710 |
|
|
|
26e710 |
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
|
|
|
26e710 |
index 7b09353..04fbf35 100644
|
|
|
26e710 |
--- a/lib/device/bcache.c
|
|
|
26e710 |
+++ b/lib/device/bcache.c
|
|
|
26e710 |
@@ -169,6 +169,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
26e710 |
sector_t offset;
|
|
|
26e710 |
sector_t nbytes;
|
|
|
26e710 |
sector_t limit_nbytes;
|
|
|
26e710 |
+ sector_t orig_nbytes;
|
|
|
26e710 |
sector_t extra_nbytes = 0;
|
|
|
26e710 |
|
|
|
26e710 |
if (((uintptr_t) data) & e->page_mask) {
|
|
|
26e710 |
@@ -191,11 +192,41 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
26e710 |
return false;
|
|
|
26e710 |
}
|
|
|
26e710 |
|
|
|
26e710 |
+ /*
|
|
|
26e710 |
+ * If the bcache block offset+len goes beyond where lvm is
|
|
|
26e710 |
+ * intending to write, then reduce the len being written
|
|
|
26e710 |
+ * (which is the bcache block size) so we don't write past
|
|
|
26e710 |
+ * the limit set by lvm. If after applying the limit, the
|
|
|
26e710 |
+ * resulting size is not a multiple of the sector size (512
|
|
|
26e710 |
+ * or 4096) then extend the reduced size to be a multiple of
|
|
|
26e710 |
+ * the sector size (we don't want to write partial sectors.)
|
|
|
26e710 |
+ */
|
|
|
26e710 |
if (offset + nbytes > _last_byte_offset) {
|
|
|
26e710 |
limit_nbytes = _last_byte_offset - offset;
|
|
|
26e710 |
- if (limit_nbytes % _last_byte_sector_size)
|
|
|
26e710 |
+
|
|
|
26e710 |
+ if (limit_nbytes % _last_byte_sector_size) {
|
|
|
26e710 |
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
|
|
|
26e710 |
|
|
|
26e710 |
+ /*
|
|
|
26e710 |
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
|
|
|
26e710 |
+ * should make the final write size a multiple of the
|
|
|
26e710 |
+ * sector size. This should never result in a final size
|
|
|
26e710 |
+ * larger than the bcache block size (as long as the bcache
|
|
|
26e710 |
+ * block size is a multiple of the sector size).
|
|
|
26e710 |
+ */
|
|
|
26e710 |
+ if (limit_nbytes + extra_nbytes > nbytes) {
|
|
|
26e710 |
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
|
|
|
26e710 |
+ (unsigned long long)offset,
|
|
|
26e710 |
+ (unsigned long long)nbytes,
|
|
|
26e710 |
+ (unsigned long long)limit_nbytes,
|
|
|
26e710 |
+ (unsigned long long)extra_nbytes,
|
|
|
26e710 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
26e710 |
+ extra_nbytes = 0;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+
|
|
|
26e710 |
+ orig_nbytes = nbytes;
|
|
|
26e710 |
+
|
|
|
26e710 |
if (extra_nbytes) {
|
|
|
26e710 |
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
|
|
|
26e710 |
(unsigned long long)offset,
|
|
|
26e710 |
@@ -210,6 +241,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
26e710 |
(unsigned long long)limit_nbytes);
|
|
|
26e710 |
nbytes = limit_nbytes;
|
|
|
26e710 |
}
|
|
|
26e710 |
+
|
|
|
26e710 |
+ /*
|
|
|
26e710 |
+ * This shouldn't happen, the reduced+extended
|
|
|
26e710 |
+ * nbytes value should never be larger than the
|
|
|
26e710 |
+ * bcache block size.
|
|
|
26e710 |
+ */
|
|
|
26e710 |
+ if (nbytes > orig_nbytes) {
|
|
|
26e710 |
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
|
|
|
26e710 |
+ (unsigned long long)offset,
|
|
|
26e710 |
+ (unsigned long long)orig_nbytes,
|
|
|
26e710 |
+ (unsigned long long)nbytes,
|
|
|
26e710 |
+ (unsigned long long)limit_nbytes,
|
|
|
26e710 |
+ (unsigned long long)extra_nbytes,
|
|
|
26e710 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
26e710 |
+ return false;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
}
|
|
|
26e710 |
}
|
|
|
26e710 |
|
|
|
26e710 |
@@ -403,6 +450,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
26e710 |
uint64_t nbytes = len;
|
|
|
26e710 |
sector_t limit_nbytes = 0;
|
|
|
26e710 |
sector_t extra_nbytes = 0;
|
|
|
26e710 |
+ sector_t orig_nbytes = 0;
|
|
|
26e710 |
|
|
|
26e710 |
if (offset > _last_byte_offset) {
|
|
|
26e710 |
log_error("Limit write at %llu len %llu beyond last byte %llu",
|
|
|
26e710 |
@@ -415,9 +463,30 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
26e710 |
|
|
|
26e710 |
if (offset + nbytes > _last_byte_offset) {
|
|
|
26e710 |
limit_nbytes = _last_byte_offset - offset;
|
|
|
26e710 |
- if (limit_nbytes % _last_byte_sector_size)
|
|
|
26e710 |
+
|
|
|
26e710 |
+ if (limit_nbytes % _last_byte_sector_size) {
|
|
|
26e710 |
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
|
|
|
26e710 |
|
|
|
26e710 |
+ /*
|
|
|
26e710 |
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
|
|
|
26e710 |
+ * should make the final write size a multiple of the
|
|
|
26e710 |
+ * sector size. This should never result in a final size
|
|
|
26e710 |
+ * larger than the bcache block size (as long as the bcache
|
|
|
26e710 |
+ * block size is a multiple of the sector size).
|
|
|
26e710 |
+ */
|
|
|
26e710 |
+ if (limit_nbytes + extra_nbytes > nbytes) {
|
|
|
26e710 |
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
|
|
|
26e710 |
+ (unsigned long long)offset,
|
|
|
26e710 |
+ (unsigned long long)nbytes,
|
|
|
26e710 |
+ (unsigned long long)limit_nbytes,
|
|
|
26e710 |
+ (unsigned long long)extra_nbytes,
|
|
|
26e710 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
26e710 |
+ extra_nbytes = 0;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+
|
|
|
26e710 |
+ orig_nbytes = nbytes;
|
|
|
26e710 |
+
|
|
|
26e710 |
if (extra_nbytes) {
|
|
|
26e710 |
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
|
|
|
26e710 |
(unsigned long long)offset,
|
|
|
26e710 |
@@ -432,6 +501,22 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
|
|
26e710 |
(unsigned long long)limit_nbytes);
|
|
|
26e710 |
nbytes = limit_nbytes;
|
|
|
26e710 |
}
|
|
|
26e710 |
+
|
|
|
26e710 |
+ /*
|
|
|
26e710 |
+ * This shouldn't happen, the reduced+extended
|
|
|
26e710 |
+ * nbytes value should never be larger than the
|
|
|
26e710 |
+ * bcache block size.
|
|
|
26e710 |
+ */
|
|
|
26e710 |
+ if (nbytes > orig_nbytes) {
|
|
|
26e710 |
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
|
|
|
26e710 |
+ (unsigned long long)offset,
|
|
|
26e710 |
+ (unsigned long long)orig_nbytes,
|
|
|
26e710 |
+ (unsigned long long)nbytes,
|
|
|
26e710 |
+ (unsigned long long)limit_nbytes,
|
|
|
26e710 |
+ (unsigned long long)extra_nbytes,
|
|
|
26e710 |
+ (unsigned long long)_last_byte_sector_size);
|
|
|
26e710 |
+ return false;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
}
|
|
|
26e710 |
|
|
|
26e710 |
where = offset;
|
|
|
26e710 |
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
|
|
|
26e710 |
index 3fe2647..5fa0b7a 100644
|
|
|
26e710 |
--- a/lib/device/dev-io.c
|
|
|
26e710 |
+++ b/lib/device/dev-io.c
|
|
|
26e710 |
@@ -250,6 +250,58 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
|
|
|
26e710 |
return 1;
|
|
|
26e710 |
}
|
|
|
26e710 |
|
|
|
26e710 |
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
|
|
|
26e710 |
+ unsigned int *logical_block_size)
|
|
|
26e710 |
+{
|
|
|
26e710 |
+ int fd = dev->bcache_fd;
|
|
|
26e710 |
+ int do_close = 0;
|
|
|
26e710 |
+ unsigned int pbs = 0;
|
|
|
26e710 |
+ unsigned int lbs = 0;
|
|
|
26e710 |
+
|
|
|
26e710 |
+ if (dev->physical_block_size || dev->logical_block_size) {
|
|
|
26e710 |
+ *physical_block_size = dev->physical_block_size;
|
|
|
26e710 |
+ *logical_block_size = dev->logical_block_size;
|
|
|
26e710 |
+ return 1;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+
|
|
|
26e710 |
+ if (fd <= 0) {
|
|
|
26e710 |
+ if (!dev_open_readonly(dev))
|
|
|
26e710 |
+ return 0;
|
|
|
26e710 |
+ fd = dev_fd(dev);
|
|
|
26e710 |
+ do_close = 1;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+
|
|
|
26e710 |
+ /*
|
|
|
26e710 |
+ * BLKPBSZGET from kernel comment for blk_queue_physical_block_size:
|
|
|
26e710 |
+ * "the lowest possible sector size that the hardware can operate on
|
|
|
26e710 |
+ * without reverting to read-modify-write operations"
|
|
|
26e710 |
+ */
|
|
|
26e710 |
+ if (ioctl(fd, BLKPBSZGET, &pbs)) {
|
|
|
26e710 |
+ stack;
|
|
|
26e710 |
+ pbs = 0;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+
|
|
|
26e710 |
+ /*
|
|
|
26e710 |
+ * BLKSSZGET from kernel comment for blk_queue_logical_block_size:
|
|
|
26e710 |
+ * "the lowest possible block size that the storage device can address."
|
|
|
26e710 |
+ */
|
|
|
26e710 |
+ if (ioctl(fd, BLKSSZGET, &lbs)) {
|
|
|
26e710 |
+ stack;
|
|
|
26e710 |
+ lbs = 0;
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+
|
|
|
26e710 |
+ dev->physical_block_size = pbs;
|
|
|
26e710 |
+ dev->logical_block_size = lbs;
|
|
|
26e710 |
+
|
|
|
26e710 |
+ *physical_block_size = pbs;
|
|
|
26e710 |
+ *logical_block_size = lbs;
|
|
|
26e710 |
+
|
|
|
26e710 |
+ if (do_close && !dev_close_immediate(dev))
|
|
|
26e710 |
+ stack;
|
|
|
26e710 |
+
|
|
|
26e710 |
+ return 1;
|
|
|
26e710 |
+}
|
|
|
26e710 |
+
|
|
|
26e710 |
/*-----------------------------------------------------------------
|
|
|
26e710 |
* Public functions
|
|
|
26e710 |
*---------------------------------------------------------------*/
|
|
|
26e710 |
diff --git a/lib/device/device.h b/lib/device/device.h
|
|
|
26e710 |
index 30e1e79..bb65f84 100644
|
|
|
26e710 |
--- a/lib/device/device.h
|
|
|
26e710 |
+++ b/lib/device/device.h
|
|
|
26e710 |
@@ -67,8 +67,10 @@ struct device {
|
|
|
26e710 |
/* private */
|
|
|
26e710 |
int fd;
|
|
|
26e710 |
int open_count;
|
|
|
26e710 |
- int phys_block_size;
|
|
|
26e710 |
- int block_size;
|
|
|
26e710 |
+ int phys_block_size; /* From either BLKPBSZGET or BLKSSZGET, don't use */
|
|
|
26e710 |
+ int block_size; /* From BLKBSZGET, returns bdev->bd_block_size, likely set by fs, probably don't use */
|
|
|
26e710 |
+ int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */
|
|
|
26e710 |
+ int logical_block_size; /* From BLKSSZGET: lowest possible block size that the storage device can address */
|
|
|
26e710 |
int read_ahead;
|
|
|
26e710 |
int bcache_fd;
|
|
|
26e710 |
uint32_t flags;
|
|
|
26e710 |
@@ -132,6 +134,8 @@ void dev_size_seqno_inc(void);
|
|
|
26e710 |
* All io should use these routines.
|
|
|
26e710 |
*/
|
|
|
26e710 |
int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size);
|
|
|
26e710 |
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
|
|
|
26e710 |
+ unsigned int *logical_block_size);
|
|
|
26e710 |
int dev_get_size(struct device *dev, uint64_t *size);
|
|
|
26e710 |
int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
|
|
|
26e710 |
int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
|
|
|
26e710 |
diff --git a/lib/label/label.c b/lib/label/label.c
|
|
|
26e710 |
index 4c21d97..72be5ec 100644
|
|
|
26e710 |
--- a/lib/label/label.c
|
|
|
26e710 |
+++ b/lib/label/label.c
|
|
|
26e710 |
@@ -1472,16 +1472,34 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
|
|
|
26e710 |
|
|
|
26e710 |
void dev_set_last_byte(struct device *dev, uint64_t offset)
|
|
|
26e710 |
{
|
|
|
26e710 |
- unsigned int phys_block_size = 0;
|
|
|
26e710 |
- unsigned int block_size = 0;
|
|
|
26e710 |
+ unsigned int physical_block_size = 0;
|
|
|
26e710 |
+ unsigned int logical_block_size = 0;
|
|
|
26e710 |
+ unsigned int bs;
|
|
|
26e710 |
|
|
|
26e710 |
- if (!dev_get_block_size(dev, &phys_block_size, &block_size)) {
|
|
|
26e710 |
+ if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) {
|
|
|
26e710 |
stack;
|
|
|
26e710 |
- /* FIXME ASSERT or regular error testing is missing */
|
|
|
26e710 |
- return;
|
|
|
26e710 |
+ return; /* FIXME: error path ? */
|
|
|
26e710 |
+ }
|
|
|
26e710 |
+
|
|
|
26e710 |
+ if ((physical_block_size == 512) && (logical_block_size == 512))
|
|
|
26e710 |
+ bs = 512;
|
|
|
26e710 |
+ else if ((physical_block_size == 4096) && (logical_block_size == 4096))
|
|
|
26e710 |
+ bs = 4096;
|
|
|
26e710 |
+ else if ((physical_block_size == 512) || (logical_block_size == 512)) {
|
|
|
26e710 |
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
|
|
|
26e710 |
+ physical_block_size, logical_block_size);
|
|
|
26e710 |
+ bs = 512;
|
|
|
26e710 |
+ } else if ((physical_block_size == 4096) || (logical_block_size == 4096)) {
|
|
|
26e710 |
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 4096",
|
|
|
26e710 |
+ physical_block_size, logical_block_size);
|
|
|
26e710 |
+ bs = 4096;
|
|
|
26e710 |
+ } else {
|
|
|
26e710 |
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
|
|
|
26e710 |
+ physical_block_size, logical_block_size);
|
|
|
26e710 |
+ bs = 512;
|
|
|
26e710 |
}
|
|
|
26e710 |
|
|
|
26e710 |
- bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size);
|
|
|
26e710 |
+ bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs);
|
|
|
26e710 |
}
|
|
|
26e710 |
|
|
|
26e710 |
void dev_unset_last_byte(struct device *dev)
|