Blame SOURCES/lvm2-2_02_182-metadata-prevent-writing-beyond-metadata-area.patch

d8b906
From 87bd4a350468023b14251b9bde98f88ffb419268 Mon Sep 17 00:00:00 2001
d8b906
From: David Teigland <teigland@redhat.com>
d8b906
Date: Mon, 29 Oct 2018 11:06:00 -0500
d8b906
Subject: [PATCH 2/2] metadata: prevent writing beyond metadata area
d8b906
d8b906
lvm uses a bcache block size of 128K.  A bcache block
d8b906
at the end of the metadata area will overlap the PEs
d8b906
from which LVs are allocated.  How much they overlap depends on
d8b906
alignments.  When lvm reads and writes one of these
d8b906
bcache blocks to update VG metadata, it can also be
d8b906
reading and writing PEs that belong to an LV.
d8b906
d8b906
If these overlapping PEs are being written to by the
d8b906
LV user (e.g. filesystem) at the same time that lvm
d8b906
is modifying VG metadata in the overlapping bcache
d8b906
block, then the user's updates to the PEs can be lost.
d8b906
d8b906
This patch is a quick hack to prevent lvm from writing
d8b906
past the end of the metadata area.
d8b906
d8b906
(cherry picked from commit ab27d5dc2a5c3bf23ab8fed438f1542015dc723d)
d8b906
---
d8b906
 lib/device/bcache.c           | 79 +++++++++++++++++++++++++++++++++++++++++--
d8b906
 lib/device/bcache.h           |  3 ++
d8b906
 lib/format_text/format-text.c | 10 ++++++
d8b906
 lib/label/label.c             | 35 ++++++++++++++++++-
d8b906
 lib/label/label.h             |  2 ++
d8b906
 lib/metadata/mirror.c         |  4 +++
d8b906
 6 files changed, 130 insertions(+), 3 deletions(-)
d8b906
d8b906
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
d8b906
index b1f7d2a..5ac2558 100644
d8b906
--- a/lib/device/bcache.c
d8b906
+++ b/lib/device/bcache.c
d8b906
@@ -156,6 +156,10 @@ static void _async_destroy(struct io_engine *ioe)
d8b906
 	dm_free(e);
d8b906
 }
d8b906
 
d8b906
+static int _last_byte_fd;
d8b906
+static uint64_t _last_byte_offset;
d8b906
+static int _last_byte_sector_size;
d8b906
+
d8b906
 static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
d8b906
 			 sector_t sb, sector_t se, void *data, void *context)
d8b906
 {
d8b906
@@ -163,12 +167,53 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
d8b906
 	struct iocb *cb_array[1];
d8b906
 	struct control_block *cb;
d8b906
 	struct async_engine *e = _to_async(ioe);
d8b906
+	sector_t offset;
d8b906
+	sector_t nbytes;
d8b906
+	sector_t limit_nbytes;
d8b906
+	sector_t extra_nbytes = 0;
d8b906
 
d8b906
 	if (((uintptr_t) data) & e->page_mask) {
d8b906
 		log_warn("misaligned data buffer");
d8b906
 		return false;
d8b906
 	}
d8b906
 
d8b906
+	offset = sb << SECTOR_SHIFT;
d8b906
+	nbytes = (se - sb) << SECTOR_SHIFT;
d8b906
+
d8b906
+	/*
d8b906
+	 * If the bcache block extends past the last byte lvm wants to write, shorten the write.
d8b906
+	 */
d8b906
+	if ((d == DIR_WRITE) && _last_byte_offset && (fd == _last_byte_fd)) {
d8b906
+		if (offset > _last_byte_offset) {
d8b906
+			log_error("Limit write at %llu len %llu beyond last byte %llu",
d8b906
+				  (unsigned long long)offset,
d8b906
+				  (unsigned long long)nbytes,
d8b906
+				  (unsigned long long)_last_byte_offset);
d8b906
+			return false;
d8b906
+		}
d8b906
+
d8b906
+		if (offset + nbytes > _last_byte_offset) {
d8b906
+			limit_nbytes = _last_byte_offset - offset;
d8b906
+			if (limit_nbytes % _last_byte_sector_size)
d8b906
+				extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
d8b906
+
d8b906
+			if (extra_nbytes) {
d8b906
+				log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
d8b906
+					  (unsigned long long)offset,
d8b906
+					  (unsigned long long)nbytes,
d8b906
+					  (unsigned long long)limit_nbytes,
d8b906
+					  (unsigned long long)(limit_nbytes + extra_nbytes));
d8b906
+				nbytes = limit_nbytes + extra_nbytes;
d8b906
+			} else {
d8b906
+				log_debug("Limit write at %llu len %llu to len %llu",
d8b906
+					  (unsigned long long)offset,
d8b906
+					  (unsigned long long)nbytes,
d8b906
+					  (unsigned long long)limit_nbytes);
d8b906
+				nbytes = limit_nbytes;
d8b906
+			}
d8b906
+		}
d8b906
+	}
d8b906
+
d8b906
 	cb = _cb_alloc(e->cbs, context);
d8b906
 	if (!cb) {
d8b906
 		log_warn("couldn't allocate control block");
d8b906
@@ -179,10 +224,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
d8b906
 
d8b906
 	cb->cb.aio_fildes = (int) fd;
d8b906
 	cb->cb.u.c.buf = data;
d8b906
-	cb->cb.u.c.offset = sb << SECTOR_SHIFT;
d8b906
-	cb->cb.u.c.nbytes = (se - sb) << SECTOR_SHIFT;
d8b906
+	cb->cb.u.c.offset = offset;
d8b906
+	cb->cb.u.c.nbytes = nbytes;
d8b906
 	cb->cb.aio_lio_opcode = (d == DIR_READ) ? IO_CMD_PREAD : IO_CMD_PWRITE;
d8b906
 
d8b906
+#if 0
d8b906
+	if (d == DIR_READ) {
d8b906
+		log_debug("io R off %llu bytes %llu",
d8b906
+			  (unsigned long long)cb->cb.u.c.offset,
d8b906
+			  (unsigned long long)cb->cb.u.c.nbytes);
d8b906
+	} else {
d8b906
+		log_debug("io W off %llu bytes %llu",
d8b906
+			  (unsigned long long)cb->cb.u.c.offset,
d8b906
+			  (unsigned long long)cb->cb.u.c.nbytes);
d8b906
+	}
d8b906
+#endif
d8b906
+
d8b906
 	cb_array[0] = &cb->cb;
d8b906
 	do {
d8b906
 		r = io_submit(e->aio_context, 1, cb_array);
d8b906
@@ -1145,3 +1202,21 @@ bool bcache_invalidate_fd(struct bcache *cache, int fd)
d8b906
 
d8b906
 //----------------------------------------------------------------
d8b906
 
d8b906
+void bcache_set_last_byte(struct bcache *cache, int fd, uint64_t offset, int sector_size)
d8b906
+{
d8b906
+	_last_byte_fd = fd;
d8b906
+	_last_byte_offset = offset;
d8b906
+	_last_byte_sector_size = sector_size;
d8b906
+	if (!sector_size)
d8b906
+		_last_byte_sector_size = 512;
d8b906
+}
d8b906
+
d8b906
+void bcache_unset_last_byte(struct bcache *cache, int fd)
d8b906
+{
d8b906
+	if (_last_byte_fd == fd) {
d8b906
+		_last_byte_fd = 0;
d8b906
+		_last_byte_offset = 0;
d8b906
+		_last_byte_sector_size = 0;
d8b906
+	}
d8b906
+}
d8b906
+
d8b906
diff --git a/lib/device/bcache.h b/lib/device/bcache.h
d8b906
index b0aebb4..cb902ef 100644
d8b906
--- a/lib/device/bcache.h
d8b906
+++ b/lib/device/bcache.h
d8b906
@@ -158,6 +158,9 @@ bool bcache_write_bytes(struct bcache *cache, int fd, uint64_t start, size_t len
d8b906
 bool bcache_zero_bytes(struct bcache *cache, int fd, uint64_t start, size_t len);
d8b906
 bool bcache_set_bytes(struct bcache *cache, int fd, uint64_t start, size_t len, uint8_t val);
d8b906
 
d8b906
+void bcache_set_last_byte(struct bcache *cache, int fd, uint64_t offset, int sector_size);
d8b906
+void bcache_unset_last_byte(struct bcache *cache, int fd);
d8b906
+
d8b906
 //----------------------------------------------------------------
d8b906
 
d8b906
 #endif
d8b906
diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
d8b906
index 792d75a..36afba1 100644
d8b906
--- a/lib/format_text/format-text.c
d8b906
+++ b/lib/format_text/format-text.c
d8b906
@@ -400,10 +400,14 @@ static int _raw_write_mda_header(const struct format_type *fmt,
d8b906
 					     MDA_HEADER_SIZE -
d8b906
 					     sizeof(mdah->checksum_xl)));
d8b906
 
d8b906
+	dev_set_last_byte(dev, start_byte + MDA_HEADER_SIZE);
d8b906
+
d8b906
 	if (!dev_write_bytes(dev, start_byte, MDA_HEADER_SIZE, mdah)) {
d8b906
+		dev_unset_last_byte(dev);
d8b906
 		log_error("Failed to write mda header to %s fd %d", dev_name(dev), dev->bcache_fd);
d8b906
 		return 0;
d8b906
 	}
d8b906
+	dev_unset_last_byte(dev);
d8b906
 
d8b906
 	return 1;
d8b906
 }
d8b906
@@ -677,10 +681,13 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
d8b906
 			    (unsigned long long)(mdac->rlocn.size - new_wrap),
d8b906
 			    (unsigned long long)new_wrap);
d8b906
 
d8b906
+	dev_set_last_byte(mdac->area.dev, mdac->area.start + mdah->size);
d8b906
+
d8b906
 	if (!dev_write_bytes(mdac->area.dev, mdac->area.start + mdac->rlocn.offset,
d8b906
 		                (size_t) (mdac->rlocn.size - new_wrap),
d8b906
 		                fidtc->raw_metadata_buf)) {
d8b906
 		log_error("Failed to write metadata to %s fd %d", dev_name(mdac->area.dev), mdac->area.dev->bcache_fd);
d8b906
+		dev_unset_last_byte(mdac->area.dev);
d8b906
 		goto out;
d8b906
 	}
d8b906
 
d8b906
@@ -694,10 +701,13 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
d8b906
 			                (size_t) new_wrap,
d8b906
 			                fidtc->raw_metadata_buf + mdac->rlocn.size - new_wrap)) {
d8b906
 			log_error("Failed to write metadata wrap to %s fd %d", dev_name(mdac->area.dev), mdac->area.dev->bcache_fd);
d8b906
+			dev_unset_last_byte(mdac->area.dev);
d8b906
 			goto out;
d8b906
 		}
d8b906
 	}
d8b906
 
d8b906
+	dev_unset_last_byte(mdac->area.dev);
d8b906
+
d8b906
 	mdac->rlocn.checksum = calc_crc(INITIAL_CRC, (uint8_t *)fidtc->raw_metadata_buf,
d8b906
 					(uint32_t) (mdac->rlocn.size -
d8b906
 						    new_wrap));
d8b906
diff --git a/lib/label/label.c b/lib/label/label.c
d8b906
index bafa543..d2cfe62 100644
d8b906
--- a/lib/label/label.c
d8b906
+++ b/lib/label/label.c
d8b906
@@ -172,6 +172,7 @@ int label_write(struct device *dev, struct label *label)
d8b906
 {
d8b906
 	char buf[LABEL_SIZE] __attribute__((aligned(8)));
d8b906
 	struct label_header *lh = (struct label_header *) buf;
d8b906
+	uint64_t offset;
d8b906
 	int r = 1;
d8b906
 
d8b906
 	if (!label->labeller->ops->write) {
d8b906
@@ -206,11 +207,17 @@ int label_write(struct device *dev, struct label *label)
d8b906
 		return 0;
d8b906
 	}
d8b906
 
d8b906
-	if (!dev_write_bytes(dev, label->sector << SECTOR_SHIFT, LABEL_SIZE, buf)) {
d8b906
+	offset = label->sector << SECTOR_SHIFT;
d8b906
+
d8b906
+	dev_set_last_byte(dev, offset + LABEL_SIZE);
d8b906
+
d8b906
+	if (!dev_write_bytes(dev, offset, LABEL_SIZE, buf)) {
d8b906
 		log_debug_devs("Failed to write label to %s", dev_name(dev));
d8b906
 		r = 0;
d8b906
 	}
d8b906
 
d8b906
+	dev_unset_last_byte(dev);
d8b906
+
d8b906
 	return r;
d8b906
 }
d8b906
 
d8b906
@@ -1256,9 +1263,12 @@ bool dev_write_zeros(struct device *dev, uint64_t start, size_t len)
d8b906
 		}
d8b906
 	}
d8b906
 
d8b906
+	dev_set_last_byte(dev, start + len);
d8b906
+
d8b906
 	if (!bcache_zero_bytes(scan_bcache, dev->bcache_fd, start, len)) {
d8b906
 		log_error("Error writing device %s at %llu length %u.",
d8b906
 			  dev_name(dev), (unsigned long long)start, (uint32_t)len);
d8b906
+		dev_unset_last_byte(dev);
d8b906
 		label_scan_invalidate(dev);
d8b906
 		return false;
d8b906
 	}
d8b906
@@ -1266,9 +1276,11 @@ bool dev_write_zeros(struct device *dev, uint64_t start, size_t len)
d8b906
 	if (!bcache_flush(scan_bcache)) {
d8b906
 		log_error("Error writing device %s at %llu length %u.",
d8b906
 			  dev_name(dev), (unsigned long long)start, (uint32_t)len);
d8b906
+		dev_unset_last_byte(dev);
d8b906
 		label_scan_invalidate(dev);
d8b906
 		return false;
d8b906
 	}
d8b906
+	dev_unset_last_byte(dev);
d8b906
 	return true;
d8b906
 }
d8b906
 
d8b906
@@ -1302,9 +1314,12 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
d8b906
 		}
d8b906
 	}
d8b906
 
d8b906
+	dev_set_last_byte(dev, start + len);
d8b906
+
d8b906
 	if (!bcache_set_bytes(scan_bcache, dev->bcache_fd, start, len, val)) {
d8b906
 		log_error("Error writing device %s at %llu length %u.",
d8b906
 			  dev_name(dev), (unsigned long long)start, (uint32_t)len);
d8b906
+		dev_unset_last_byte(dev);
d8b906
 		label_scan_invalidate(dev);
d8b906
 		return false;
d8b906
 	}
d8b906
@@ -1312,9 +1327,27 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
d8b906
 	if (!bcache_flush(scan_bcache)) {
d8b906
 		log_error("Error writing device %s at %llu length %u.",
d8b906
 			  dev_name(dev), (unsigned long long)start, (uint32_t)len);
d8b906
+		dev_unset_last_byte(dev);
d8b906
 		label_scan_invalidate(dev);
d8b906
 		return false;
d8b906
 	}
d8b906
+
d8b906
+	dev_unset_last_byte(dev);
d8b906
 	return true;
d8b906
 }
d8b906
 
d8b906
+void dev_set_last_byte(struct device *dev, uint64_t offset)
d8b906
+{
d8b906
+	unsigned int phys_block_size = 0;
d8b906
+	unsigned int block_size = 0;
d8b906
+
d8b906
+	dev_get_block_size(dev, &phys_block_size, &block_size);
d8b906
+
d8b906
+	bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size);
d8b906
+}
d8b906
+
d8b906
+void dev_unset_last_byte(struct device *dev)
d8b906
+{
d8b906
+	bcache_unset_last_byte(scan_bcache, dev->bcache_fd);
d8b906
+}
d8b906
+
d8b906
diff --git a/lib/label/label.h b/lib/label/label.h
d8b906
index 5ed8bc8..e2b8263 100644
d8b906
--- a/lib/label/label.h
d8b906
+++ b/lib/label/label.h
d8b906
@@ -125,5 +125,7 @@ bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data);
d8b906
 bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data);
d8b906
 bool dev_write_zeros(struct device *dev, uint64_t start, size_t len);
d8b906
 bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val);
d8b906
+void dev_set_last_byte(struct device *dev, uint64_t offset);
d8b906
+void dev_unset_last_byte(struct device *dev);
d8b906
 
d8b906
 #endif
d8b906
diff --git a/lib/metadata/mirror.c b/lib/metadata/mirror.c
d8b906
index c7d8a9e..b1dcaa0 100644
d8b906
--- a/lib/metadata/mirror.c
d8b906
+++ b/lib/metadata/mirror.c
d8b906
@@ -302,10 +302,14 @@ static int _write_log_header(struct cmd_context *cmd, struct logical_volume *lv)
d8b906
 		return 0;
d8b906
 	}
d8b906
 
d8b906
+	dev_set_last_byte(dev, sizeof(log_header));
d8b906
+
d8b906
 	if (!dev_write_bytes(dev, UINT64_C(0), sizeof(log_header), &log_header)) {
d8b906
+		dev_unset_last_byte(dev);
d8b906
 		log_error("Failed to write log header to %s.", name);
d8b906
 		return 0;
d8b906
 	}
d8b906
+	dev_unset_last_byte(dev);
d8b906
 
d8b906
 	label_scan_invalidate(dev);
d8b906
 
d8b906
-- 
d8b906
1.8.3.1
d8b906