Blame SOURCES/0032-kpartx-read-devices-with-direct-IO.patch

96a22b
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
96a22b
From: Benjamin Marzinski <bmarzins@redhat.com>
96a22b
Date: Fri, 26 Jun 2020 20:06:24 -0500
96a22b
Subject: [PATCH] kpartx: read devices with direct IO
96a22b
96a22b
If kpartx is used on top of shared storage, and a device has its
96a22b
partition table changed on one machine, and then kpartx is run on
96a22b
another, it may not see the new data, because the cache still contains
96a22b
the old data, and there is nothing to tell the machine running kpartx to
96a22b
invalidate it. To solve this, kpartx should read the devices using
96a22b
direct io.
96a22b
96a22b
One issue with how this code has been updated is that the original code
96a22b
for getblock() always read 1024 bytes. The new code reads a logical
96a22b
sector size chunk of the device, and returns a pointer to the 512 byte
96a22b
sector that the caller asked for, within that (possibly larger) chunk.
96a22b
This means that if the logical sector size is 512, then the code is now
96a22b
only reading 512 bytes.  Looking through the code for the various
96a22b
partition types, I can't see a case where more than 512 bytes is needed
96a22b
and getblock() is used.  If anyone has a reason why this code should be
96a22b
reading 1024 bytes at minimum, I can certainly change this.  But when I
96a22b
looked, I couldn't find a case where reading 512 bytes would cause a
96a22b
problem.
96a22b
96a22b
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
96a22b
---
96a22b
 kpartx/dasd.c   |  7 ++++---
96a22b
 kpartx/gpt.c    | 22 +++++++++----------
96a22b
 kpartx/kpartx.c | 56 +++++++++++++++++++++++++++++++++++++++----------
96a22b
 kpartx/kpartx.h |  2 ++
96a22b
 4 files changed, 61 insertions(+), 26 deletions(-)
96a22b
96a22b
diff --git a/kpartx/dasd.c b/kpartx/dasd.c
96a22b
index 14b9d3aa..f0398645 100644
96a22b
--- a/kpartx/dasd.c
96a22b
+++ b/kpartx/dasd.c
96a22b
@@ -22,6 +22,7 @@
96a22b
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
96a22b
  */
96a22b
 
96a22b
+#define _GNU_SOURCE
96a22b
 #include <stdio.h>
96a22b
 #include <stdlib.h>
96a22b
 #include <unistd.h>
96a22b
@@ -117,13 +118,13 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
96a22b
 
96a22b
 		sprintf(pathname, "/dev/.kpartx-node-%u-%u",
96a22b
 			(unsigned int)major(dev), (unsigned int)minor(dev));
96a22b
-		if ((fd_dasd = open(pathname, O_RDONLY)) == -1) {
96a22b
+		if ((fd_dasd = open(pathname, O_RDONLY | O_DIRECT)) == -1) {
96a22b
 			/* Devicenode does not exist. Try to create one */
96a22b
 			if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) {
96a22b
 				/* Couldn't create a device node */
96a22b
 				return -1;
96a22b
 			}
96a22b
-			fd_dasd = open(pathname, O_RDONLY);
96a22b
+			fd_dasd = open(pathname, O_RDONLY | O_DIRECT);
96a22b
 			/*
96a22b
 			 * The file will vanish when the last process (we)
96a22b
 			 * has ceased to access it.
96a22b
@@ -175,7 +176,7 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
96a22b
 	 * Get volume label, extract name and type.
96a22b
 	 */
96a22b
 
96a22b
-	if (!(data = (unsigned char *)malloc(blocksize)))
96a22b
+	if (aligned_malloc((void **)&data, blocksize, NULL))
96a22b
 		goto out;
96a22b
 
96a22b
 
96a22b
diff --git a/kpartx/gpt.c b/kpartx/gpt.c
96a22b
index 785b34ea..f7fefb70 100644
96a22b
--- a/kpartx/gpt.c
96a22b
+++ b/kpartx/gpt.c
96a22b
@@ -243,8 +243,7 @@ alloc_read_gpt_entries(int fd, gpt_header * gpt)
96a22b
 
96a22b
 	if (!count) return NULL;
96a22b
 
96a22b
-	pte = (gpt_entry *)malloc(count);
96a22b
-	if (!pte)
96a22b
+	if (aligned_malloc((void **)&pte, get_sector_size(fd), &count))
96a22b
 		return NULL;
96a22b
 	memset(pte, 0, count);
96a22b
 
96a22b
@@ -269,12 +268,11 @@ static gpt_header *
96a22b
 alloc_read_gpt_header(int fd, uint64_t lba)
96a22b
 {
96a22b
 	gpt_header *gpt;
96a22b
-	gpt = (gpt_header *)
96a22b
-	    malloc(sizeof (gpt_header));
96a22b
-	if (!gpt)
96a22b
+	size_t size = sizeof (gpt_header);
96a22b
+	if (aligned_malloc((void **)&gpt, get_sector_size(fd), &size))
96a22b
 		return NULL;
96a22b
-	memset(gpt, 0, sizeof (*gpt));
96a22b
-	if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) {
96a22b
+	memset(gpt, 0, size);
96a22b
+	if (!read_lba(fd, lba, gpt, size)) {
96a22b
 		free(gpt);
96a22b
 		return NULL;
96a22b
 	}
96a22b
@@ -498,6 +496,7 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
96a22b
 	gpt_header *pgpt = NULL, *agpt = NULL;
96a22b
 	gpt_entry *pptes = NULL, *aptes = NULL;
96a22b
 	legacy_mbr *legacymbr = NULL;
96a22b
+	size_t size = sizeof(legacy_mbr);
96a22b
 	uint64_t lastlba;
96a22b
 	if (!gpt || !ptes)
96a22b
 		return 0;
96a22b
@@ -526,11 +525,10 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
96a22b
 	}
96a22b
 
96a22b
 	/* This will be added to the EFI Spec. per Intel after v1.02. */
96a22b
-	legacymbr = malloc(sizeof (*legacymbr));
96a22b
-	if (legacymbr) {
96a22b
-		memset(legacymbr, 0, sizeof (*legacymbr));
96a22b
-		read_lba(fd, 0, (uint8_t *) legacymbr,
96a22b
-			 sizeof (*legacymbr));
96a22b
+	if (aligned_malloc((void **)&legacymbr, get_sector_size(fd),
96a22b
+			   &size) == 0) {
96a22b
+		memset(legacymbr, 0, size);
96a22b
+		read_lba(fd, 0, (uint8_t *) legacymbr, size);
96a22b
 		good_pmbr = is_pmbr_valid(legacymbr);
96a22b
 		free(legacymbr);
96a22b
 		legacymbr=NULL;
96a22b
diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c
96a22b
index d3620c5c..c24ad6d9 100644
96a22b
--- a/kpartx/kpartx.c
96a22b
+++ b/kpartx/kpartx.c
96a22b
@@ -19,6 +19,7 @@
96a22b
  * cva, 2002-10-26
96a22b
  */
96a22b
 
96a22b
+#define _GNU_SOURCE
96a22b
 #include <stdio.h>
96a22b
 #include <fcntl.h>
96a22b
 #include <errno.h>
96a22b
@@ -41,7 +42,6 @@
96a22b
 
96a22b
 #define SIZE(a) (sizeof(a)/sizeof((a)[0]))
96a22b
 
96a22b
-#define READ_SIZE	1024
96a22b
 #define MAXTYPES	64
96a22b
 #define MAXSLICES	256
96a22b
 #define DM_TARGET	"linear"
96a22b
@@ -388,7 +388,7 @@ main(int argc, char **argv){
96a22b
 		set_delimiter(mapname, delim);
96a22b
 	}
96a22b
 
96a22b
-	fd = open(device, O_RDONLY);
96a22b
+	fd = open(device, O_RDONLY | O_DIRECT);
96a22b
 
96a22b
 	if (fd == -1) {
96a22b
 		perror(device);
96a22b
@@ -690,9 +690,9 @@ xmalloc (size_t size) {
96a22b
  */
96a22b
 
96a22b
 static int
96a22b
-sseek(int fd, unsigned int secnr) {
96a22b
+sseek(int fd, unsigned int secnr, int secsz) {
96a22b
 	off64_t in, out;
96a22b
-	in = ((off64_t) secnr << 9);
96a22b
+	in = ((off64_t) secnr * secsz);
96a22b
 	out = 1;
96a22b
 
96a22b
 	if ((out = lseek64(fd, in, SEEK_SET)) != in)
96a22b
@@ -703,6 +703,31 @@ sseek(int fd, unsigned int secnr) {
96a22b
 	return 0;
96a22b
 }
96a22b
 
96a22b
+int
96a22b
+aligned_malloc(void **mem_p, size_t align, size_t *size_p)
96a22b
+{
96a22b
+	static size_t pgsize = 0;
96a22b
+	size_t size;
96a22b
+	int err;
96a22b
+
96a22b
+	if (!mem_p || !align || (size_p && !*size_p))
96a22b
+		return EINVAL;
96a22b
+
96a22b
+	if (!pgsize)
96a22b
+		pgsize = getpagesize();
96a22b
+
96a22b
+	if (size_p)
96a22b
+		size = ((*size_p + align - 1) / align) * align;
96a22b
+	else
96a22b
+		size = pgsize;
96a22b
+
96a22b
+	err = posix_memalign(mem_p, pgsize, size);
96a22b
+	if (!err && size_p)
96a22b
+		*size_p = size;
96a22b
+	return err;
96a22b
+}
96a22b
+
96a22b
+/* always in sector size blocks */
96a22b
 static
96a22b
 struct block {
96a22b
 	unsigned int secnr;
96a22b
@@ -710,30 +735,39 @@ struct block {
96a22b
 	struct block *next;
96a22b
 } *blockhead;
96a22b
 
96a22b
+/* blknr is always in 512 byte blocks */
96a22b
 char *
96a22b
-getblock (int fd, unsigned int secnr) {
96a22b
+getblock (int fd, unsigned int blknr) {
96a22b
+	unsigned int secsz = get_sector_size(fd);
96a22b
+	unsigned int blks_per_sec = secsz / 512;
96a22b
+	unsigned int secnr = blknr / blks_per_sec;
96a22b
+	unsigned int blk_off = (blknr % blks_per_sec) * 512;
96a22b
 	struct block *bp;
96a22b
 
96a22b
 	for (bp = blockhead; bp; bp = bp->next)
96a22b
 
96a22b
 		if (bp->secnr == secnr)
96a22b
-			return bp->block;
96a22b
+			return bp->block + blk_off;
96a22b
 
96a22b
-	if (sseek(fd, secnr))
96a22b
+	if (sseek(fd, secnr, secsz))
96a22b
 		return NULL;
96a22b
 
96a22b
 	bp = xmalloc(sizeof(struct block));
96a22b
 	bp->secnr = secnr;
96a22b
 	bp->next = blockhead;
96a22b
 	blockhead = bp;
96a22b
-	bp->block = (char *) xmalloc(READ_SIZE);
96a22b
+	if (aligned_malloc((void **)&bp->block, secsz, NULL)) {
96a22b
+		fprintf(stderr, "aligned_malloc failed\n");
96a22b
+		exit(1);
96a22b
+	}
96a22b
 
96a22b
-	if (read(fd, bp->block, READ_SIZE) != READ_SIZE) {
96a22b
+	if (read(fd, bp->block, secsz) != secsz) {
96a22b
 		fprintf(stderr, "read error, sector %d\n", secnr);
96a22b
-		bp->block = NULL;
96a22b
+		blockhead = bp->next;
96a22b
+		return NULL;
96a22b
 	}
96a22b
 
96a22b
-	return bp->block;
96a22b
+	return bp->block + blk_off;
96a22b
 }
96a22b
 
96a22b
 int
96a22b
diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h
96a22b
index 67edeb82..727632c1 100644
96a22b
--- a/kpartx/kpartx.h
96a22b
+++ b/kpartx/kpartx.h
96a22b
@@ -1,6 +1,7 @@
96a22b
 #ifndef _KPARTX_H
96a22b
 #define _KPARTX_H
96a22b
 
96a22b
+#include <stddef.h>
96a22b
 #include <stdint.h>
96a22b
 #include <sys/ioctl.h>
96a22b
 
96a22b
@@ -61,6 +62,7 @@ extern ptreader read_mac_pt;
96a22b
 extern ptreader read_sun_pt;
96a22b
 extern ptreader read_ps3_pt;
96a22b
 
96a22b
+int aligned_malloc(void **mem_p, size_t align, size_t *size_p);
96a22b
 char *getblock(int fd, unsigned int secnr);
96a22b
 
96a22b
 static inline unsigned int
96a22b
-- 
96a22b
2.17.2
96a22b