Blame SOURCES/0032-kpartx-read-devices-with-direct-IO.patch

785c99
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
785c99
From: Benjamin Marzinski <bmarzins@redhat.com>
785c99
Date: Fri, 26 Jun 2020 20:06:24 -0500
785c99
Subject: [PATCH] kpartx: read devices with direct IO
785c99
785c99
If kpartx is used on top of shared storage, and a device has its
785c99
partition table changed on one machine, and then kpartx is run on
785c99
another, it may not see the new data, because the cache still contains
785c99
the old data, and there is nothing to tell the machine running kpartx to
785c99
invalidate it. To solve this, kpartx should read the devices using
785c99
direct io.
785c99
785c99
One issue with how this code has been updated is that the original code
785c99
for getblock() always read 1024 bytes. The new code reads a logical
785c99
sector size chunk of the device, and returns a pointer to the 512 byte
785c99
sector that the caller asked for, within that (possibly larger) chunk.
785c99
This means that if the logical sector size is 512, then the code is now
785c99
only reading 512 bytes.  Looking through the code for the various
785c99
partition types, I can't see a case where more than 512 bytes is needed
785c99
and getblock() is used.  If anyone has a reason why this code should be
785c99
reading 1024 bytes at minimum, I can certainly change this.  But when I
785c99
looked, I couldn't find a case where reading 512 bytes would cause a
785c99
problem.
785c99
785c99
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
785c99
---
785c99
 kpartx/dasd.c   |  7 ++++---
785c99
 kpartx/gpt.c    | 22 +++++++++----------
785c99
 kpartx/kpartx.c | 56 +++++++++++++++++++++++++++++++++++++++----------
785c99
 kpartx/kpartx.h |  2 ++
785c99
 4 files changed, 61 insertions(+), 26 deletions(-)
785c99
785c99
diff --git a/kpartx/dasd.c b/kpartx/dasd.c
785c99
index 14b9d3aa..f0398645 100644
785c99
--- a/kpartx/dasd.c
785c99
+++ b/kpartx/dasd.c
785c99
@@ -22,6 +22,7 @@
785c99
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
785c99
  */
785c99
 
785c99
+#define _GNU_SOURCE
785c99
 #include <stdio.h>
785c99
 #include <stdlib.h>
785c99
 #include <unistd.h>
785c99
@@ -117,13 +118,13 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
785c99
 
785c99
 		sprintf(pathname, "/dev/.kpartx-node-%u-%u",
785c99
 			(unsigned int)major(dev), (unsigned int)minor(dev));
785c99
-		if ((fd_dasd = open(pathname, O_RDONLY)) == -1) {
785c99
+		if ((fd_dasd = open(pathname, O_RDONLY | O_DIRECT)) == -1) {
785c99
 			/* Devicenode does not exist. Try to create one */
785c99
 			if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) {
785c99
 				/* Couldn't create a device node */
785c99
 				return -1;
785c99
 			}
785c99
-			fd_dasd = open(pathname, O_RDONLY);
785c99
+			fd_dasd = open(pathname, O_RDONLY | O_DIRECT);
785c99
 			/*
785c99
 			 * The file will vanish when the last process (we)
785c99
 			 * has ceased to access it.
785c99
@@ -175,7 +176,7 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
785c99
 	 * Get volume label, extract name and type.
785c99
 	 */
785c99
 
785c99
-	if (!(data = (unsigned char *)malloc(blocksize)))
785c99
+	if (aligned_malloc((void **)&data, blocksize, NULL))
785c99
 		goto out;
785c99
 
785c99
 
785c99
diff --git a/kpartx/gpt.c b/kpartx/gpt.c
785c99
index 785b34ea..f7fefb70 100644
785c99
--- a/kpartx/gpt.c
785c99
+++ b/kpartx/gpt.c
785c99
@@ -243,8 +243,7 @@ alloc_read_gpt_entries(int fd, gpt_header * gpt)
785c99
 
785c99
 	if (!count) return NULL;
785c99
 
785c99
-	pte = (gpt_entry *)malloc(count);
785c99
-	if (!pte)
785c99
+	if (aligned_malloc((void **)&pte, get_sector_size(fd), &count))
785c99
 		return NULL;
785c99
 	memset(pte, 0, count);
785c99
 
785c99
@@ -269,12 +268,11 @@ static gpt_header *
785c99
 alloc_read_gpt_header(int fd, uint64_t lba)
785c99
 {
785c99
 	gpt_header *gpt;
785c99
-	gpt = (gpt_header *)
785c99
-	    malloc(sizeof (gpt_header));
785c99
-	if (!gpt)
785c99
+	size_t size = sizeof (gpt_header);
785c99
+	if (aligned_malloc((void **)&gpt, get_sector_size(fd), &size))
785c99
 		return NULL;
785c99
-	memset(gpt, 0, sizeof (*gpt));
785c99
-	if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) {
785c99
+	memset(gpt, 0, size);
785c99
+	if (!read_lba(fd, lba, gpt, size)) {
785c99
 		free(gpt);
785c99
 		return NULL;
785c99
 	}
785c99
@@ -498,6 +496,7 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
785c99
 	gpt_header *pgpt = NULL, *agpt = NULL;
785c99
 	gpt_entry *pptes = NULL, *aptes = NULL;
785c99
 	legacy_mbr *legacymbr = NULL;
785c99
+	size_t size = sizeof(legacy_mbr);
785c99
 	uint64_t lastlba;
785c99
 	if (!gpt || !ptes)
785c99
 		return 0;
785c99
@@ -526,11 +525,10 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
785c99
 	}
785c99
 
785c99
 	/* This will be added to the EFI Spec. per Intel after v1.02. */
785c99
-	legacymbr = malloc(sizeof (*legacymbr));
785c99
-	if (legacymbr) {
785c99
-		memset(legacymbr, 0, sizeof (*legacymbr));
785c99
-		read_lba(fd, 0, (uint8_t *) legacymbr,
785c99
-			 sizeof (*legacymbr));
785c99
+	if (aligned_malloc((void **)&legacymbr, get_sector_size(fd),
785c99
+			   &size) == 0) {
785c99
+		memset(legacymbr, 0, size);
785c99
+		read_lba(fd, 0, (uint8_t *) legacymbr, size);
785c99
 		good_pmbr = is_pmbr_valid(legacymbr);
785c99
 		free(legacymbr);
785c99
 		legacymbr=NULL;
785c99
diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c
785c99
index d3620c5c..c24ad6d9 100644
785c99
--- a/kpartx/kpartx.c
785c99
+++ b/kpartx/kpartx.c
785c99
@@ -19,6 +19,7 @@
785c99
  * cva, 2002-10-26
785c99
  */
785c99
 
785c99
+#define _GNU_SOURCE
785c99
 #include <stdio.h>
785c99
 #include <fcntl.h>
785c99
 #include <errno.h>
785c99
@@ -41,7 +42,6 @@
785c99
 
785c99
 #define SIZE(a) (sizeof(a)/sizeof((a)[0]))
785c99
 
785c99
-#define READ_SIZE	1024
785c99
 #define MAXTYPES	64
785c99
 #define MAXSLICES	256
785c99
 #define DM_TARGET	"linear"
785c99
@@ -388,7 +388,7 @@ main(int argc, char **argv){
785c99
 		set_delimiter(mapname, delim);
785c99
 	}
785c99
 
785c99
-	fd = open(device, O_RDONLY);
785c99
+	fd = open(device, O_RDONLY | O_DIRECT);
785c99
 
785c99
 	if (fd == -1) {
785c99
 		perror(device);
785c99
@@ -690,9 +690,9 @@ xmalloc (size_t size) {
785c99
  */
785c99
 
785c99
 static int
785c99
-sseek(int fd, unsigned int secnr) {
785c99
+sseek(int fd, unsigned int secnr, int secsz) {
785c99
 	off64_t in, out;
785c99
-	in = ((off64_t) secnr << 9);
785c99
+	in = ((off64_t) secnr * secsz);
785c99
 	out = 1;
785c99
 
785c99
 	if ((out = lseek64(fd, in, SEEK_SET)) != in)
785c99
@@ -703,6 +703,31 @@ sseek(int fd, unsigned int secnr) {
785c99
 	return 0;
785c99
 }
785c99
 
785c99
+int
785c99
+aligned_malloc(void **mem_p, size_t align, size_t *size_p)
785c99
+{
785c99
+	static size_t pgsize = 0;
785c99
+	size_t size;
785c99
+	int err;
785c99
+
785c99
+	if (!mem_p || !align || (size_p && !*size_p))
785c99
+		return EINVAL;
785c99
+
785c99
+	if (!pgsize)
785c99
+		pgsize = getpagesize();
785c99
+
785c99
+	if (size_p)
785c99
+		size = ((*size_p + align - 1) / align) * align;
785c99
+	else
785c99
+		size = pgsize;
785c99
+
785c99
+	err = posix_memalign(mem_p, pgsize, size);
785c99
+	if (!err && size_p)
785c99
+		*size_p = size;
785c99
+	return err;
785c99
+}
785c99
+
785c99
+/* always in sector size blocks */
785c99
 static
785c99
 struct block {
785c99
 	unsigned int secnr;
785c99
@@ -710,30 +735,39 @@ struct block {
785c99
 	struct block *next;
785c99
 } *blockhead;
785c99
 
785c99
+/* blknr is always in 512 byte blocks */
785c99
 char *
785c99
-getblock (int fd, unsigned int secnr) {
785c99
+getblock (int fd, unsigned int blknr) {
785c99
+	unsigned int secsz = get_sector_size(fd);
785c99
+	unsigned int blks_per_sec = secsz / 512;
785c99
+	unsigned int secnr = blknr / blks_per_sec;
785c99
+	unsigned int blk_off = (blknr % blks_per_sec) * 512;
785c99
 	struct block *bp;
785c99
 
785c99
 	for (bp = blockhead; bp; bp = bp->next)
785c99
 
785c99
 		if (bp->secnr == secnr)
785c99
-			return bp->block;
785c99
+			return bp->block + blk_off;
785c99
 
785c99
-	if (sseek(fd, secnr))
785c99
+	if (sseek(fd, secnr, secsz))
785c99
 		return NULL;
785c99
 
785c99
 	bp = xmalloc(sizeof(struct block));
785c99
 	bp->secnr = secnr;
785c99
 	bp->next = blockhead;
785c99
 	blockhead = bp;
785c99
-	bp->block = (char *) xmalloc(READ_SIZE);
785c99
+	if (aligned_malloc((void **)&bp->block, secsz, NULL)) {
785c99
+		fprintf(stderr, "aligned_malloc failed\n");
785c99
+		exit(1);
785c99
+	}
785c99
 
785c99
-	if (read(fd, bp->block, READ_SIZE) != READ_SIZE) {
785c99
+	if (read(fd, bp->block, secsz) != secsz) {
785c99
 		fprintf(stderr, "read error, sector %d\n", secnr);
785c99
-		bp->block = NULL;
785c99
+		blockhead = bp->next;
785c99
+		return NULL;
785c99
 	}
785c99
 
785c99
-	return bp->block;
785c99
+	return bp->block + blk_off;
785c99
 }
785c99
 
785c99
 int
785c99
diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h
785c99
index 67edeb82..727632c1 100644
785c99
--- a/kpartx/kpartx.h
785c99
+++ b/kpartx/kpartx.h
785c99
@@ -1,6 +1,7 @@
785c99
 #ifndef _KPARTX_H
785c99
 #define _KPARTX_H
785c99
 
785c99
+#include <stddef.h>
785c99
 #include <stdint.h>
785c99
 #include <sys/ioctl.h>
785c99
 
785c99
@@ -61,6 +62,7 @@ extern ptreader read_mac_pt;
785c99
 extern ptreader read_sun_pt;
785c99
 extern ptreader read_ps3_pt;
785c99
 
785c99
+int aligned_malloc(void **mem_p, size_t align, size_t *size_p);
785c99
 char *getblock(int fd, unsigned int secnr);
785c99
 
785c99
 static inline unsigned int
785c99
-- 
785c99
2.17.2
785c99