|
|
016a62 |
From e69f257e657473ba59f48692d387e292a24892bb Mon Sep 17 00:00:00 2001
|
|
|
016a62 |
From: "plai@redhat.com" <plai@redhat.com>
|
|
|
016a62 |
Date: Tue, 20 Aug 2019 16:12:50 +0100
|
|
|
016a62 |
Subject: [PATCH 03/11] mmap-alloc: fix hugetlbfs misaligned length in ppc64
|
|
|
016a62 |
|
|
|
016a62 |
RH-Author: plai@redhat.com
|
|
|
016a62 |
Message-id: <1566317571-5697-4-git-send-email-plai@redhat.com>
|
|
|
016a62 |
Patchwork-id: 90082
|
|
|
016a62 |
O-Subject: [RHEL8.2 qemu-kvm PATCH 3/4] mmap-alloc: fix hugetlbfs misaligned length in ppc64
|
|
|
016a62 |
Bugzilla: 1539282
|
|
|
016a62 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
016a62 |
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
|
|
|
016a62 |
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
|
|
|
016a62 |
|
|
|
016a62 |
From: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
|
|
|
016a62 |
|
|
|
016a62 |
The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc:
|
|
|
016a62 |
fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64.
|
|
|
016a62 |
|
|
|
016a62 |
However, we still need to consider the underlying huge page size
|
|
|
016a62 |
during munmap() because it requires that both address and length be a
|
|
|
016a62 |
multiple of the underlying huge page size for Huge TLB mappings.
|
|
|
016a62 |
Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES
|
|
|
016a62 |
section of the munmap(2) manual:
|
|
|
016a62 |
|
|
|
016a62 |
"For munmap(), addr and length must both be a multiple of the
|
|
|
016a62 |
underlying huge page size."
|
|
|
016a62 |
|
|
|
016a62 |
On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB
|
|
|
016a62 |
mappings because the mapped segment can be aligned with the underlying
|
|
|
016a62 |
huge page size rather than with the native system page size, as
|
|
|
016a62 |
returned by getpagesize().
|
|
|
016a62 |
|
|
|
016a62 |
This has the side effect of not releasing huge pages back to the pool
|
|
|
016a62 |
after a hugetlbfs file-backed memory device is hot-unplugged.
|
|
|
016a62 |
|
|
|
016a62 |
This patch fixes the situation in qemu_ram_mmap() and
|
|
|
016a62 |
qemu_ram_munmap() by considering the underlying page size on ppc64.
|
|
|
016a62 |
|
|
|
016a62 |
After this patch, memory hot-unplug releases huge pages back to the
|
|
|
016a62 |
pool.
|
|
|
016a62 |
|
|
|
016a62 |
Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218
|
|
|
016a62 |
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
|
|
|
016a62 |
Reviewed-by: Greg Kurz <groug@kaod.org>
|
|
|
016a62 |
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
|
|
016a62 |
(cherry picked from commit 53adb9d43e1abba187387a51f238e878e934c647)
|
|
|
016a62 |
Signed-off-by: Paul Lai <plai@redhat.com>
|
|
|
016a62 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
016a62 |
---
|
|
|
016a62 |
exec.c | 4 ++--
|
|
|
016a62 |
include/qemu/mmap-alloc.h | 2 +-
|
|
|
016a62 |
util/mmap-alloc.c | 22 ++++++++++++++++------
|
|
|
016a62 |
util/oslib-posix.c | 2 +-
|
|
|
016a62 |
4 files changed, 20 insertions(+), 10 deletions(-)
|
|
|
016a62 |
|
|
|
016a62 |
diff --git a/exec.c b/exec.c
|
|
|
016a62 |
index a79eaa3..9112d8b 100644
|
|
|
016a62 |
--- a/exec.c
|
|
|
016a62 |
+++ b/exec.c
|
|
|
016a62 |
@@ -1679,7 +1679,7 @@ static void *file_ram_alloc(RAMBlock *block,
|
|
|
016a62 |
if (mem_prealloc) {
|
|
|
016a62 |
os_mem_prealloc(fd, area, memory, smp_cpus, errp);
|
|
|
016a62 |
if (errp && *errp) {
|
|
|
016a62 |
- qemu_ram_munmap(area, memory);
|
|
|
016a62 |
+ qemu_ram_munmap(fd, area, memory);
|
|
|
016a62 |
return NULL;
|
|
|
016a62 |
}
|
|
|
016a62 |
}
|
|
|
016a62 |
@@ -2200,7 +2200,7 @@ static void reclaim_ramblock(RAMBlock *block)
|
|
|
016a62 |
xen_invalidate_map_cache_entry(block->host);
|
|
|
016a62 |
#ifndef _WIN32
|
|
|
016a62 |
} else if (block->fd >= 0) {
|
|
|
016a62 |
- qemu_ram_munmap(block->host, block->max_length);
|
|
|
016a62 |
+ qemu_ram_munmap(block->fd, block->host, block->max_length);
|
|
|
016a62 |
close(block->fd);
|
|
|
016a62 |
#endif
|
|
|
016a62 |
} else {
|
|
|
016a62 |
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
|
|
|
016a62 |
index 190688a..eec98d8 100644
|
|
|
016a62 |
--- a/include/qemu/mmap-alloc.h
|
|
|
016a62 |
+++ b/include/qemu/mmap-alloc.h
|
|
|
016a62 |
@@ -28,6 +28,6 @@ void *qemu_ram_mmap(int fd,
|
|
|
016a62 |
bool shared,
|
|
|
016a62 |
bool is_pmem);
|
|
|
016a62 |
|
|
|
016a62 |
-void qemu_ram_munmap(void *ptr, size_t size);
|
|
|
016a62 |
+void qemu_ram_munmap(int fd, void *ptr, size_t size);
|
|
|
016a62 |
|
|
|
016a62 |
#endif
|
|
|
016a62 |
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
|
|
|
016a62 |
index b29fcee..bbd9077 100644
|
|
|
016a62 |
--- a/util/mmap-alloc.c
|
|
|
016a62 |
+++ b/util/mmap-alloc.c
|
|
|
016a62 |
@@ -82,6 +82,7 @@ void *qemu_ram_mmap(int fd,
|
|
|
016a62 |
int flags;
|
|
|
016a62 |
int guardfd;
|
|
|
016a62 |
size_t offset;
|
|
|
016a62 |
+ size_t pagesize;
|
|
|
016a62 |
size_t total;
|
|
|
016a62 |
void *guardptr;
|
|
|
016a62 |
void *ptr;
|
|
|
016a62 |
@@ -102,7 +103,8 @@ void *qemu_ram_mmap(int fd,
|
|
|
016a62 |
* anonymous memory is OK.
|
|
|
016a62 |
*/
|
|
|
016a62 |
flags = MAP_PRIVATE;
|
|
|
016a62 |
- if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) {
|
|
|
016a62 |
+ pagesize = qemu_fd_getpagesize(fd);
|
|
|
016a62 |
+ if (fd == -1 || pagesize == getpagesize()) {
|
|
|
016a62 |
guardfd = -1;
|
|
|
016a62 |
flags |= MAP_ANONYMOUS;
|
|
|
016a62 |
} else {
|
|
|
016a62 |
@@ -111,6 +113,7 @@ void *qemu_ram_mmap(int fd,
|
|
|
016a62 |
}
|
|
|
016a62 |
#else
|
|
|
016a62 |
guardfd = -1;
|
|
|
016a62 |
+ pagesize = getpagesize();
|
|
|
016a62 |
flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
|
|
016a62 |
#endif
|
|
|
016a62 |
|
|
|
016a62 |
@@ -122,7 +125,7 @@ void *qemu_ram_mmap(int fd,
|
|
|
016a62 |
|
|
|
016a62 |
assert(is_power_of_2(align));
|
|
|
016a62 |
/* Always align to host page size */
|
|
|
016a62 |
- assert(align >= getpagesize());
|
|
|
016a62 |
+ assert(align >= pagesize);
|
|
|
016a62 |
|
|
|
016a62 |
flags = MAP_FIXED;
|
|
|
016a62 |
flags |= fd == -1 ? MAP_ANONYMOUS : 0;
|
|
|
016a62 |
@@ -145,17 +148,24 @@ void *qemu_ram_mmap(int fd,
|
|
|
016a62 |
* a guard page guarding against potential buffer overflows.
|
|
|
016a62 |
*/
|
|
|
016a62 |
total -= offset;
|
|
|
016a62 |
- if (total > size + getpagesize()) {
|
|
|
016a62 |
- munmap(ptr + size + getpagesize(), total - size - getpagesize());
|
|
|
016a62 |
+ if (total > size + pagesize) {
|
|
|
016a62 |
+ munmap(ptr + size + pagesize, total - size - pagesize);
|
|
|
016a62 |
}
|
|
|
016a62 |
|
|
|
016a62 |
return ptr;
|
|
|
016a62 |
}
|
|
|
016a62 |
|
|
|
016a62 |
-void qemu_ram_munmap(void *ptr, size_t size)
|
|
|
016a62 |
+void qemu_ram_munmap(int fd, void *ptr, size_t size)
|
|
|
016a62 |
{
|
|
|
016a62 |
+ size_t pagesize;
|
|
|
016a62 |
+
|
|
|
016a62 |
if (ptr) {
|
|
|
016a62 |
/* Unmap both the RAM block and the guard page */
|
|
|
016a62 |
- munmap(ptr, size + getpagesize());
|
|
|
016a62 |
+#if defined(__powerpc64__) && defined(__linux__)
|
|
|
016a62 |
+ pagesize = qemu_fd_getpagesize(fd);
|
|
|
016a62 |
+#else
|
|
|
016a62 |
+ pagesize = getpagesize();
|
|
|
016a62 |
+#endif
|
|
|
016a62 |
+ munmap(ptr, size + pagesize);
|
|
|
016a62 |
}
|
|
|
016a62 |
}
|
|
|
016a62 |
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
|
|
|
016a62 |
index c36b2bb..7b6db04 100644
|
|
|
016a62 |
--- a/util/oslib-posix.c
|
|
|
016a62 |
+++ b/util/oslib-posix.c
|
|
|
016a62 |
@@ -153,7 +153,7 @@ void qemu_vfree(void *ptr)
|
|
|
016a62 |
void qemu_anon_ram_free(void *ptr, size_t size)
|
|
|
016a62 |
{
|
|
|
016a62 |
trace_qemu_anon_ram_free(ptr, size);
|
|
|
016a62 |
- qemu_ram_munmap(ptr, size);
|
|
|
016a62 |
+ qemu_ram_munmap(-1, ptr, size);
|
|
|
016a62 |
}
|
|
|
016a62 |
|
|
|
016a62 |
void qemu_set_block(int fd)
|
|
|
016a62 |
--
|
|
|
016a62 |
1.8.3.1
|
|
|
016a62 |
|