|
|
7711c0 |
From 6eda983528f3d74ecd4bdecf6aae127e9c93ed48 Mon Sep 17 00:00:00 2001
|
|
|
7711c0 |
From: Sam Bobroff <sbobroff@redhat.com>
|
|
|
7711c0 |
Date: Tue, 16 Apr 2019 05:29:10 +0200
|
|
|
7711c0 |
Subject: [PATCH 163/163] mmap-alloc: fix hugetlbfs misaligned length in ppc64
|
|
|
7711c0 |
|
|
|
7711c0 |
RH-Author: Sam Bobroff <sbobroff@redhat.com>
|
|
|
7711c0 |
Message-id: <1555392550-21945-3-git-send-email-sbobroff@redhat.com>
|
|
|
7711c0 |
Patchwork-id: 85702
|
|
|
7711c0 |
O-Subject: [RHEL-7.7 qemu-kvm-rhev BZ1672819 PATCH 2/2 REPOST] mmap-alloc: fix hugetlbfs misaligned length in ppc64
|
|
|
7711c0 |
Bugzilla: 1672819
|
|
|
7711c0 |
RH-Acked-by: David Gibson <dgibson@redhat.com>
|
|
|
7711c0 |
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
|
|
7711c0 |
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
|
|
|
7711c0 |
|
|
|
7711c0 |
From: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
|
|
|
7711c0 |
|
|
|
7711c0 |
The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc:
|
|
|
7711c0 |
fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64.
|
|
|
7711c0 |
|
|
|
7711c0 |
However, we still need to consider the underlying huge page size
|
|
|
7711c0 |
during munmap() because it requires that both address and length be a
|
|
|
7711c0 |
multiple of the underlying huge page size for Huge TLB mappings.
|
|
|
7711c0 |
Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES
|
|
|
7711c0 |
section of the munmap(2) manual:
|
|
|
7711c0 |
|
|
|
7711c0 |
"For munmap(), addr and length must both be a multiple of the
|
|
|
7711c0 |
underlying huge page size."
|
|
|
7711c0 |
|
|
|
7711c0 |
On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB
|
|
|
7711c0 |
mappings because the mapped segment can be aligned with the underlying
|
|
|
7711c0 |
huge page size rather than with the native system page size, as
|
|
|
7711c0 |
returned by getpagesize().
|
|
|
7711c0 |
|
|
|
7711c0 |
This has the side effect of not releasing huge pages back to the pool
|
|
|
7711c0 |
after a hugetlbfs file-backed memory device is hot-unplugged.
|
|
|
7711c0 |
|
|
|
7711c0 |
This patch fixes the situation in qemu_ram_mmap() and
|
|
|
7711c0 |
qemu_ram_munmap() by considering the underlying page size on ppc64.
|
|
|
7711c0 |
|
|
|
7711c0 |
After this patch, memory hot-unplug releases huge pages back to the
|
|
|
7711c0 |
pool.
|
|
|
7711c0 |
|
|
|
7711c0 |
Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218
|
|
|
7711c0 |
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
|
|
|
7711c0 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
7711c0 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
7711c0 |
Reviewed-by: Greg Kurz <groug@kaod.org>
|
|
|
7711c0 |
(cherry picked from commit 7265c2b9716369b339d778b9ef64a8161eb8f99b)
|
|
|
7711c0 |
|
|
|
7711c0 |
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1672819
|
|
|
7711c0 |
Testing: Check that hugepage-backed RAM removed from a guest is freed
|
|
|
7711c0 |
on the host.
|
|
|
7711c0 |
Signed-off-by: Sam Bobroff <sbobroff@redhat.com>
|
|
|
7711c0 |
Upstream: Patch is in dgibson/ppc-for-4.0
|
|
|
7711c0 |
|
|
|
7711c0 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
7711c0 |
---
|
|
|
7711c0 |
exec.c | 4 ++--
|
|
|
7711c0 |
include/qemu/mmap-alloc.h | 2 +-
|
|
|
7711c0 |
util/mmap-alloc.c | 22 ++++++++++++++++------
|
|
|
7711c0 |
util/oslib-posix.c | 2 +-
|
|
|
7711c0 |
4 files changed, 20 insertions(+), 10 deletions(-)
|
|
|
7711c0 |
|
|
|
7711c0 |
diff --git a/exec.c b/exec.c
|
|
|
7711c0 |
index d87a51a..82e85ff 100644
|
|
|
7711c0 |
--- a/exec.c
|
|
|
7711c0 |
+++ b/exec.c
|
|
|
7711c0 |
@@ -1714,7 +1714,7 @@ static void *file_ram_alloc(RAMBlock *block,
|
|
|
7711c0 |
if (mem_prealloc) {
|
|
|
7711c0 |
os_mem_prealloc(fd, area, memory, smp_cpus, errp);
|
|
|
7711c0 |
if (errp && *errp) {
|
|
|
7711c0 |
- qemu_ram_munmap(area, memory);
|
|
|
7711c0 |
+ qemu_ram_munmap(fd, area, memory);
|
|
|
7711c0 |
return NULL;
|
|
|
7711c0 |
}
|
|
|
7711c0 |
}
|
|
|
7711c0 |
@@ -2235,7 +2235,7 @@ static void reclaim_ramblock(RAMBlock *block)
|
|
|
7711c0 |
xen_invalidate_map_cache_entry(block->host);
|
|
|
7711c0 |
#ifndef _WIN32
|
|
|
7711c0 |
} else if (block->fd >= 0) {
|
|
|
7711c0 |
- qemu_ram_munmap(block->host, block->max_length);
|
|
|
7711c0 |
+ qemu_ram_munmap(block->fd, block->host, block->max_length);
|
|
|
7711c0 |
close(block->fd);
|
|
|
7711c0 |
#endif
|
|
|
7711c0 |
} else {
|
|
|
7711c0 |
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
|
|
|
7711c0 |
index 50385e3..ef04f0e 100644
|
|
|
7711c0 |
--- a/include/qemu/mmap-alloc.h
|
|
|
7711c0 |
+++ b/include/qemu/mmap-alloc.h
|
|
|
7711c0 |
@@ -9,6 +9,6 @@ size_t qemu_mempath_getpagesize(const char *mem_path);
|
|
|
7711c0 |
|
|
|
7711c0 |
void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
|
|
|
7711c0 |
|
|
|
7711c0 |
-void qemu_ram_munmap(void *ptr, size_t size);
|
|
|
7711c0 |
+void qemu_ram_munmap(int fd, void *ptr, size_t size);
|
|
|
7711c0 |
|
|
|
7711c0 |
#endif
|
|
|
7711c0 |
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
|
|
|
7711c0 |
index 94ee517..19607c1 100644
|
|
|
7711c0 |
--- a/util/mmap-alloc.c
|
|
|
7711c0 |
+++ b/util/mmap-alloc.c
|
|
|
7711c0 |
@@ -78,6 +78,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
|
|
|
7711c0 |
int flags;
|
|
|
7711c0 |
int guardfd;
|
|
|
7711c0 |
size_t offset;
|
|
|
7711c0 |
+ size_t pagesize;
|
|
|
7711c0 |
size_t total;
|
|
|
7711c0 |
void *guardptr;
|
|
|
7711c0 |
void *ptr;
|
|
|
7711c0 |
@@ -98,7 +99,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
|
|
|
7711c0 |
* anonymous memory is OK.
|
|
|
7711c0 |
*/
|
|
|
7711c0 |
flags = MAP_PRIVATE;
|
|
|
7711c0 |
- if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) {
|
|
|
7711c0 |
+ pagesize = qemu_fd_getpagesize(fd);
|
|
|
7711c0 |
+ if (fd == -1 || pagesize == getpagesize()) {
|
|
|
7711c0 |
guardfd = -1;
|
|
|
7711c0 |
flags |= MAP_ANONYMOUS;
|
|
|
7711c0 |
} else {
|
|
|
7711c0 |
@@ -107,6 +109,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
|
|
|
7711c0 |
}
|
|
|
7711c0 |
#else
|
|
|
7711c0 |
guardfd = -1;
|
|
|
7711c0 |
+ pagesize = getpagesize();
|
|
|
7711c0 |
flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
|
|
7711c0 |
#endif
|
|
|
7711c0 |
|
|
|
7711c0 |
@@ -118,7 +121,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
|
|
|
7711c0 |
|
|
|
7711c0 |
assert(is_power_of_2(align));
|
|
|
7711c0 |
/* Always align to host page size */
|
|
|
7711c0 |
- assert(align >= getpagesize());
|
|
|
7711c0 |
+ assert(align >= pagesize);
|
|
|
7711c0 |
|
|
|
7711c0 |
flags = MAP_FIXED;
|
|
|
7711c0 |
flags |= fd == -1 ? MAP_ANONYMOUS : 0;
|
|
|
7711c0 |
@@ -141,17 +144,24 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
|
|
|
7711c0 |
* a guard page guarding against potential buffer overflows.
|
|
|
7711c0 |
*/
|
|
|
7711c0 |
total -= offset;
|
|
|
7711c0 |
- if (total > size + getpagesize()) {
|
|
|
7711c0 |
- munmap(ptr + size + getpagesize(), total - size - getpagesize());
|
|
|
7711c0 |
+ if (total > size + pagesize) {
|
|
|
7711c0 |
+ munmap(ptr + size + pagesize, total - size - pagesize);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
return ptr;
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
-void qemu_ram_munmap(void *ptr, size_t size)
|
|
|
7711c0 |
+void qemu_ram_munmap(int fd, void *ptr, size_t size)
|
|
|
7711c0 |
{
|
|
|
7711c0 |
+ size_t pagesize;
|
|
|
7711c0 |
+
|
|
|
7711c0 |
if (ptr) {
|
|
|
7711c0 |
/* Unmap both the RAM block and the guard page */
|
|
|
7711c0 |
- munmap(ptr, size + getpagesize());
|
|
|
7711c0 |
+#if defined(__powerpc64__) && defined(__linux__)
|
|
|
7711c0 |
+ pagesize = qemu_fd_getpagesize(fd);
|
|
|
7711c0 |
+#else
|
|
|
7711c0 |
+ pagesize = getpagesize();
|
|
|
7711c0 |
+#endif
|
|
|
7711c0 |
+ munmap(ptr, size + pagesize);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
}
|
|
|
7711c0 |
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
|
|
|
7711c0 |
index 13b6f8d..a24dd01 100644
|
|
|
7711c0 |
--- a/util/oslib-posix.c
|
|
|
7711c0 |
+++ b/util/oslib-posix.c
|
|
|
7711c0 |
@@ -153,7 +153,7 @@ void qemu_vfree(void *ptr)
|
|
|
7711c0 |
void qemu_anon_ram_free(void *ptr, size_t size)
|
|
|
7711c0 |
{
|
|
|
7711c0 |
trace_qemu_anon_ram_free(ptr, size);
|
|
|
7711c0 |
- qemu_ram_munmap(ptr, size);
|
|
|
7711c0 |
+ qemu_ram_munmap(-1, ptr, size);
|
|
|
7711c0 |
}
|
|
|
7711c0 |
|
|
|
7711c0 |
void qemu_set_block(int fd)
|
|
|
7711c0 |
--
|
|
|
7711c0 |
1.8.3.1
|
|
|
7711c0 |
|