From e69f257e657473ba59f48692d387e292a24892bb Mon Sep 17 00:00:00 2001 From: "plai@redhat.com" Date: Tue, 20 Aug 2019 16:12:50 +0100 Subject: [PATCH 03/11] mmap-alloc: fix hugetlbfs misaligned length in ppc64 RH-Author: plai@redhat.com Message-id: <1566317571-5697-4-git-send-email-plai@redhat.com> Patchwork-id: 90082 O-Subject: [RHEL8.2 qemu-kvm PATCH 3/4] mmap-alloc: fix hugetlbfs misaligned length in ppc64 Bugzilla: 1539282 RH-Acked-by: Stefan Hajnoczi RH-Acked-by: Pankaj Gupta RH-Acked-by: Eduardo Habkost From: Murilo Opsfelder Araujo The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc: fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64. However, we still need to consider the underlying huge page size during munmap() because it requires that both address and length be a multiple of the underlying huge page size for Huge TLB mappings. Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES section of the munmap(2) manual: "For munmap(), addr and length must both be a multiple of the underlying huge page size." On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB mappings because the mapped segment can be aligned with the underlying huge page size, not aligned with the native system page size, as returned by getpagesize(). This has the side effect of not releasing huge pages back to the pool after a hugetlbfs file-backed memory device is hot-unplugged. This patch fixes the situation in qemu_ram_mmap() and qemu_ram_munmap() by considering the underlying page size on ppc64. After this patch, memory hot-unplug releases huge pages back to the pool. Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218 Signed-off-by: Murilo Opsfelder Araujo Reviewed-by: Greg Kurz Signed-off-by: David Gibson (cherry picked from commit 53adb9d43e1abba187387a51f238e878e934c647) Signed-off-by: Paul Lai Signed-off-by: Danilo C. L. de Paula --- exec.c | 4 ++-- include/qemu/mmap-alloc.h | 2 +- util/mmap-alloc.c | 22 ++++++++++++++++------ util/oslib-posix.c | 2 +- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/exec.c b/exec.c index a79eaa3..9112d8b 100644 --- a/exec.c +++ b/exec.c @@ -1679,7 +1679,7 @@ static void *file_ram_alloc(RAMBlock *block, if (mem_prealloc) { os_mem_prealloc(fd, area, memory, smp_cpus, errp); if (errp && *errp) { - qemu_ram_munmap(area, memory); + qemu_ram_munmap(fd, area, memory); return NULL; } } @@ -2200,7 +2200,7 @@ static void reclaim_ramblock(RAMBlock *block) xen_invalidate_map_cache_entry(block->host); #ifndef _WIN32 } else if (block->fd >= 0) { - qemu_ram_munmap(block->host, block->max_length); + qemu_ram_munmap(block->fd, block->host, block->max_length); close(block->fd); #endif } else { diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index 190688a..eec98d8 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -28,6 +28,6 @@ void *qemu_ram_mmap(int fd, bool shared, bool is_pmem); -void qemu_ram_munmap(void *ptr, size_t size); +void qemu_ram_munmap(int fd, void *ptr, size_t size); #endif diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index b29fcee..bbd9077 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -82,6 +82,7 @@ void *qemu_ram_mmap(int fd, int flags; int guardfd; size_t offset; + size_t pagesize; size_t total; void *guardptr; void *ptr; @@ -102,7 +103,8 @@ void *qemu_ram_mmap(int fd, * anonymous memory is OK. */ flags = MAP_PRIVATE; - if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) { + pagesize = qemu_fd_getpagesize(fd); + if (fd == -1 || pagesize == getpagesize()) { guardfd = -1; flags |= MAP_ANONYMOUS; } else { @@ -111,6 +113,7 @@ void *qemu_ram_mmap(int fd, } #else guardfd = -1; + pagesize = getpagesize(); flags = MAP_PRIVATE | MAP_ANONYMOUS; #endif @@ -122,7 +125,7 @@ void *qemu_ram_mmap(int fd, assert(is_power_of_2(align)); /* Always align to host page size */ - assert(align >= getpagesize()); + assert(align >= pagesize); flags = MAP_FIXED; flags |= fd == -1 ? MAP_ANONYMOUS : 0; @@ -145,17 +148,24 @@ void *qemu_ram_mmap(int fd, * a guard page guarding against potential buffer overflows. */ total -= offset; - if (total > size + getpagesize()) { - munmap(ptr + size + getpagesize(), total - size - getpagesize()); + if (total > size + pagesize) { + munmap(ptr + size + pagesize, total - size - pagesize); } return ptr; } -void qemu_ram_munmap(void *ptr, size_t size) +void qemu_ram_munmap(int fd, void *ptr, size_t size) { + size_t pagesize; + if (ptr) { /* Unmap both the RAM block and the guard page */ - munmap(ptr, size + getpagesize()); +#if defined(__powerpc64__) && defined(__linux__) + pagesize = qemu_fd_getpagesize(fd); +#else + pagesize = getpagesize(); +#endif + munmap(ptr, size + pagesize); } } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index c36b2bb..7b6db04 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -153,7 +153,7 @@ void qemu_vfree(void *ptr) void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); - qemu_ram_munmap(ptr, size); + qemu_ram_munmap(-1, ptr, size); } void qemu_set_block(int fd) -- 1.8.3.1