Blame SOURCES/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch

7711c0
From 6eda983528f3d74ecd4bdecf6aae127e9c93ed48 Mon Sep 17 00:00:00 2001
7711c0
From: Sam Bobroff <sbobroff@redhat.com>
7711c0
Date: Tue, 16 Apr 2019 05:29:10 +0200
7711c0
Subject: [PATCH 163/163] mmap-alloc: fix hugetlbfs misaligned length in ppc64
7711c0
7711c0
RH-Author: Sam Bobroff <sbobroff@redhat.com>
7711c0
Message-id: <1555392550-21945-3-git-send-email-sbobroff@redhat.com>
7711c0
Patchwork-id: 85702
7711c0
O-Subject: [RHEL-7.7 qemu-kvm-rhev BZ1672819 PATCH 2/2 REPOST] mmap-alloc: fix hugetlbfs misaligned length in ppc64
7711c0
Bugzilla: 1672819
7711c0
RH-Acked-by: David Gibson <dgibson@redhat.com>
7711c0
RH-Acked-by: Thomas Huth <thuth@redhat.com>
7711c0
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
7711c0
7711c0
From: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
7711c0
7711c0
The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc:
7711c0
fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64.
7711c0
7711c0
However, we still need to consider the underlying huge page size
7711c0
during munmap() because it requires that both address and length be a
7711c0
multiple of the underlying huge page size for Huge TLB mappings.
7711c0
Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES
7711c0
section of the munmap(2) manual:
7711c0
7711c0
  "For munmap(), addr and length must both be a multiple of the
7711c0
  underlying huge page size."
7711c0
7711c0
On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB
7711c0
mappings because the mapped segment can be aligned with the underlying
7711c0
huge page size rather than with the native system page size, as
7711c0
returned by getpagesize().
7711c0
7711c0
This has the side effect of not releasing huge pages back to the pool
7711c0
after a hugetlbfs file-backed memory device is hot-unplugged.
7711c0
7711c0
This patch fixes the situation in qemu_ram_mmap() and
7711c0
qemu_ram_munmap() by considering the underlying page size on ppc64.
7711c0
7711c0
After this patch, memory hot-unplug releases huge pages back to the
7711c0
pool.
7711c0
7711c0
Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218
7711c0
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
7711c0
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7711c0
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
7711c0
Reviewed-by: Greg Kurz <groug@kaod.org>
7711c0
(cherry picked from commit 7265c2b9716369b339d778b9ef64a8161eb8f99b)
7711c0
7711c0
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1672819
7711c0
Testing: Check that hugepage backed RAM removed from a guest is freed
7711c0
on the host.
7711c0
Signed-off-by: Sam Bobroff <sbobroff@redhat.com>
7711c0
Upstream: Patch is in dgibson/ppc-for-4.0
7711c0
7711c0
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
7711c0
---
7711c0
 exec.c                    |  4 ++--
7711c0
 include/qemu/mmap-alloc.h |  2 +-
7711c0
 util/mmap-alloc.c         | 22 ++++++++++++++++------
7711c0
 util/oslib-posix.c        |  2 +-
7711c0
 4 files changed, 20 insertions(+), 10 deletions(-)
7711c0
7711c0
diff --git a/exec.c b/exec.c
7711c0
index d87a51a..82e85ff 100644
7711c0
--- a/exec.c
7711c0
+++ b/exec.c
7711c0
@@ -1714,7 +1714,7 @@ static void *file_ram_alloc(RAMBlock *block,
7711c0
     if (mem_prealloc) {
7711c0
         os_mem_prealloc(fd, area, memory, smp_cpus, errp);
7711c0
         if (errp && *errp) {
7711c0
-            qemu_ram_munmap(area, memory);
7711c0
+            qemu_ram_munmap(fd, area, memory);
7711c0
             return NULL;
7711c0
         }
7711c0
     }
7711c0
@@ -2235,7 +2235,7 @@ static void reclaim_ramblock(RAMBlock *block)
7711c0
         xen_invalidate_map_cache_entry(block->host);
7711c0
 #ifndef _WIN32
7711c0
     } else if (block->fd >= 0) {
7711c0
-        qemu_ram_munmap(block->host, block->max_length);
7711c0
+        qemu_ram_munmap(block->fd, block->host, block->max_length);
7711c0
         close(block->fd);
7711c0
 #endif
7711c0
     } else {
7711c0
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
7711c0
index 50385e3..ef04f0e 100644
7711c0
--- a/include/qemu/mmap-alloc.h
7711c0
+++ b/include/qemu/mmap-alloc.h
7711c0
@@ -9,6 +9,6 @@ size_t qemu_mempath_getpagesize(const char *mem_path);
7711c0
 
7711c0
 void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
7711c0
 
7711c0
-void qemu_ram_munmap(void *ptr, size_t size);
7711c0
+void qemu_ram_munmap(int fd, void *ptr, size_t size);
7711c0
 
7711c0
 #endif
7711c0
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
7711c0
index 94ee517..19607c1 100644
7711c0
--- a/util/mmap-alloc.c
7711c0
+++ b/util/mmap-alloc.c
7711c0
@@ -78,6 +78,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
7711c0
     int flags;
7711c0
     int guardfd;
7711c0
     size_t offset;
7711c0
+    size_t pagesize;
7711c0
     size_t total;
7711c0
     void *guardptr;
7711c0
     void *ptr;
7711c0
@@ -98,7 +99,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
7711c0
      * anonymous memory is OK.
7711c0
      */
7711c0
     flags = MAP_PRIVATE;
7711c0
-    if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) {
7711c0
+    pagesize = qemu_fd_getpagesize(fd);
7711c0
+    if (fd == -1 || pagesize == getpagesize()) {
7711c0
         guardfd = -1;
7711c0
         flags |= MAP_ANONYMOUS;
7711c0
     } else {
7711c0
@@ -107,6 +109,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
7711c0
     }
7711c0
 #else
7711c0
     guardfd = -1;
7711c0
+    pagesize = getpagesize();
7711c0
     flags = MAP_PRIVATE | MAP_ANONYMOUS;
7711c0
 #endif
7711c0
 
7711c0
@@ -118,7 +121,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
7711c0
 
7711c0
     assert(is_power_of_2(align));
7711c0
     /* Always align to host page size */
7711c0
-    assert(align >= getpagesize());
7711c0
+    assert(align >= pagesize);
7711c0
 
7711c0
     flags = MAP_FIXED;
7711c0
     flags |= fd == -1 ? MAP_ANONYMOUS : 0;
7711c0
@@ -141,17 +144,24 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
7711c0
      * a guard page guarding against potential buffer overflows.
7711c0
      */
7711c0
     total -= offset;
7711c0
-    if (total > size + getpagesize()) {
7711c0
-        munmap(ptr + size + getpagesize(), total - size - getpagesize());
7711c0
+    if (total > size + pagesize) {
7711c0
+        munmap(ptr + size + pagesize, total - size - pagesize);
7711c0
     }
7711c0
 
7711c0
     return ptr;
7711c0
 }
7711c0
 
7711c0
-void qemu_ram_munmap(void *ptr, size_t size)
7711c0
+void qemu_ram_munmap(int fd, void *ptr, size_t size)
7711c0
 {
7711c0
+    size_t pagesize;
7711c0
+
7711c0
     if (ptr) {
7711c0
         /* Unmap both the RAM block and the guard page */
7711c0
-        munmap(ptr, size + getpagesize());
7711c0
+#if defined(__powerpc64__) && defined(__linux__)
7711c0
+        pagesize = qemu_fd_getpagesize(fd);
7711c0
+#else
7711c0
+        pagesize = getpagesize();
7711c0
+#endif
7711c0
+        munmap(ptr, size + pagesize);
7711c0
     }
7711c0
 }
7711c0
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
7711c0
index 13b6f8d..a24dd01 100644
7711c0
--- a/util/oslib-posix.c
7711c0
+++ b/util/oslib-posix.c
7711c0
@@ -153,7 +153,7 @@ void qemu_vfree(void *ptr)
7711c0
 void qemu_anon_ram_free(void *ptr, size_t size)
7711c0
 {
7711c0
     trace_qemu_anon_ram_free(ptr, size);
7711c0
-    qemu_ram_munmap(ptr, size);
7711c0
+    qemu_ram_munmap(-1, ptr, size);
7711c0
 }
7711c0
 
7711c0
 void qemu_set_block(int fd)
7711c0
-- 
7711c0
1.8.3.1
7711c0