Blame SOURCES/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch

4ec855
From e69f257e657473ba59f48692d387e292a24892bb Mon Sep 17 00:00:00 2001
4ec855
From: "plai@redhat.com" <plai@redhat.com>
4ec855
Date: Tue, 20 Aug 2019 16:12:50 +0100
4ec855
Subject: [PATCH 03/11] mmap-alloc: fix hugetlbfs misaligned length in ppc64
4ec855
4ec855
RH-Author: plai@redhat.com
4ec855
Message-id: <1566317571-5697-4-git-send-email-plai@redhat.com>
4ec855
Patchwork-id: 90082
4ec855
O-Subject: [RHEL8.2 qemu-kvm PATCH 3/4] mmap-alloc: fix hugetlbfs misaligned length in ppc64
4ec855
Bugzilla: 1539282
4ec855
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
4ec855
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
4ec855
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
4ec855
4ec855
From: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
4ec855
4ec855
The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc:
4ec855
fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64.
4ec855
4ec855
However, we still need to consider the underlying huge page size
4ec855
during munmap() because it requires that both address and length be a
4ec855
multiple of the underlying huge page size for Huge TLB mappings.
4ec855
Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES
4ec855
section of the munmap(2) manual:
4ec855
4ec855
  "For munmap(), addr and length must both be a multiple of the
4ec855
  underlying huge page size."
4ec855
4ec855
On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB
4ec855
mappings because the mapped segment can be aligned to the underlying
4ec855
huge page size, whereas the length passed to munmap() is computed
4ec855
with the native system page size, as returned by getpagesize().
4ec855
4ec855
This has the side effect of not releasing huge pages back to the pool
4ec855
after a hugetlbfs file-backed memory device is hot-unplugged.
4ec855
4ec855
This patch fixes the situation in qemu_ram_mmap() and
4ec855
qemu_ram_munmap() by considering the underlying page size on ppc64.
4ec855
4ec855
After this patch, memory hot-unplug releases huge pages back to the
4ec855
pool.
4ec855
4ec855
Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218
4ec855
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
4ec855
Reviewed-by: Greg Kurz <groug@kaod.org>
4ec855
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
4ec855
(cherry picked from commit 53adb9d43e1abba187387a51f238e878e934c647)
4ec855
Signed-off-by: Paul Lai <plai@redhat.com>
4ec855
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
4ec855
---
4ec855
 exec.c                    |  4 ++--
4ec855
 include/qemu/mmap-alloc.h |  2 +-
4ec855
 util/mmap-alloc.c         | 22 ++++++++++++++++------
4ec855
 util/oslib-posix.c        |  2 +-
4ec855
 4 files changed, 20 insertions(+), 10 deletions(-)
4ec855
4ec855
diff --git a/exec.c b/exec.c
4ec855
index a79eaa3..9112d8b 100644
4ec855
--- a/exec.c
4ec855
+++ b/exec.c
4ec855
@@ -1679,7 +1679,7 @@ static void *file_ram_alloc(RAMBlock *block,
4ec855
     if (mem_prealloc) {
4ec855
         os_mem_prealloc(fd, area, memory, smp_cpus, errp);
4ec855
         if (errp && *errp) {
4ec855
-            qemu_ram_munmap(area, memory);
4ec855
+            qemu_ram_munmap(fd, area, memory);
4ec855
             return NULL;
4ec855
         }
4ec855
     }
4ec855
@@ -2200,7 +2200,7 @@ static void reclaim_ramblock(RAMBlock *block)
4ec855
         xen_invalidate_map_cache_entry(block->host);
4ec855
 #ifndef _WIN32
4ec855
     } else if (block->fd >= 0) {
4ec855
-        qemu_ram_munmap(block->host, block->max_length);
4ec855
+        qemu_ram_munmap(block->fd, block->host, block->max_length);
4ec855
         close(block->fd);
4ec855
 #endif
4ec855
     } else {
4ec855
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
4ec855
index 190688a..eec98d8 100644
4ec855
--- a/include/qemu/mmap-alloc.h
4ec855
+++ b/include/qemu/mmap-alloc.h
4ec855
@@ -28,6 +28,6 @@ void *qemu_ram_mmap(int fd,
4ec855
                     bool shared,
4ec855
                     bool is_pmem);
4ec855
 
4ec855
-void qemu_ram_munmap(void *ptr, size_t size);
4ec855
+void qemu_ram_munmap(int fd, void *ptr, size_t size);
4ec855
 
4ec855
 #endif
4ec855
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
4ec855
index b29fcee..bbd9077 100644
4ec855
--- a/util/mmap-alloc.c
4ec855
+++ b/util/mmap-alloc.c
4ec855
@@ -82,6 +82,7 @@ void *qemu_ram_mmap(int fd,
4ec855
     int flags;
4ec855
     int guardfd;
4ec855
     size_t offset;
4ec855
+    size_t pagesize;
4ec855
     size_t total;
4ec855
     void *guardptr;
4ec855
     void *ptr;
4ec855
@@ -102,7 +103,8 @@ void *qemu_ram_mmap(int fd,
4ec855
      * anonymous memory is OK.
4ec855
      */
4ec855
     flags = MAP_PRIVATE;
4ec855
-    if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) {
4ec855
+    pagesize = qemu_fd_getpagesize(fd);
4ec855
+    if (fd == -1 || pagesize == getpagesize()) {
4ec855
         guardfd = -1;
4ec855
         flags |= MAP_ANONYMOUS;
4ec855
     } else {
4ec855
@@ -111,6 +113,7 @@ void *qemu_ram_mmap(int fd,
4ec855
     }
4ec855
 #else
4ec855
     guardfd = -1;
4ec855
+    pagesize = getpagesize();
4ec855
     flags = MAP_PRIVATE | MAP_ANONYMOUS;
4ec855
 #endif
4ec855
 
4ec855
@@ -122,7 +125,7 @@ void *qemu_ram_mmap(int fd,
4ec855
 
4ec855
     assert(is_power_of_2(align));
4ec855
     /* Always align to host page size */
4ec855
-    assert(align >= getpagesize());
4ec855
+    assert(align >= pagesize);
4ec855
 
4ec855
     flags = MAP_FIXED;
4ec855
     flags |= fd == -1 ? MAP_ANONYMOUS : 0;
4ec855
@@ -145,17 +148,24 @@ void *qemu_ram_mmap(int fd,
4ec855
      * a guard page guarding against potential buffer overflows.
4ec855
      */
4ec855
     total -= offset;
4ec855
-    if (total > size + getpagesize()) {
4ec855
-        munmap(ptr + size + getpagesize(), total - size - getpagesize());
4ec855
+    if (total > size + pagesize) {
4ec855
+        munmap(ptr + size + pagesize, total - size - pagesize);
4ec855
     }
4ec855
 
4ec855
     return ptr;
4ec855
 }
4ec855
 
4ec855
-void qemu_ram_munmap(void *ptr, size_t size)
4ec855
+void qemu_ram_munmap(int fd, void *ptr, size_t size)
4ec855
 {
4ec855
+    size_t pagesize;
4ec855
+
4ec855
     if (ptr) {
4ec855
         /* Unmap both the RAM block and the guard page */
4ec855
-        munmap(ptr, size + getpagesize());
4ec855
+#if defined(__powerpc64__) && defined(__linux__)
4ec855
+        pagesize = qemu_fd_getpagesize(fd);
4ec855
+#else
4ec855
+        pagesize = getpagesize();
4ec855
+#endif
4ec855
+        munmap(ptr, size + pagesize);
4ec855
     }
4ec855
 }
4ec855
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
4ec855
index c36b2bb..7b6db04 100644
4ec855
--- a/util/oslib-posix.c
4ec855
+++ b/util/oslib-posix.c
4ec855
@@ -153,7 +153,7 @@ void qemu_vfree(void *ptr)
4ec855
 void qemu_anon_ram_free(void *ptr, size_t size)
4ec855
 {
4ec855
     trace_qemu_anon_ram_free(ptr, size);
4ec855
-    qemu_ram_munmap(ptr, size);
4ec855
+    qemu_ram_munmap(-1, ptr, size);
4ec855
 }
4ec855
 
4ec855
 void qemu_set_block(int fd)
4ec855
-- 
4ec855
1.8.3.1
4ec855