0a122b
From 0de3b24dc9428a37330f9065bc3626a3a07a9200 Mon Sep 17 00:00:00 2001
0a122b
Message-Id: <0de3b24dc9428a37330f9065bc3626a3a07a9200.1387382496.git.minovotn@redhat.com>
0a122b
In-Reply-To: <c5386144fbf09f628148101bc674e2421cdd16e3.1387382496.git.minovotn@redhat.com>
0a122b
References: <c5386144fbf09f628148101bc674e2421cdd16e3.1387382496.git.minovotn@redhat.com>
0a122b
From: Marcelo Tosatti <mtosatti@redhat.com>
0a122b
Date: Thu, 12 Dec 2013 01:36:44 +0100
0a122b
Subject: [PATCH 46/46] mempath: prefault pages manually (v4)
0a122b
0a122b
RH-Author: Marcelo Tosatti <mtosatti@redhat.com>
0a122b
Message-id: <20131212013644.GA8646@amt.cnet>
0a122b
Patchwork-id: 56243
0a122b
O-Subject: [RHEL7 qemu-kvm PATCH] mempath: prefault pages manually (v4)
0a122b
Bugzilla: 1026554
0a122b
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
0a122b
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
0a122b
RH-Acked-by: Andrew Jones <drjones@redhat.com>
0a122b
0a122b
commit ef36fa1492e9105f3fa607b56edc63df513d7da1 upstream
0a122b
0a122b
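Prefault the -mem-path backing pages manually by writing one byte per huge
page instead of relying on MAP_POPULATE, which silently ignores allocation
failures. A temporary SIGBUS handler is installed around the prefault loop so
that QEMU exits with an error message if a page cannot be allocated. As a
result, -mem-prealloc no longer depends on MAP_POPULATE being defined.

v4: s/fail/failed/ (Peter Maydell)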
0a122b
0a122b
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
0a122b
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
0a122b
BZ: 1026554
0a122b
Signed-off-by: Michal Novotny <minovotn@redhat.com>
0a122b
---
0a122b
 exec.c          | 59 +++++++++++++++++++++++++++++++++++++++++++++------------
0a122b
 qemu-options.hx |  2 --
0a122b
 vl.c            |  4 ----
0a122b
 3 files changed, 47 insertions(+), 18 deletions(-)
0a122b
0a122b
diff --git a/exec.c b/exec.c
0a122b
index 64af205..571cea4 100644
0a122b
--- a/exec.c
0a122b
+++ b/exec.c
0a122b
@@ -865,6 +865,13 @@ static long gethugepagesize(const char *path)
0a122b
     return fs.f_bsize;
0a122b
 }
0a122b
 
0a122b
+static sigjmp_buf sigjump;
0a122b
+
0a122b
+static void sigbus_handler(int signal)
0a122b
+{
0a122b
+    siglongjmp(sigjump, 1);
0a122b
+}
0a122b
+
0a122b
 static void *file_ram_alloc(RAMBlock *block,
0a122b
                             ram_addr_t memory,
0a122b
                             const char *path)
0a122b
@@ -874,9 +881,6 @@ static void *file_ram_alloc(RAMBlock *block,
0a122b
     char *c;
0a122b
     void *area;
0a122b
     int fd;
0a122b
-#ifdef MAP_POPULATE
0a122b
-    int flags;
0a122b
-#endif
0a122b
     unsigned long hpagesize;
0a122b
 
0a122b
     hpagesize = gethugepagesize(path);
0a122b
@@ -924,21 +928,52 @@ static void *file_ram_alloc(RAMBlock *block,
0a122b
     if (ftruncate(fd, memory))
0a122b
         perror("ftruncate");
0a122b
 
0a122b
-#ifdef MAP_POPULATE
0a122b
-    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
0a122b
-     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
0a122b
-     * to sidestep this quirk.
0a122b
-     */
0a122b
-    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
0a122b
-    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
0a122b
-#else
0a122b
     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
0a122b
-#endif
0a122b
     if (area == MAP_FAILED) {
0a122b
         perror("file_ram_alloc: can't mmap RAM pages");
0a122b
         close(fd);
0a122b
         return (NULL);
0a122b
     }
0a122b
+
0a122b
+    if (mem_prealloc) {
0a122b
+        int ret, i;
0a122b
+        struct sigaction act, oldact;
0a122b
+        sigset_t set, oldset;
0a122b
+
0a122b
+        memset(&act, 0, sizeof(act));
0a122b
+        act.sa_handler = &sigbus_handler;
0a122b
+        act.sa_flags = 0;
0a122b
+
0a122b
+        ret = sigaction(SIGBUS, &act, &oldact);
0a122b
+        if (ret) {
0a122b
+            perror("file_ram_alloc: failed to install signal handler");
0a122b
+            exit(1);
0a122b
+        }
0a122b
+
0a122b
+        /* unblock SIGBUS */
0a122b
+        sigemptyset(&set);
0a122b
+        sigaddset(&set, SIGBUS);
0a122b
+        pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
0a122b
+
0a122b
+        if (sigsetjmp(sigjump, 1)) {
0a122b
+            fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
0a122b
+            exit(1);
0a122b
+        }
0a122b
+
0a122b
+        /* MAP_POPULATE silently ignores failures */
0a122b
+        for (i = 0; i < (memory/hpagesize)-1; i++) {
0a122b
+            memset(area + (hpagesize*i), 0, 1);
0a122b
+        }
0a122b
+
0a122b
+        ret = sigaction(SIGBUS, &oldact, NULL);
0a122b
+        if (ret) {
0a122b
+            perror("file_ram_alloc: failed to reinstall signal handler");
0a122b
+            exit(1);
0a122b
+        }
0a122b
+
0a122b
+        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
0a122b
+    }
0a122b
+
0a122b
     block->fd = fd;
0a122b
     return area;
0a122b
 }
0a122b
diff --git a/qemu-options.hx b/qemu-options.hx
0a122b
index e3c5d67..d9320b5 100644
0a122b
--- a/qemu-options.hx
0a122b
+++ b/qemu-options.hx
0a122b
@@ -228,7 +228,6 @@ STEXI
0a122b
 Allocate guest RAM from a temporarily created file in @var{path}.
0a122b
 ETEXI
0a122b
 
0a122b
-#ifdef MAP_POPULATE
0a122b
 DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
0a122b
     "-mem-prealloc   preallocate guest memory (use with -mem-path)\n",
0a122b
     QEMU_ARCH_ALL)
0a122b
@@ -237,7 +236,6 @@ STEXI
0a122b
 @findex -mem-prealloc
0a122b
 Preallocate memory when using -mem-path.
0a122b
 ETEXI
0a122b
-#endif
0a122b
 
0a122b
 DEF("k", HAS_ARG, QEMU_OPTION_k,
0a122b
     "-k language     use keyboard layout (for example 'fr' for French)\n",
0a122b
diff --git a/vl.c b/vl.c
0a122b
index 0cbbdf0..da12b90 100644
0a122b
--- a/vl.c
0a122b
+++ b/vl.c
0a122b
@@ -188,9 +188,7 @@ static int display_remote;
0a122b
 const char* keyboard_layout = NULL;
0a122b
 ram_addr_t ram_size;
0a122b
 const char *mem_path = NULL;
0a122b
-#ifdef MAP_POPULATE
0a122b
 int mem_prealloc = 0; /* force preallocation of physical target memory */
0a122b
-#endif
0a122b
 int nb_nics;
0a122b
 NICInfo nd_table[MAX_NICS];
0a122b
 int autostart;
0a122b
@@ -3174,11 +3172,9 @@ int main(int argc, char **argv, char **envp)
0a122b
             case QEMU_OPTION_mempath:
0a122b
                 mem_path = optarg;
0a122b
                 break;
0a122b
-#ifdef MAP_POPULATE
0a122b
             case QEMU_OPTION_mem_prealloc:
0a122b
                 mem_prealloc = 1;
0a122b
                 break;
0a122b
-#endif
0a122b
             case QEMU_OPTION_d:
0a122b
                 log_mask = optarg;
0a122b
                 break;
0a122b
-- 
0a122b
1.7.11.7
0a122b