26ba25
From b872c9566d3f18b4d6580a7b271bbbc21c026a36 Mon Sep 17 00:00:00 2001
26ba25
From: "plai@redhat.com" <plai@redhat.com>
26ba25
Date: Mon, 7 Jan 2019 17:02:19 +0000
26ba25
Subject: [PATCH 18/22] hostmem-file: add the 'pmem' option
26ba25
26ba25
RH-Author: plai@redhat.com
26ba25
Message-id: <1546880543-24860-7-git-send-email-plai@redhat.com>
26ba25
Patchwork-id: 83892
26ba25
O-Subject: [RHEL8.0 qemu-kvm PATCH v7 06/10] hostmem-file: add the 'pmem' option
26ba25
Bugzilla: 1539285
26ba25
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
26ba25
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
26ba25
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
26ba25
26ba25
From: Junyan He <junyan.he@intel.com>
26ba25
26ba25
When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it
26ba25
needs to know whether the backend storage is a real persistent memory,
26ba25
in order to decide whether special operations should be performed to
26ba25
ensure the data persistence.
26ba25
26ba25
This boolean option 'pmem' allows users to specify whether the backend
26ba25
storage of memory-backend-file is a real persistent memory. If
26ba25
'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the
26ba25
corresponding memory region. If 'pmem' is set but QEMU lacks libpmem
26ba25
support, an error is generated.
26ba25
26ba25
Signed-off-by: Junyan He <junyan.he@intel.com>
26ba25
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
26ba25
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
26ba25
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
26ba25
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
26ba25
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
26ba25
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
26ba25
(cherry picked from commit a4de8552b2580adf6fa4874439217b65d3bdd88b)
26ba25
Signed-off-by: Paul Lai <plai@redhat.com>
26ba25
26ba25
Resolved Conflicts:
26ba25
	docs/nvdimm.txt
26ba25
26ba25
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
26ba25
---
26ba25
 backends/hostmem-file.c | 43 +++++++++++++++++++++++++++++++++++++++++--
26ba25
 docs/nvdimm.txt         | 42 ++++++++++++++++++++++++++++++++++++++++++
26ba25
 exec.c                  |  8 ++++++++
26ba25
 include/exec/memory.h   |  4 ++++
26ba25
 include/exec/ram_addr.h |  3 +++
26ba25
 qemu-options.hx         |  7 +++++++
26ba25
 6 files changed, 105 insertions(+), 2 deletions(-)
26ba25
26ba25
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
26ba25
index 34c68bb..2476dcb 100644
26ba25
--- a/backends/hostmem-file.c
26ba25
+++ b/backends/hostmem-file.c
26ba25
@@ -12,6 +12,7 @@
26ba25
 #include "qemu/osdep.h"
26ba25
 #include "qapi/error.h"
26ba25
 #include "qemu-common.h"
26ba25
+#include "qemu/error-report.h"
26ba25
 #include "sysemu/hostmem.h"
26ba25
 #include "sysemu/sysemu.h"
26ba25
 #include "qom/object_interfaces.h"
26ba25
@@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile;
26ba25
 struct HostMemoryBackendFile {
26ba25
     HostMemoryBackend parent_obj;
26ba25
 
26ba25
-    bool discard_data;
26ba25
     char *mem_path;
26ba25
     uint64_t align;
26ba25
+    bool discard_data;
26ba25
+    bool is_pmem;
26ba25
 };
26ba25
 
26ba25
 static void
26ba25
@@ -59,7 +61,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
26ba25
         memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
26ba25
                                  path,
26ba25
                                  backend->size, fb->align,
26ba25
-                                 backend->share ? RAM_SHARED : 0,
26ba25
+                                 (backend->share ? RAM_SHARED : 0) |
26ba25
+                                 (fb->is_pmem ? RAM_PMEM : 0),
26ba25
                                  fb->mem_path, errp);
26ba25
         g_free(path);
26ba25
     }
26ba25
@@ -131,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
26ba25
     error_propagate(errp, local_err);
26ba25
 }
26ba25
 
26ba25
+static bool file_memory_backend_get_pmem(Object *o, Error **errp)
26ba25
+{
26ba25
+    return MEMORY_BACKEND_FILE(o)->is_pmem;
26ba25
+}
26ba25
+
26ba25
+static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp)
26ba25
+{
26ba25
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
26ba25
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
26ba25
+
26ba25
+    if (host_memory_backend_mr_inited(backend)) {
26ba25
+        error_setg(errp, "cannot change property 'pmem' of %s '%s'",
26ba25
+                   object_get_typename(o),
26ba25
+                   object_get_canonical_path_component(o));
26ba25
+        return;
26ba25
+    }
26ba25
+
26ba25
+#ifndef CONFIG_LIBPMEM
26ba25
+    if (value) {
26ba25
+        Error *local_err = NULL;
26ba25
+        error_setg(&local_err,
26ba25
+                   "Lack of libpmem support while setting the 'pmem=on'"
26ba25
+                   " of %s '%s'. We can't ensure data persistence.",
26ba25
+                   object_get_typename(o),
26ba25
+                   object_get_canonical_path_component(o));
26ba25
+        error_propagate(errp, local_err);
26ba25
+        return;
26ba25
+    }
26ba25
+#endif
26ba25
+
26ba25
+    fb->is_pmem = value;
26ba25
+}
26ba25
+
26ba25
 static void file_backend_unparent(Object *obj)
26ba25
 {
26ba25
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
26ba25
@@ -162,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data)
26ba25
         file_memory_backend_get_align,
26ba25
         file_memory_backend_set_align,
26ba25
         NULL, NULL, &error_abort);
26ba25
+    object_class_property_add_bool(oc, "pmem",
26ba25
+        file_memory_backend_get_pmem, file_memory_backend_set_pmem,
26ba25
+        &error_abort);
26ba25
 }
26ba25
 
26ba25
 static void file_backend_instance_finalize(Object *o)
26ba25
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
26ba25
index e903d8b..5f158a6 100644
26ba25
--- a/docs/nvdimm.txt
26ba25
+++ b/docs/nvdimm.txt
26ba25
@@ -153,3 +153,45 @@ guest NVDIMM region mapping structure.  This unarmed flag indicates
26ba25
 guest software that this vNVDIMM device contains a region that cannot
26ba25
 accept persistent writes. In result, for example, the guest Linux
26ba25
 NVDIMM driver, marks such vNVDIMM device as read-only.
26ba25
+
26ba25
+NVDIMM Persistence
26ba25
+------------------
26ba25
+
26ba25
+ACPI 6.2 Errata A added support for a new Platform Capabilities Structure
26ba25
+which allows the platform to communicate what features it supports related to
26ba25
+NVDIMM data persistence.  Users can provide a persistence value to a guest via
26ba25
+the optional "nvdimm-persistence" machine command line option:
26ba25
+
26ba25
+    -machine pc,accel=kvm,nvdimm,nvdimm-persistence=cpu
26ba25
+
26ba25
+There are currently two valid values for this option:
26ba25
+
26ba25
+"mem-ctrl" - The platform supports flushing dirty data from the memory
26ba25
+             controller to the NVDIMMs in the event of power loss.
26ba25
+
26ba25
+"cpu"      - The platform supports flushing dirty data from the CPU cache to
26ba25
+             the NVDIMMs in the event of power loss.  This implies that the
26ba25
+             platform also supports flushing dirty data through the memory
26ba25
+             controller on power loss.
26ba25
+
26ba25
+If the vNVDIMM backend is in host persistent memory that can be accessed in
26ba25
+SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set
26ba25
+the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU
26ba25
+is built with libpmem [2] support (configured with --enable-libpmem), QEMU
26ba25
+will take necessary operations to guarantee the persistence of its own writes
26ba25
+to the vNVDIMM backend (e.g., in vNVDIMM label emulation and live migration).
26ba25
+If 'pmem' is 'on' while there is no libpmem support, QEMU will exit and report
26ba25
+a "lack of libpmem support" message, since persistence cannot be guaranteed.
26ba25
+For example, if we want to ensure the persistence for some backend file,
26ba25
+use the QEMU command line:
26ba25
+
26ba25
+    -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on
26ba25
+
26ba25
+References
26ba25
+----------
26ba25
+
26ba25
+[1] NVM Programming Model (NPM)
26ba25
+    Version 1.2
26ba25
+    https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
26ba25
+[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
26ba25
+    http://pmem.io/pmdk/
26ba25
diff --git a/exec.c b/exec.c
26ba25
index 8d58e8f..9028700 100644
26ba25
--- a/exec.c
26ba25
+++ b/exec.c
26ba25
@@ -2049,6 +2049,9 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
26ba25
     Error *local_err = NULL;
26ba25
     int64_t file_size;
26ba25
 
26ba25
+    /* Only these RAM flags are supported for now. */
26ba25
+    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
26ba25
+
26ba25
     if (xen_enabled()) {
26ba25
         error_setg(errp, "-mem-path not supported with Xen");
26ba25
         return NULL;
26ba25
@@ -3867,6 +3870,11 @@ err:
26ba25
     return ret;
26ba25
 }
26ba25
 
26ba25
+bool ramblock_is_pmem(RAMBlock *rb)
26ba25
+{
26ba25
+    return rb->flags & RAM_PMEM;
26ba25
+}
26ba25
+
26ba25
 #endif
26ba25
 
26ba25
 void page_size_init(void)
26ba25
diff --git a/include/exec/memory.h b/include/exec/memory.h
26ba25
index b3abe61..fd2c574 100644
26ba25
--- a/include/exec/memory.h
26ba25
+++ b/include/exec/memory.h
26ba25
@@ -122,6 +122,9 @@ typedef struct IOMMUNotifier IOMMUNotifier;
26ba25
 /* RAM can be migrated */
26ba25
 #define RAM_MIGRATABLE (1 << 4)
26ba25
 
26ba25
+/* RAM is persistent memory */
26ba25
+#define RAM_PMEM (1 << 5)
26ba25
+
26ba25
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
26ba25
                                        IOMMUNotifierFlag flags,
26ba25
                                        hwaddr start, hwaddr end)
26ba25
@@ -541,6 +544,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
26ba25
  *         (getpagesize()) will be used.
26ba25
  * @ram_flags: Memory region features:
26ba25
  *             - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
26ba25
+ *             - RAM_PMEM: the memory is persistent memory
26ba25
  *             Other bits are ignored now.
26ba25
  * @path: the path in which to allocate the RAM.
26ba25
  * @errp: pointer to Error*, to store an error if it happens.
26ba25
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
26ba25
index 67e163e..922305d 100644
26ba25
--- a/include/exec/ram_addr.h
26ba25
+++ b/include/exec/ram_addr.h
26ba25
@@ -70,6 +70,8 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
26ba25
     return host_addr_offset >> TARGET_PAGE_BITS;
26ba25
 }
26ba25
 
26ba25
+bool ramblock_is_pmem(RAMBlock *rb);
26ba25
+
26ba25
 long qemu_getrampagesize(void);
26ba25
 unsigned long last_ram_page(void);
26ba25
 
26ba25
@@ -84,6 +86,7 @@ unsigned long last_ram_page(void);
26ba25
  *  @ram_flags: specify the properties of the ram block, which can be one
26ba25
  *              or bit-or of following values
26ba25
  *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
26ba25
+ *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
26ba25
  *              Other bits are ignored.
26ba25
  *  @mem_path or @fd: specify the backing file or device
26ba25
  *  @errp: pointer to Error*, to store an error if it happens
26ba25
diff --git a/qemu-options.hx b/qemu-options.hx
26ba25
index 683ab0d..1b6786b 100644
26ba25
--- a/qemu-options.hx
26ba25
+++ b/qemu-options.hx
26ba25
@@ -4051,6 +4051,13 @@ requires an alignment different than the default one used by QEMU, eg
26ba25
 the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
26ba25
 such cases, users can specify the required alignment via this option.
26ba25
 
26ba25
+The @option{pmem} option specifies whether the backing file specified
26ba25
+by @option{mem-path} is in host persistent memory that can be accessed
26ba25
+using the SNIA NVM programming model (e.g. Intel NVDIMM).
26ba25
+If @option{pmem} is set to 'on', QEMU will take necessary operations to
26ba25
+guarantee the persistence of its own writes to @option{mem-path}
26ba25
+(e.g. in vNVDIMM label emulation and live migration).
26ba25
+
26ba25
 @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
26ba25
 
26ba25
 Creates a memory backend object, which can be used to back the guest RAM.
26ba25
-- 
26ba25
1.8.3.1
26ba25