Blame SOURCES/kvm-hostmem-file-add-the-pmem-option.patch

1bdc94
From 0b7c71b8b4afcbc92a9ef549d485c54da92204b3 Mon Sep 17 00:00:00 2001
1bdc94
From: "plai@redhat.com" <plai@redhat.com>
1bdc94
Date: Fri, 31 Aug 2018 16:25:56 +0200
1bdc94
Subject: [PATCH 14/29] hostmem-file: add the 'pmem' option
1bdc94
1bdc94
RH-Author: plai@redhat.com
1bdc94
Message-id: <1535732759-22481-7-git-send-email-plai@redhat.com>
1bdc94
Patchwork-id: 82007
1bdc94
O-Subject: [RHEL7.6 PATCH BZ 1539280 6/9] hostmem-file: add the 'pmem' option
1bdc94
Bugzilla: 1539280
1bdc94
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
1bdc94
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
1bdc94
From: Junyan He <junyan.he@intel.com>
1bdc94
1bdc94
When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it
1bdc94
needs to know whether the backend storage is a real persistent memory,
1bdc94
in order to decide whether special operations should be performed to
1bdc94
ensure the data persistence.
1bdc94
1bdc94
This boolean option 'pmem' allows users to specify whether the backend
1bdc94
storage of memory-backend-file is a real persistent memory. If
1bdc94
'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the
1bdc94
corresponding memory region. If 'pmem' is set while libpmem support
1bdc94
is lacking, an error is generated.
1bdc94
1bdc94
Signed-off-by: Junyan He <junyan.he@intel.com>
1bdc94
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
1bdc94
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
1bdc94
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
1bdc94
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
1bdc94
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
(cherry picked from commit a4de8552b2580adf6fa4874439217b65d3bdd88b)
1bdc94
Signed-off-by: Paul Lai <plai@redhat.com>
1bdc94
1bdc94
Resolved Conflicts:
1bdc94
	docs/nvdimm.txt
1bdc94
1bdc94
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
---
1bdc94
 backends/hostmem-file.c | 43 +++++++++++++++++++++++++++++++++++++++++--
1bdc94
 docs/nvdimm.txt         | 42 ++++++++++++++++++++++++++++++++++++++++++
1bdc94
 exec.c                  |  8 ++++++++
1bdc94
 include/exec/memory.h   |  4 ++++
1bdc94
 include/exec/ram_addr.h |  3 +++
1bdc94
 qemu-options.hx         |  7 +++++++
1bdc94
 6 files changed, 105 insertions(+), 2 deletions(-)
1bdc94
1bdc94
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
1bdc94
index 34c68bb..2476dcb 100644
1bdc94
--- a/backends/hostmem-file.c
1bdc94
+++ b/backends/hostmem-file.c
1bdc94
@@ -12,6 +12,7 @@
1bdc94
 #include "qemu/osdep.h"
1bdc94
 #include "qapi/error.h"
1bdc94
 #include "qemu-common.h"
1bdc94
+#include "qemu/error-report.h"
1bdc94
 #include "sysemu/hostmem.h"
1bdc94
 #include "sysemu/sysemu.h"
1bdc94
 #include "qom/object_interfaces.h"
1bdc94
@@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile;
1bdc94
 struct HostMemoryBackendFile {
1bdc94
     HostMemoryBackend parent_obj;
1bdc94
 
1bdc94
-    bool discard_data;
1bdc94
     char *mem_path;
1bdc94
     uint64_t align;
1bdc94
+    bool discard_data;
1bdc94
+    bool is_pmem;
1bdc94
 };
1bdc94
 
1bdc94
 static void
1bdc94
@@ -59,7 +61,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
1bdc94
         memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
1bdc94
                                  path,
1bdc94
                                  backend->size, fb->align,
1bdc94
-                                 backend->share ? RAM_SHARED : 0,
1bdc94
+                                 (backend->share ? RAM_SHARED : 0) |
1bdc94
+                                 (fb->is_pmem ? RAM_PMEM : 0),
1bdc94
                                  fb->mem_path, errp);
1bdc94
         g_free(path);
1bdc94
     }
1bdc94
@@ -131,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
1bdc94
     error_propagate(errp, local_err);
1bdc94
 }
1bdc94
 
1bdc94
+static bool file_memory_backend_get_pmem(Object *o, Error **errp)
1bdc94
+{
1bdc94
+    return MEMORY_BACKEND_FILE(o)->is_pmem;
1bdc94
+}
1bdc94
+
1bdc94
+static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp)
1bdc94
+{
1bdc94
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
1bdc94
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
1bdc94
+
1bdc94
+    if (host_memory_backend_mr_inited(backend)) {
1bdc94
+        error_setg(errp, "cannot change property 'pmem' of %s '%s'",
1bdc94
+                   object_get_typename(o),
1bdc94
+                   object_get_canonical_path_component(o));
1bdc94
+        return;
1bdc94
+    }
1bdc94
+
1bdc94
+#ifndef CONFIG_LIBPMEM
1bdc94
+    if (value) {
1bdc94
+        Error *local_err = NULL;
1bdc94
+        error_setg(&local_err,
1bdc94
+                   "Lack of libpmem support while setting the 'pmem=on'"
1bdc94
+                   " of %s '%s'. We can't ensure data persistence.",
1bdc94
+                   object_get_typename(o),
1bdc94
+                   object_get_canonical_path_component(o));
1bdc94
+        error_propagate(errp, local_err);
1bdc94
+        return;
1bdc94
+    }
1bdc94
+#endif
1bdc94
+
1bdc94
+    fb->is_pmem = value;
1bdc94
+}
1bdc94
+
1bdc94
 static void file_backend_unparent(Object *obj)
1bdc94
 {
1bdc94
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
1bdc94
@@ -162,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data)
1bdc94
         file_memory_backend_get_align,
1bdc94
         file_memory_backend_set_align,
1bdc94
         NULL, NULL, &error_abort);
1bdc94
+    object_class_property_add_bool(oc, "pmem",
1bdc94
+        file_memory_backend_get_pmem, file_memory_backend_set_pmem,
1bdc94
+        &error_abort);
1bdc94
 }
1bdc94
 
1bdc94
 static void file_backend_instance_finalize(Object *o)
1bdc94
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
1bdc94
index e903d8b..5f158a6 100644
1bdc94
--- a/docs/nvdimm.txt
1bdc94
+++ b/docs/nvdimm.txt
1bdc94
@@ -153,3 +153,45 @@ guest NVDIMM region mapping structure.  This unarmed flag indicates
1bdc94
 guest software that this vNVDIMM device contains a region that cannot
1bdc94
 accept persistent writes. In result, for example, the guest Linux
1bdc94
 NVDIMM driver, marks such vNVDIMM device as read-only.
1bdc94
+
1bdc94
+NVDIMM Persistence
1bdc94
+------------------
1bdc94
+
1bdc94
+ACPI 6.2 Errata A added support for a new Platform Capabilities Structure
1bdc94
+which allows the platform to communicate what features it supports related to
1bdc94
+NVDIMM data persistence.  Users can provide a persistence value to a guest via
1bdc94
+the optional "nvdimm-persistence" machine command line option:
1bdc94
+
1bdc94
+    -machine pc,accel=kvm,nvdimm,nvdimm-persistence=cpu
1bdc94
+
1bdc94
+There are currently two valid values for this option:
1bdc94
+
1bdc94
+"mem-ctrl" - The platform supports flushing dirty data from the memory
1bdc94
+             controller to the NVDIMMs in the event of power loss.
1bdc94
+
1bdc94
+"cpu"      - The platform supports flushing dirty data from the CPU cache to
1bdc94
+             the NVDIMMs in the event of power loss.  This implies that the
1bdc94
+             platform also supports flushing dirty data through the memory
1bdc94
+             controller on power loss.
1bdc94
+
1bdc94
+If the vNVDIMM backend is in host persistent memory that can be accessed in
1bdc94
+SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set
1bdc94
+the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU
1bdc94
+is built with libpmem [2] support (configured with --enable-libpmem), QEMU
1bdc94
+will take necessary operations to guarantee the persistence of its own writes
1bdc94
+to the vNVDIMM backend (e.g., in vNVDIMM label emulation and live migration).
1bdc94
+If 'pmem' is 'on' while there is no libpmem support, QEMU will exit and report
1bdc94
+a "lack of libpmem support" message, since persistence cannot be guaranteed.
1bdc94
+For example, if we want to ensure the persistence for some backend file,
1bdc94
+use the QEMU command line:
1bdc94
+
1bdc94
+    -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on
1bdc94
+
1bdc94
+References
1bdc94
+----------
1bdc94
+
1bdc94
+[1] NVM Programming Model (NPM)
1bdc94
+	Version 1.2
1bdc94
+    https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
1bdc94
+[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
1bdc94
+    http://pmem.io/pmdk/
1bdc94
diff --git a/exec.c b/exec.c
1bdc94
index 295142b..c670185 100644
1bdc94
--- a/exec.c
1bdc94
+++ b/exec.c
1bdc94
@@ -2045,6 +2045,9 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
1bdc94
     Error *local_err = NULL;
1bdc94
     int64_t file_size;
1bdc94
 
1bdc94
+    /* Only these ram flags are supported for now. */
1bdc94
+    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
1bdc94
+
1bdc94
     if (xen_enabled()) {
1bdc94
         error_setg(errp, "-mem-path not supported with Xen");
1bdc94
         return NULL;
1bdc94
@@ -3863,6 +3866,11 @@ err:
1bdc94
     return ret;
1bdc94
 }
1bdc94
 
1bdc94
+bool ramblock_is_pmem(RAMBlock *rb)
1bdc94
+{
1bdc94
+    return rb->flags & RAM_PMEM;
1bdc94
+}
1bdc94
+
1bdc94
 #endif
1bdc94
 
1bdc94
 void page_size_init(void)
1bdc94
diff --git a/include/exec/memory.h b/include/exec/memory.h
1bdc94
index b3abe61..fd2c574 100644
1bdc94
--- a/include/exec/memory.h
1bdc94
+++ b/include/exec/memory.h
1bdc94
@@ -122,6 +122,9 @@ typedef struct IOMMUNotifier IOMMUNotifier;
1bdc94
 /* RAM can be migrated */
1bdc94
 #define RAM_MIGRATABLE (1 << 4)
1bdc94
 
1bdc94
+/* RAM is a persistent kind of memory */
1bdc94
+#define RAM_PMEM (1 << 5)
1bdc94
+
1bdc94
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
1bdc94
                                        IOMMUNotifierFlag flags,
1bdc94
                                        hwaddr start, hwaddr end)
1bdc94
@@ -541,6 +544,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
1bdc94
  *         (getpagesize()) will be used.
1bdc94
  * @ram_flags: Memory region features:
1bdc94
  *             - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
1bdc94
+ *             - RAM_PMEM: the memory is persistent memory
1bdc94
  *             Other bits are ignored now.
1bdc94
  * @path: the path in which to allocate the RAM.
1bdc94
  * @errp: pointer to Error*, to store an error if it happens.
1bdc94
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
1bdc94
index 67e163e..922305d 100644
1bdc94
--- a/include/exec/ram_addr.h
1bdc94
+++ b/include/exec/ram_addr.h
1bdc94
@@ -70,6 +70,8 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
1bdc94
     return host_addr_offset >> TARGET_PAGE_BITS;
1bdc94
 }
1bdc94
 
1bdc94
+bool ramblock_is_pmem(RAMBlock *rb);
1bdc94
+
1bdc94
 long qemu_getrampagesize(void);
1bdc94
 unsigned long last_ram_page(void);
1bdc94
 
1bdc94
@@ -84,6 +86,7 @@ unsigned long last_ram_page(void);
1bdc94
  *  @ram_flags: specify the properties of the ram block, which can be one
1bdc94
  *              or bit-or of following values
1bdc94
  *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
1bdc94
+ *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
1bdc94
  *              Other bits are ignored.
1bdc94
  *  @mem_path or @fd: specify the backing file or device
1bdc94
  *  @errp: pointer to Error*, to store an error if it happens
1bdc94
diff --git a/qemu-options.hx b/qemu-options.hx
1bdc94
index 4271cd3..5c58760 100644
1bdc94
--- a/qemu-options.hx
1bdc94
+++ b/qemu-options.hx
1bdc94
@@ -4045,6 +4045,13 @@ requires an alignment different than the default one used by QEMU, eg
1bdc94
 the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
1bdc94
 such cases, users can specify the required alignment via this option.
1bdc94
 
1bdc94
+The @option{pmem} option specifies whether the backing file specified
1bdc94
+by @option{mem-path} is in host persistent memory that can be accessed
1bdc94
+using the SNIA NVM programming model (e.g. Intel NVDIMM).
1bdc94
+If @option{pmem} is set to 'on', QEMU will take necessary operations to
1bdc94
+guarantee the persistence of its own writes to @option{mem-path}
1bdc94
+(e.g. in vNVDIMM label emulation and live migration).
1bdc94
+
1bdc94
 @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
1bdc94
 
1bdc94
 Creates a memory backend object, which can be used to back the guest RAM.
1bdc94
-- 
1bdc94
1.8.3.1
1bdc94