|
|
1bdc94 |
From 0b7c71b8b4afcbc92a9ef549d485c54da92204b3 Mon Sep 17 00:00:00 2001
|
|
|
1bdc94 |
From: "plai@redhat.com" <plai@redhat.com>
|
|
|
1bdc94 |
Date: Fri, 31 Aug 2018 16:25:56 +0200
|
|
|
1bdc94 |
Subject: [PATCH 14/29] hostmem-file: add the 'pmem' option
|
|
|
1bdc94 |
|
|
|
1bdc94 |
RH-Author: plai@redhat.com
|
|
|
1bdc94 |
Message-id: <1535732759-22481-7-git-send-email-plai@redhat.com>
|
|
|
1bdc94 |
Patchwork-id: 82007
|
|
|
1bdc94 |
O-Subject: [RHEL7.6 PATCH BZ 1539280 6/9] hostmem-file: add the 'pmem' option
|
|
|
1bdc94 |
Bugzilla: 1539280
|
|
|
1bdc94 |
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
From: Junyan He <junyan.he@intel.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it
|
|
|
1bdc94 |
needs to know whether the backend storage is a real persistent memory,
|
|
|
1bdc94 |
in order to decide whether special operations should be performed to
|
|
|
1bdc94 |
ensure the data persistence.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
This boolean option 'pmem' allows users to specify whether the backend
|
|
|
1bdc94 |
storage of memory-backend-file is a real persistent memory. If
|
|
|
1bdc94 |
'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the
|
|
|
1bdc94 |
corresponding memory region. If 'pmem' is set without libpmem
|
|
|
1bdc94 |
support, an error is generated.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Signed-off-by: Junyan He <junyan.he@intel.com>
|
|
|
1bdc94 |
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
|
|
|
1bdc94 |
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
1bdc94 |
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
|
|
1bdc94 |
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
|
|
1bdc94 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
1bdc94 |
(cherry picked from commit a4de8552b2580adf6fa4874439217b65d3bdd88b)
|
|
|
1bdc94 |
Signed-off-by: Paul Lai <plai@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Resolved Conflicts:
|
|
|
1bdc94 |
docs/nvdimm.txt
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
1bdc94 |
---
|
|
|
1bdc94 |
backends/hostmem-file.c | 43 +++++++++++++++++++++++++++++++++++++++++--
|
|
|
1bdc94 |
docs/nvdimm.txt | 42 ++++++++++++++++++++++++++++++++++++++++++
|
|
|
1bdc94 |
exec.c | 8 ++++++++
|
|
|
1bdc94 |
include/exec/memory.h | 4 ++++
|
|
|
1bdc94 |
include/exec/ram_addr.h | 3 +++
|
|
|
1bdc94 |
qemu-options.hx | 7 +++++++
|
|
|
1bdc94 |
6 files changed, 105 insertions(+), 2 deletions(-)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
|
|
|
1bdc94 |
index 34c68bb..2476dcb 100644
|
|
|
1bdc94 |
--- a/backends/hostmem-file.c
|
|
|
1bdc94 |
+++ b/backends/hostmem-file.c
|
|
|
1bdc94 |
@@ -12,6 +12,7 @@
|
|
|
1bdc94 |
#include "qemu/osdep.h"
|
|
|
1bdc94 |
#include "qapi/error.h"
|
|
|
1bdc94 |
#include "qemu-common.h"
|
|
|
1bdc94 |
+#include "qemu/error-report.h"
|
|
|
1bdc94 |
#include "sysemu/hostmem.h"
|
|
|
1bdc94 |
#include "sysemu/sysemu.h"
|
|
|
1bdc94 |
#include "qom/object_interfaces.h"
|
|
|
1bdc94 |
@@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile;
|
|
|
1bdc94 |
struct HostMemoryBackendFile {
|
|
|
1bdc94 |
HostMemoryBackend parent_obj;
|
|
|
1bdc94 |
|
|
|
1bdc94 |
- bool discard_data;
|
|
|
1bdc94 |
char *mem_path;
|
|
|
1bdc94 |
uint64_t align;
|
|
|
1bdc94 |
+ bool discard_data;
|
|
|
1bdc94 |
+ bool is_pmem;
|
|
|
1bdc94 |
};
|
|
|
1bdc94 |
|
|
|
1bdc94 |
static void
|
|
|
1bdc94 |
@@ -59,7 +61,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
|
|
1bdc94 |
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
|
|
|
1bdc94 |
path,
|
|
|
1bdc94 |
backend->size, fb->align,
|
|
|
1bdc94 |
- backend->share ? RAM_SHARED : 0,
|
|
|
1bdc94 |
+ (backend->share ? RAM_SHARED : 0) |
|
|
|
1bdc94 |
+ (fb->is_pmem ? RAM_PMEM : 0),
|
|
|
1bdc94 |
fb->mem_path, errp);
|
|
|
1bdc94 |
g_free(path);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
@@ -131,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
|
|
|
1bdc94 |
error_propagate(errp, local_err);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+static bool file_memory_backend_get_pmem(Object *o, Error **errp)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ return MEMORY_BACKEND_FILE(o)->is_pmem;
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
|
|
|
1bdc94 |
+ HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ if (host_memory_backend_mr_inited(backend)) {
|
|
|
1bdc94 |
+ error_setg(errp, "cannot change property 'pmem' of %s '%s'",
|
|
|
1bdc94 |
+ object_get_typename(o),
|
|
|
1bdc94 |
+ object_get_canonical_path_component(o));
|
|
|
1bdc94 |
+ return;
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+#ifndef CONFIG_LIBPMEM
|
|
|
1bdc94 |
+ if (value) {
|
|
|
1bdc94 |
+ Error *local_err = NULL;
|
|
|
1bdc94 |
+ error_setg(&local_err,
|
|
|
1bdc94 |
+ "Lack of libpmem support while setting the 'pmem=on'"
|
|
|
1bdc94 |
+ " of %s '%s'. We can't ensure data persistence.",
|
|
|
1bdc94 |
+ object_get_typename(o),
|
|
|
1bdc94 |
+ object_get_canonical_path_component(o));
|
|
|
1bdc94 |
+ error_propagate(errp, local_err);
|
|
|
1bdc94 |
+ return;
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+#endif
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ fb->is_pmem = value;
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
static void file_backend_unparent(Object *obj)
|
|
|
1bdc94 |
{
|
|
|
1bdc94 |
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
|
|
|
1bdc94 |
@@ -162,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data)
|
|
|
1bdc94 |
file_memory_backend_get_align,
|
|
|
1bdc94 |
file_memory_backend_set_align,
|
|
|
1bdc94 |
NULL, NULL, &error_abort);
|
|
|
1bdc94 |
+ object_class_property_add_bool(oc, "pmem",
|
|
|
1bdc94 |
+ file_memory_backend_get_pmem, file_memory_backend_set_pmem,
|
|
|
1bdc94 |
+ &error_abort);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
static void file_backend_instance_finalize(Object *o)
|
|
|
1bdc94 |
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
|
|
|
1bdc94 |
index e903d8b..5f158a6 100644
|
|
|
1bdc94 |
--- a/docs/nvdimm.txt
|
|
|
1bdc94 |
+++ b/docs/nvdimm.txt
|
|
|
1bdc94 |
@@ -153,3 +153,45 @@ guest NVDIMM region mapping structure. This unarmed flag indicates
|
|
|
1bdc94 |
guest software that this vNVDIMM device contains a region that cannot
|
|
|
1bdc94 |
accept persistent writes. In result, for example, the guest Linux
|
|
|
1bdc94 |
NVDIMM driver, marks such vNVDIMM device as read-only.
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+NVDIMM Persistence
|
|
|
1bdc94 |
+------------------
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ACPI 6.2 Errata A added support for a new Platform Capabilities Structure
|
|
|
1bdc94 |
+which allows the platform to communicate what features it supports related to
|
|
|
1bdc94 |
+NVDIMM data persistence. Users can provide a persistence value to a guest via
|
|
|
1bdc94 |
+the optional "nvdimm-persistence" machine command line option:
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ -machine pc,accel=kvm,nvdimm,nvdimm-persistence=cpu
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+There are currently two valid values for this option:
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+"mem-ctrl" - The platform supports flushing dirty data from the memory
|
|
|
1bdc94 |
+ controller to the NVDIMMs in the event of power loss.
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+"cpu" - The platform supports flushing dirty data from the CPU cache to
|
|
|
1bdc94 |
+ the NVDIMMs in the event of power loss. This implies that the
|
|
|
1bdc94 |
+ platform also supports flushing dirty data through the memory
|
|
|
1bdc94 |
+ controller on power loss.
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+If the vNVDIMM backend is in host persistent memory that can be accessed in
|
|
|
1bdc94 |
+SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set
|
|
|
1bdc94 |
+the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU
|
|
|
1bdc94 |
+is built with libpmem [2] support (configured with --enable-libpmem), QEMU
|
|
|
1bdc94 |
+will take necessary operations to guarantee the persistence of its own writes
|
|
|
1bdc94 |
+to the vNVDIMM backend (e.g., in vNVDIMM label emulation and live migration).
|
|
|
1bdc94 |
+If 'pmem' is 'on' while there is no libpmem support, QEMU will exit and report
|
|
|
1bdc94 |
+a "lack of libpmem support" message to ensure the persistence is available.
|
|
|
1bdc94 |
+For example, if we want to ensure the persistence for some backend file,
|
|
|
1bdc94 |
+use the QEMU command line:
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+References
|
|
|
1bdc94 |
+----------
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+[1] NVM Programming Model (NPM)
|
|
|
1bdc94 |
+ Version 1.2
|
|
|
1bdc94 |
+ https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
|
|
|
1bdc94 |
+[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
|
|
|
1bdc94 |
+ http://pmem.io/pmdk/
|
|
|
1bdc94 |
diff --git a/exec.c b/exec.c
|
|
|
1bdc94 |
index 295142b..c670185 100644
|
|
|
1bdc94 |
--- a/exec.c
|
|
|
1bdc94 |
+++ b/exec.c
|
|
|
1bdc94 |
@@ -2045,6 +2045,9 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
|
|
1bdc94 |
Error *local_err = NULL;
|
|
|
1bdc94 |
int64_t file_size;
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+ /* Only these RAM flags are supported for now. */
|
|
|
1bdc94 |
+ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
if (xen_enabled()) {
|
|
|
1bdc94 |
error_setg(errp, "-mem-path not supported with Xen");
|
|
|
1bdc94 |
return NULL;
|
|
|
1bdc94 |
@@ -3863,6 +3866,11 @@ err:
|
|
|
1bdc94 |
return ret;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+bool ramblock_is_pmem(RAMBlock *rb)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ return rb->flags & RAM_PMEM;
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
#endif
|
|
|
1bdc94 |
|
|
|
1bdc94 |
void page_size_init(void)
|
|
|
1bdc94 |
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
|
|
1bdc94 |
index b3abe61..fd2c574 100644
|
|
|
1bdc94 |
--- a/include/exec/memory.h
|
|
|
1bdc94 |
+++ b/include/exec/memory.h
|
|
|
1bdc94 |
@@ -122,6 +122,9 @@ typedef struct IOMMUNotifier IOMMUNotifier;
|
|
|
1bdc94 |
/* RAM can be migrated */
|
|
|
1bdc94 |
#define RAM_MIGRATABLE (1 << 4)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+/* RAM is a persistent kind of memory */
|
|
|
1bdc94 |
+#define RAM_PMEM (1 << 5)
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
|
|
1bdc94 |
IOMMUNotifierFlag flags,
|
|
|
1bdc94 |
hwaddr start, hwaddr end)
|
|
|
1bdc94 |
@@ -541,6 +544,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
|
|
|
1bdc94 |
* (getpagesize()) will be used.
|
|
|
1bdc94 |
* @ram_flags: Memory region features:
|
|
|
1bdc94 |
* - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
|
|
|
1bdc94 |
+ * - RAM_PMEM: the memory is persistent memory
|
|
|
1bdc94 |
* Other bits are ignored now.
|
|
|
1bdc94 |
* @path: the path in which to allocate the RAM.
|
|
|
1bdc94 |
* @errp: pointer to Error*, to store an error if it happens.
|
|
|
1bdc94 |
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
|
|
|
1bdc94 |
index 67e163e..922305d 100644
|
|
|
1bdc94 |
--- a/include/exec/ram_addr.h
|
|
|
1bdc94 |
+++ b/include/exec/ram_addr.h
|
|
|
1bdc94 |
@@ -70,6 +70,8 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
|
|
|
1bdc94 |
return host_addr_offset >> TARGET_PAGE_BITS;
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+bool ramblock_is_pmem(RAMBlock *rb);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
long qemu_getrampagesize(void);
|
|
|
1bdc94 |
unsigned long last_ram_page(void);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
@@ -84,6 +86,7 @@ unsigned long last_ram_page(void);
|
|
|
1bdc94 |
* @ram_flags: specify the properties of the ram block, which can be one
|
|
|
1bdc94 |
* or bit-or of following values
|
|
|
1bdc94 |
* - RAM_SHARED: mmap the backing file or device with MAP_SHARED
|
|
|
1bdc94 |
+ * - RAM_PMEM: the backend @mem_path or @fd is persistent memory
|
|
|
1bdc94 |
* Other bits are ignored.
|
|
|
1bdc94 |
* @mem_path or @fd: specify the backing file or device
|
|
|
1bdc94 |
* @errp: pointer to Error*, to store an error if it happens
|
|
|
1bdc94 |
diff --git a/qemu-options.hx b/qemu-options.hx
|
|
|
1bdc94 |
index 4271cd3..5c58760 100644
|
|
|
1bdc94 |
--- a/qemu-options.hx
|
|
|
1bdc94 |
+++ b/qemu-options.hx
|
|
|
1bdc94 |
@@ -4045,6 +4045,13 @@ requires an alignment different than the default one used by QEMU, eg
|
|
|
1bdc94 |
the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
|
|
|
1bdc94 |
such cases, users can specify the required alignment via this option.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+The @option{pmem} option specifies whether the backing file specified
|
|
|
1bdc94 |
+by @option{mem-path} is in host persistent memory that can be accessed
|
|
|
1bdc94 |
+using the SNIA NVM programming model (e.g. Intel NVDIMM).
|
|
|
1bdc94 |
+If @option{pmem} is set to 'on', QEMU will take necessary operations to
|
|
|
1bdc94 |
+guarantee the persistence of its own writes to @option{mem-path}
|
|
|
1bdc94 |
+(e.g. in vNVDIMM label emulation and live migration).
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Creates a memory backend object, which can be used to back the guest RAM.
|
|
|
1bdc94 |
--
|
|
|
1bdc94 |
1.8.3.1
|
|
|
1bdc94 |
|