From af079d99de7c556c3b9bb10037dae90e0f23f38a Mon Sep 17 00:00:00 2001 Message-Id: From: Michal Privoznik Date: Tue, 18 Dec 2018 11:47:36 +0100 Subject: [PATCH] qemu: Don't use -mem-prealloc among with .prealloc=yes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://bugzilla.redhat.com/show_bug.cgi?id=1624223 There are two ways to request memory preallocation on cmd line: -mem-prealloc and .prealloc attribute for a memory-backend-file. However, as it turns out it's not safe to use both at the same time. If -mem-prealloc is used then qemu will fully allocate the memory (this is done by actually touching every page that has been allocated). Then, if .prealloc=yes is specified, mbind(flags = MPOL_MF_STRICT | MPOL_MF_MOVE) is called which: a) has to (possibly) move the memory to a different NUMA node, b) can have no effect when hugepages are in play (thus ignoring user request to place memory on desired NUMA nodes). Prefer -mem-prealloc as it is more backward compatible compared to switching to "-numa node,memdev= + -object memory-backend-file". Signed-off-by: Michal Privoznik Reviewed-by: John Ferlan (cherry picked from commit c658764decf357ef2a064f09235fb6b8bd027f8b) Signed-off-by: Michal Privoznik Conflicts: src/qemu/qemu_command.c: src/qemu/qemu_domain.c: src/qemu/qemu_domain.h: Context mostly, the upstream code diverged. Signed-off-by: Michal Privoznik Message-Id: <37771aafbb9d1855721efde7ade34c7b98fb1fc7.1545129996.git.mprivozn@redhat.com> Reviewed-by: Ján Tomko --- src/qemu/qemu_command.c | 26 ++++++++++++------- src/qemu/qemu_domain.c | 7 +++++ src/qemu/qemu_domain.h | 3 +++ .../hugepages-numa-default-dimm.args | 2 +- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index fa2b904239..7ffc4358e3 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3185,11 +3185,13 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, if (useHugepage) { if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0) goto cleanup; - prealloc = true; + if (!priv->memPrealloc) + prealloc = true; } else if (mem->nvdimmPath) { if (VIR_STRDUP(memPath, mem->nvdimmPath) < 0) goto cleanup; - prealloc = true; + if (!priv->memPrealloc) + prealloc = true; } else { /* We can have both pagesize and mem source. If that's the case, * prefer hugepages as those are more specific. */ @@ -7603,7 +7605,8 @@ qemuBuildSmpCommandLine(virCommandPtr cmd, static int qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, const virDomainDef *def, - virCommandPtr cmd) + virCommandPtr cmd, + qemuDomainObjPrivatePtr priv) { const long system_page_size = virGetSystemPageSizeKB(); char *mem_path = NULL; @@ -7624,8 +7627,10 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, if (qemuGetDomainHupageMemPath(def, cfg, def->mem.hugepages[0].size, &mem_path) < 0) return -1; - if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) + if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) { virCommandAddArgList(cmd, "-mem-prealloc", NULL); + priv->memPrealloc = true; + } virCommandAddArgList(cmd, "-mem-path", mem_path, NULL); VIR_FREE(mem_path); @@ -7638,7 +7643,8 @@ static int qemuBuildMemCommandLine(virCommandPtr cmd, virQEMUDriverConfigPtr cfg, const virDomainDef *def, - virQEMUCapsPtr qemuCaps) + virQEMUCapsPtr qemuCaps, + qemuDomainObjPrivatePtr priv) { if (qemuDomainDefValidateMemoryHotplug(def, qemuCaps, NULL) < 0) return -1; @@ -7657,15 +7663,17 @@ qemuBuildMemCommandLine(virCommandPtr cmd, virDomainDefGetMemoryInitial(def) / 1024); } - if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) + if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) { virCommandAddArgList(cmd, "-mem-prealloc", NULL); + priv->memPrealloc = true; + } /* * Add '-mem-path' (and '-mem-prealloc') parameter here if * the hugepages and no numa node is specified. */ if (!virDomainNumaGetNodeCount(def->numa) && - qemuBuildMemPathStr(cfg, def, cmd) < 0) + qemuBuildMemPathStr(cfg, def, cmd, priv) < 0) return -1; if (def->mem.locked && !virQEMUCapsGet(qemuCaps, QEMU_CAPS_REALTIME_MLOCK)) { @@ -7772,7 +7780,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, } if (!needBackend && - qemuBuildMemPathStr(cfg, def, cmd) < 0) + qemuBuildMemPathStr(cfg, def, cmd, priv) < 0) goto cleanup; for (i = 0; i < ncells; i++) { @@ -10445,7 +10453,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver, if (!migrateURI && !snapshot && qemuDomainAlignMemorySizes(def) < 0) goto error; - if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps) < 0) + if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps, priv) < 0) goto error; if (qemuBuildSmpCommandLine(cmd, def) < 0) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 8604385aa2..95b84af78a 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1936,6 +1936,8 @@ qemuDomainObjPrivateDataClear(qemuDomainObjPrivatePtr priv) VIR_FREE(priv->libDir); VIR_FREE(priv->channelTargetDir); + priv->memPrealloc = false; + /* remove automatic pinning data */ virBitmapFree(priv->autoNodeset); priv->autoNodeset = NULL; @@ -2439,6 +2441,9 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf, qemuDomainObjPrivateXMLFormatPR(buf, priv); + if (priv->memPrealloc) + virBufferAddLit(buf, "\n"); + if (qemuDomainObjPrivateXMLFormatBlockjobs(buf, vm) < 0) return -1; @@ -2934,6 +2939,8 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, if (qemuDomainObjPrivateXMLParseBlockjobs(priv, ctxt) < 0) goto error; + priv->memPrealloc = virXPathBoolean("boolean(./memPrealloc)", ctxt) == 1; + return 0; error: diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index cc406e3ca0..8463a8b706 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -367,6 +367,9 @@ struct _qemuDomainObjPrivate { /* qemuProcessStartCPUs stores the reason for starting vCPUs here for the * RESUME event handler to use it */ virDomainRunningReason runningReason; + + /* true if global -mem-prealloc appears on cmd line */ + bool memPrealloc; }; # define QEMU_DOMAIN_PRIVATE(vm) \ diff --git a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args index 855966a137..e7294a0882 100644 --- a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args +++ b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args @@ -13,7 +13,7 @@ QEMU_AUDIO_DRV=none \ -mem-prealloc \ -mem-path /dev/hugepages2M/libvirt/qemu/-1-fedora \ -numa node,nodeid=0,cpus=0-1,mem=1024 \ --object memory-backend-file,id=memdimm0,prealloc=yes,\ +-object memory-backend-file,id=memdimm0,\ mem-path=/dev/hugepages1G/libvirt/qemu/-1-fedora,size=1073741824,\ host-nodes=1-3,policy=bind \ -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \ -- 2.22.0