c401cc
From 9211c21a4a98c63aafcc08981eb8773a25df92c1 Mon Sep 17 00:00:00 2001
c401cc
Message-Id: <9211c21a4a98c63aafcc08981eb8773a25df92c1.1389183249.git.jdenemar@redhat.com>
c401cc
From: Shivaprasad G Bhat <shivaprasadbhat@gmail.com>
c401cc
Date: Mon, 6 Jan 2014 17:02:26 +0100
c401cc
Subject: [PATCH] qemu: fix live pinning to memory node on NUMA system
c401cc
c401cc
https://bugzilla.redhat.com/show_bug.cgi?id=1047234
c401cc
c401cc
Ever since the subcpusets (vcpu, emulator) were introduced, the parent
c401cc
cpuset cannot be modified to remove the nodes that are in use by the
c401cc
subcpusets.
c401cc
The fix is to break the memory node modification into three steps:
c401cc
 1. add the new nodes to the parent cpuset,
c401cc
 2. set the new nodes in the child cpusets,
c401cc
 3. remove the old nodes from the parent cpuset.
c401cc
c401cc
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1009880
c401cc
c401cc
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>
c401cc
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
c401cc
(cherry picked from commit 81fae6b95cfe72d0f5a987b6b5cd4bf86e32798c)
c401cc
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
c401cc
---
c401cc
 src/qemu/qemu_driver.c | 102 ++++++++++++++++++++++++++++++++++++++-----------
c401cc
 1 file changed, 79 insertions(+), 23 deletions(-)
c401cc
c401cc
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
c401cc
index 5216e44..a3d4725 100644
c401cc
--- a/src/qemu/qemu_driver.c
c401cc
+++ b/src/qemu/qemu_driver.c
c401cc
@@ -8413,6 +8413,84 @@ cleanup:
c401cc
 }
c401cc
 
c401cc
 static int
c401cc
+qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
c401cc
+                            virCapsPtr caps,
c401cc
+                            virBitmapPtr nodeset)
c401cc
+{
c401cc
+    virCgroupPtr cgroup_temp = NULL;
c401cc
+    virBitmapPtr temp_nodeset = NULL;
c401cc
+    qemuDomainObjPrivatePtr priv = vm->privateData;
c401cc
+    char *nodeset_str = NULL;
c401cc
+    size_t i = 0;
c401cc
+    int ret = -1;
c401cc
+
c401cc
+    if (vm->def->numatune.memory.mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
c401cc
+        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
c401cc
+                       _("change of nodeset for running domain "
c401cc
+                         "requires strict numa mode"));
c401cc
+        goto cleanup;
c401cc
+    }
c401cc
+
c401cc
+    /* Step 1: extend the current parent cgroup nodeset with the new nodes */
c401cc
+    if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 ||
c401cc
+        virBitmapParse(nodeset_str, 0, &temp_nodeset,
c401cc
+                       VIR_DOMAIN_CPUMASK_LEN) < 0)
c401cc
+        goto cleanup;
c401cc
+    VIR_FREE(nodeset_str);
c401cc
+
c401cc
+    for (i = 0; i < caps->host.nnumaCell; i++) {
c401cc
+        bool result;
c401cc
+        if (virBitmapGetBit(nodeset, i, &result) < 0) {
c401cc
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
c401cc
+                           _("Failed to get cpuset bit values"));
c401cc
+            goto cleanup;
c401cc
+        }
c401cc
+        if (result && (virBitmapSetBit(temp_nodeset, i) < 0)) {
c401cc
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
c401cc
+                           _("Failed to set temporary cpuset bit values"));
c401cc
+            goto cleanup;
c401cc
+        }
c401cc
+    }
c401cc
+
c401cc
+    if (!(nodeset_str = virBitmapFormat(temp_nodeset))) {
c401cc
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
c401cc
+                       _("Failed to format nodeset"));
c401cc
+        goto cleanup;
c401cc
+    }
c401cc
+
c401cc
+    if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0)
c401cc
+        goto cleanup;
c401cc
+    VIR_FREE(nodeset_str);
c401cc
+
c401cc
+    /* Steps 2 and 3: set the new nodeset on the children, then the parent */
c401cc
+    if (!(nodeset_str = virBitmapFormat(nodeset))) {
c401cc
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
c401cc
+                       _("Failed to format nodeset"));
c401cc
+        goto cleanup;
c401cc
+    }
c401cc
+
c401cc
+    for (i = 0; i < priv->nvcpupids; i++) {
c401cc
+        if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 ||
c401cc
+            virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0)
c401cc
+            goto cleanup;
c401cc
+        virCgroupFree(&cgroup_temp);
c401cc
+    }
c401cc
+
c401cc
+    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
c401cc
+        virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0 ||
c401cc
+        virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0)
c401cc
+        goto cleanup;
c401cc
+
c401cc
+    ret = 0;
c401cc
+ cleanup:
c401cc
+    VIR_FREE(nodeset_str);
c401cc
+    virBitmapFree(temp_nodeset);
c401cc
+    virCgroupFree(&cgroup_temp);
c401cc
+
c401cc
+    return ret;
c401cc
+}
c401cc
+
c401cc
+static int
c401cc
 qemuDomainSetNumaParameters(virDomainPtr dom,
c401cc
                             virTypedParameterPtr params,
c401cc
                             int nparams,
c401cc
@@ -8479,7 +8557,6 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
c401cc
             }
c401cc
         } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) {
c401cc
             virBitmapPtr nodeset = NULL;
c401cc
-            char *nodeset_str = NULL;
c401cc
 
c401cc
             if (virBitmapParse(params[i].value.s,
c401cc
                                0, &nodeset,
c401cc
@@ -8489,32 +8566,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
c401cc
             }
c401cc
 
c401cc
             if (flags & VIR_DOMAIN_AFFECT_LIVE) {
c401cc
-                if (vm->def->numatune.memory.mode !=
c401cc
-                    VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
c401cc
-                    virReportError(VIR_ERR_OPERATION_INVALID, "%s",
c401cc
-                                   _("change of nodeset for running domain "
c401cc
-                                     "requires strict numa mode"));
c401cc
-                    virBitmapFree(nodeset);
c401cc
-                    ret = -1;
c401cc
-                    continue;
c401cc
-                }
c401cc
-
c401cc
-                /* Ensure the cpuset string is formated before passing to cgroup */
c401cc
-                if (!(nodeset_str = virBitmapFormat(nodeset))) {
c401cc
-                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
c401cc
-                                   _("Failed to format nodeset"));
c401cc
-                    virBitmapFree(nodeset);
c401cc
-                    ret = -1;
c401cc
-                    continue;
c401cc
-                }
c401cc
-
c401cc
-                if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) {
c401cc
+                if (qemuDomainSetNumaParamsLive(vm, caps, nodeset) < 0) {
c401cc
                     virBitmapFree(nodeset);
c401cc
-                    VIR_FREE(nodeset_str);
c401cc
                     ret = -1;
c401cc
                     continue;
c401cc
                 }
c401cc
-                VIR_FREE(nodeset_str);
c401cc
 
c401cc
                 /* update vm->def here so that dumpxml can read the new
c401cc
                  * values from vm->def. */
c401cc
-- 
c401cc
1.8.5.2
c401cc