9119d9
From a53fd36c939c31dbfa134d8d0f342bb257ff300d Mon Sep 17 00:00:00 2001
9119d9
Message-Id: <a53fd36c939c31dbfa134d8d0f342bb257ff300d@dist-git>
9119d9
From: Martin Kletzander <mkletzan@redhat.com>
9119d9
Date: Thu, 15 Jan 2015 15:03:48 +0100
9119d9
Subject: [PATCH] qemu: Leave cpuset.mems in parent cgroup alone
9119d9
9119d9
https://bugzilla.redhat.com/show_bug.cgi?id=1161540
9119d9
9119d9
Instead of setting the value of cpuset.mems once when the domain starts
9119d9
and then re-calculating the value every time we need to change the child
9119d9
cgroup values, leave the cgroup alone and rather set the child data
9119d9
every time there is new cgroup created.  We don't leave any task in the
9119d9
parent group anyway.  This will ease both current and future code.
9119d9
9119d9
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
9119d9
(cherry picked from commit af2a1f0587d88656f2c14265a63fbc11ecbd924e)
9119d9
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
9119d9
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
9119d9
---
9119d9
 src/qemu/qemu_cgroup.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++--
9119d9
 src/qemu/qemu_driver.c | 59 +++++++++++++++-----------------------------
9119d9
 2 files changed, 85 insertions(+), 41 deletions(-)
9119d9
9119d9
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
9119d9
index fa94037..0a4576a 100644
9119d9
--- a/src/qemu/qemu_cgroup.c
9119d9
+++ b/src/qemu/qemu_cgroup.c
9119d9
@@ -35,6 +35,7 @@
9119d9
 #include "virstring.h"
9119d9
 #include "virfile.h"
9119d9
 #include "virtypedparam.h"
9119d9
+#include "virnuma.h"
9119d9
 
9119d9
 #define VIR_FROM_THIS VIR_FROM_QEMU
9119d9
 
9119d9
@@ -625,8 +626,7 @@ qemuSetupCpusetMems(virDomainObjPtr vm)
9119d9
 
9119d9
     if (mem_mask)
9119d9
         if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
9119d9
-            virCgroupSetCpusetMems(cgroup_temp, mem_mask) < 0 ||
9119d9
-            virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
9119d9
+            virCgroupSetCpusetMems(cgroup_temp, mem_mask) < 0)
9119d9
             goto cleanup;
9119d9
 
9119d9
     ret = 0;
9119d9
@@ -781,6 +781,39 @@ qemuInitCgroup(virQEMUDriverPtr driver,
9119d9
     return ret;
9119d9
 }
9119d9
 
9119d9
+static void
9119d9
+qemuRestoreCgroupState(virDomainObjPtr vm)
9119d9
+{
9119d9
+    char *mem_mask;
9119d9
+    int empty = -1;
9119d9
+    qemuDomainObjPrivatePtr priv = vm->privateData;
9119d9
+    virBitmapPtr all_nodes;
9119d9
+
9119d9
+    if (!(all_nodes = virNumaGetHostNodeset()))
9119d9
+        goto error;
9119d9
+
9119d9
+    if (!(mem_mask = virBitmapFormat(all_nodes)))
9119d9
+        goto error;
9119d9
+
9119d9
+    if ((empty = virCgroupHasEmptyTasks(priv->cgroup,
9119d9
+                                        VIR_CGROUP_CONTROLLER_CPUSET)) < 0)
9119d9
+
9119d9
+    if (!empty)
9119d9
+        goto error;
9119d9
+
9119d9
+    if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
9119d9
+        goto error;
9119d9
+
9119d9
+ cleanup:
9119d9
+    VIR_FREE(mem_mask);
9119d9
+    virBitmapFree(all_nodes);
9119d9
+    return;
9119d9
+
9119d9
+ error:
9119d9
+    virResetLastError();
9119d9
+    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
9119d9
+    goto cleanup;
9119d9
+}
9119d9
 
9119d9
 int
9119d9
 qemuConnectCgroup(virQEMUDriverPtr driver,
9119d9
@@ -808,6 +841,8 @@ qemuConnectCgroup(virQEMUDriverPtr driver,
9119d9
                                   &priv->cgroup) < 0)
9119d9
         goto cleanup;
9119d9
 
9119d9
+    qemuRestoreCgroupState(vm);
9119d9
+
9119d9
  done:
9119d9
     ret = 0;
9119d9
  cleanup:
9119d9
@@ -959,6 +994,7 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
9119d9
     size_t i, j;
9119d9
     unsigned long long period = vm->def->cputune.period;
9119d9
     long long quota = vm->def->cputune.quota;
9119d9
+    char *mem_mask = NULL;
9119d9
 
9119d9
     if ((period || quota) &&
9119d9
         !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
9119d9
@@ -990,6 +1026,13 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
9119d9
         return 0;
9119d9
     }
9119d9
 
9119d9
+    if (virDomainNumatuneGetMode(vm->def->numatune, -1) ==
9119d9
+        VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
9119d9
+        virDomainNumatuneMaybeFormatNodeset(vm->def->numatune,
9119d9
+                                            priv->autoNodeset,
9119d9
+                                            &mem_mask, -1) < 0)
9119d9
+        goto cleanup;
9119d9
+
9119d9
     for (i = 0; i < priv->nvcpupids; i++) {
9119d9
         if (virCgroupNewVcpu(priv->cgroup, i, true, &cgroup_vcpu) < 0)
9119d9
             goto cleanup;
9119d9
@@ -998,6 +1041,10 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
9119d9
         if (virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]) < 0)
9119d9
             goto cleanup;
9119d9
 
9119d9
+        if (mem_mask &&
9119d9
+            virCgroupSetCpusetMems(cgroup_vcpu, mem_mask) < 0)
9119d9
+            goto cleanup;
9119d9
+
9119d9
         if (period || quota) {
9119d9
             if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
9119d9
                 goto cleanup;
9119d9
@@ -1023,6 +1070,7 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
9119d9
 
9119d9
         virCgroupFree(&cgroup_vcpu);
9119d9
     }
9119d9
+    VIR_FREE(mem_mask);
9119d9
 
9119d9
     return 0;
9119d9
 
9119d9
@@ -1031,6 +1079,7 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
9119d9
         virCgroupRemove(cgroup_vcpu);
9119d9
         virCgroupFree(&cgroup_vcpu);
9119d9
     }
9119d9
+    VIR_FREE(mem_mask);
9119d9
 
9119d9
     return -1;
9119d9
 }
9119d9
@@ -1119,6 +1168,7 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
9119d9
     size_t i, j;
9119d9
     unsigned long long period = vm->def->cputune.period;
9119d9
     long long quota = vm->def->cputune.quota;
9119d9
+    char *mem_mask = NULL;
9119d9
 
9119d9
     if ((period || quota) &&
9119d9
         !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
9119d9
@@ -1147,6 +1197,13 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
9119d9
         return 0;
9119d9
     }
9119d9
 
9119d9
+    if (virDomainNumatuneGetMode(vm->def->numatune, -1) ==
9119d9
+        VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
9119d9
+        virDomainNumatuneMaybeFormatNodeset(vm->def->numatune,
9119d9
+                                            priv->autoNodeset,
9119d9
+                                            &mem_mask, -1) < 0)
9119d9
+        goto cleanup;
9119d9
+
9119d9
     for (i = 0; i < priv->niothreadpids; i++) {
9119d9
         /* IOThreads are numbered 1..n, although the array is 0..n-1,
9119d9
          * so we will account for that here
9119d9
@@ -1164,6 +1221,10 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
9119d9
                 goto cleanup;
9119d9
         }
9119d9
 
9119d9
+        if (mem_mask &&
9119d9
+            virCgroupSetCpusetMems(cgroup_iothread, mem_mask) < 0)
9119d9
+            goto cleanup;
9119d9
+
9119d9
         /* Set iothreadpin in cgroup if iothreadpin xml is provided */
9119d9
         if (virCgroupHasController(priv->cgroup,
9119d9
                                    VIR_CGROUP_CONTROLLER_CPUSET)) {
9119d9
@@ -1186,6 +1247,7 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
9119d9
 
9119d9
         virCgroupFree(&cgroup_iothread);
9119d9
     }
9119d9
+    VIR_FREE(mem_mask);
9119d9
 
9119d9
     return 0;
9119d9
 
9119d9
@@ -1194,6 +1256,7 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
9119d9
         virCgroupRemove(cgroup_iothread);
9119d9
         virCgroupFree(&cgroup_iothread);
9119d9
     }
9119d9
+    VIR_FREE(mem_mask);
9119d9
 
9119d9
     return -1;
9119d9
 }
9119d9
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
9119d9
index 8830e04..ea9368d 100644
9119d9
--- a/src/qemu/qemu_driver.c
9119d9
+++ b/src/qemu/qemu_driver.c
9119d9
@@ -4447,6 +4447,7 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
9119d9
     pid_t *cpupids = NULL;
9119d9
     int ncpupids;
9119d9
     virCgroupPtr cgroup_vcpu = NULL;
9119d9
+    char *mem_mask = NULL;
9119d9
 
9119d9
     qemuDomainObjEnterMonitor(driver, vm);
9119d9
 
9119d9
@@ -4510,6 +4511,13 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
9119d9
         goto cleanup;
9119d9
     }
9119d9
 
9119d9
+    if (virDomainNumatuneGetMode(vm->def->numatune, -1) ==
9119d9
+        VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
9119d9
+        virDomainNumatuneMaybeFormatNodeset(vm->def->numatune,
9119d9
+                                            priv->autoNodeset,
9119d9
+                                            &mem_mask, -1) < 0)
9119d9
+        goto cleanup;
9119d9
+
9119d9
     if (nvcpus > oldvcpus) {
9119d9
         for (i = oldvcpus; i < nvcpus; i++) {
9119d9
             if (priv->cgroup) {
9119d9
@@ -4518,6 +4526,10 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
9119d9
                 if (virCgroupNewVcpu(priv->cgroup, i, true, &cgroup_vcpu) < 0)
9119d9
                     goto cleanup;
9119d9
 
9119d9
+                if (mem_mask &&
9119d9
+                    virCgroupSetCpusetMems(cgroup_vcpu, mem_mask) < 0)
9119d9
+                    goto cleanup;
9119d9
+
9119d9
                 /* Add vcpu thread to the cgroup */
9119d9
                 rv = virCgroupAddTask(cgroup_vcpu, cpupids[i]);
9119d9
                 if (rv < 0) {
9119d9
@@ -4527,6 +4539,7 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
9119d9
                     virCgroupRemove(cgroup_vcpu);
9119d9
                     goto cleanup;
9119d9
                 }
9119d9
+
9119d9
             }
9119d9
 
9119d9
             /* Inherit def->cpuset */
9119d9
@@ -4599,6 +4612,7 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
9119d9
     qemuDomainObjExitMonitor(driver, vm);
9119d9
     vm->def->vcpus = vcpus;
9119d9
     VIR_FREE(cpupids);
9119d9
+    VIR_FREE(mem_mask);
9119d9
     virDomainAuditVcpu(vm, oldvcpus, nvcpus, "update", rc == 1);
9119d9
     if (cgroup_vcpu)
9119d9
         virCgroupFree(&cgroup_vcpu);
9119d9
@@ -9143,11 +9157,9 @@ qemuDomainGetMemoryParameters(virDomainPtr dom,
9119d9
 
9119d9
 static int
9119d9
 qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
9119d9
-                            virCapsPtr caps,
9119d9
                             virBitmapPtr nodeset)
9119d9
 {
9119d9
     virCgroupPtr cgroup_temp = NULL;
9119d9
-    virBitmapPtr temp_nodeset = NULL;
9119d9
     qemuDomainObjPrivatePtr priv = vm->privateData;
9119d9
     char *nodeset_str = NULL;
9119d9
     size_t i = 0;
9119d9
@@ -9161,39 +9173,15 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
9119d9
         goto cleanup;
9119d9
     }
9119d9
 
9119d9
-    /* Get existing nodeset values */
9119d9
-    if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 ||
9119d9
-        virBitmapParse(nodeset_str, 0, &temp_nodeset,
9119d9
-                       VIR_DOMAIN_CPUMASK_LEN) < 0)
9119d9
-        goto cleanup;
9119d9
-    VIR_FREE(nodeset_str);
9119d9
-
9119d9
-    for (i = 0; i < caps->host.nnumaCell; i++) {
9119d9
-        bool result;
9119d9
-        virCapsHostNUMACellPtr cell = caps->host.numaCell[i];
9119d9
-        if (virBitmapGetBit(nodeset, cell->num, &result) < 0) {
9119d9
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
9119d9
-                           _("Failed to get cpuset bit values"));
9119d9
-            goto cleanup;
9119d9
-        }
9119d9
-        if (result && (virBitmapSetBit(temp_nodeset, cell->num) < 0)) {
9119d9
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
9119d9
-                           _("Failed to set temporary cpuset bit values"));
9119d9
-            goto cleanup;
9119d9
-        }
9119d9
-    }
9119d9
-
9119d9
-    if (!(nodeset_str = virBitmapFormat(temp_nodeset)))
9119d9
-        goto cleanup;
9119d9
-
9119d9
-    if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0)
9119d9
-        goto cleanup;
9119d9
-    VIR_FREE(nodeset_str);
9119d9
-
9119d9
     /* Ensure the cpuset string is formatted before passing to cgroup */
9119d9
     if (!(nodeset_str = virBitmapFormat(nodeset)))
9119d9
         goto cleanup;
9119d9
 
9119d9
+    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
9119d9
+        virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0)
9119d9
+        goto cleanup;
9119d9
+    virCgroupFree(&cgroup_temp);
9119d9
+
9119d9
     for (i = 0; i < priv->nvcpupids; i++) {
9119d9
         if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 ||
9119d9
             virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0)
9119d9
@@ -9201,11 +9189,6 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
9119d9
         virCgroupFree(&cgroup_temp);
9119d9
     }
9119d9
 
9119d9
-    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
9119d9
-        virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0 ||
9119d9
-        virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0)
9119d9
-        goto cleanup;
9119d9
-
9119d9
     for (i = 0; i < priv->niothreadpids; i++) {
9119d9
         if (virCgroupNewIOThread(priv->cgroup, i + 1, false,
9119d9
                                  &cgroup_temp) < 0 ||
9119d9
@@ -9214,11 +9197,9 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
9119d9
         virCgroupFree(&cgroup_temp);
9119d9
     }
9119d9
 
9119d9
-
9119d9
     ret = 0;
9119d9
  cleanup:
9119d9
     VIR_FREE(nodeset_str);
9119d9
-    virBitmapFree(temp_nodeset);
9119d9
     virCgroupFree(&cgroup_temp);
9119d9
 
9119d9
     return ret;
9119d9
@@ -9313,7 +9294,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
9119d9
         }
9119d9
 
9119d9
         if (nodeset &&
9119d9
-            qemuDomainSetNumaParamsLive(vm, caps, nodeset) < 0)
9119d9
+            qemuDomainSetNumaParamsLive(vm, nodeset) < 0)
9119d9
             goto endjob;
9119d9
 
9119d9
         if (virDomainNumatuneSet(&vm->def->numatune,
9119d9
-- 
9119d9
2.2.1
9119d9