Blob Blame History Raw
From 5832f82195c2b4144841645f90467cfb11d8d2a4 Mon Sep 17 00:00:00 2001
Message-Id: <5832f82195c2b4144841645f90467cfb11d8d2a4@dist-git>
From: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>
Date: Thu, 3 Aug 2017 10:11:48 +0200
Subject: [PATCH] qemu: Enable NUMA node tag in pci-root for PPC64

This patch addresses the same aspects on PPC that bug 1103314 addressed
on x86.

The PCI expander bus creates multiple primary PCI busses, where each of
these busses can be assigned a specific NUMA affinity, which, on x86, is
advertised through ACPI on a per-bus basis.

For SPAPR, a PHB's NUMA affinities are assigned on a per-PHB basis, and
there is no mechanism for advertising NUMA affinities to a guest on a
per-bus basis. So, even if qemu-ppc manages to get some sort of multi-bus
topology working using PXB, there is no way to expose the affinities
of these busses to the guest. It can only be exposed on a per-PHB/per-domain
basis.

So this patch enables the NUMA node tag in the pci-root controller on PPC.

The way to set the NUMA node is through the numa_node option of the
spapr-pci-host-bridge device. However, for the implicit PHB, the only way
to set numa_node is through the -global option. The -global option applies
to all PHBs unless the option is explicitly specified on the respective
PHB on the command line. The default PHB has emulated devices only, so
the patch prevents setting the NUMA node for the default PHB.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
(cherry picked from commit e5a0579996b96e74d12dd348cb73b6a0947f9a20)

Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1474327

Signed-off-by: Andrea Bolognani <abologna@redhat.com>
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
 docs/formatdomain.html.in                          |  5 ++-
 src/conf/domain_conf.c                             |  9 +++-
 src/qemu/qemu_command.c                            | 10 +++++
 src/qemu/qemu_domain.c                             | 13 +++---
 .../qemuxml2argv-pseries-default-phb-numa-node.xml | 29 ++++++++++++
 .../qemuxml2argv-pseries-phb-numa-node.args        | 28 ++++++++++++
 .../qemuxml2argv-pseries-phb-numa-node.xml         | 41 +++++++++++++++++
 tests/qemuxml2argvtest.c                           |  6 +++
 .../qemuxml2xmlout-pseries-phb-numa-node.xml       | 52 ++++++++++++++++++++++
 tests/qemuxml2xmltest.c                            |  4 ++
 10 files changed, 188 insertions(+), 9 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml
 create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index bc67a53408..5e747542fc 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -3751,7 +3751,10 @@
       </dd>
       <dt><code>node</code></dt>
       <dd>
-        pci-expander-bus controllers can have an
+        Some PCI controllers (<code>pci-expander-bus</code> for the pc
+        machine type, <code>pcie-expander-bus</code> for the q35 machine
+        type and, <span class="since">since 3.6.0</span>,
+        <code>pci-root</code> for the pseries machine type) can have an
         optional <code>&lt;node&gt;</code> subelement within
         the <code>&lt;target&gt;</code> subelement, which is used to
         set the NUMA node reported to the guest OS for that bus - the
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 5941a3a4c4..7ba2bc01ca 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -9314,8 +9314,15 @@ virDomainControllerDefParseXML(xmlNodePtr node,
                 goto error;
             }
         }
-        if (numaNode >= 0)
+        if (numaNode >= 0) {
+            if (def->idx == 0) {
+                virReportError(VIR_ERR_XML_ERROR, "%s",
+                               _("The PCI controller with index=0 can't "
+                                 "be associated with a NUMA node"));
+                goto error;
+            }
             def->opts.pciopts.numaNode = numaNode;
+        }
         break;
 
     default:
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index cb0eac4668..0437db0ba2 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3191,6 +3191,16 @@ qemuBuildControllerDevStr(const virDomainDef *domainDef,
             virBufferAsprintf(&buf, "%s,index=%d,id=%s",
                               modelName, def->opts.pciopts.targetIndex,
                               def->info.alias);
+
+            if (def->opts.pciopts.numaNode != -1) {
+                if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE)) {
+                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                                   _("the spapr-pci-host-bridge controller "
+                                     "doesn't support numa_node on this QEMU binary"));
+                    goto error;
+                }
+                virBufferAsprintf(&buf, ",numa_node=%d", def->opts.pciopts.numaNode);
+            }
             break;
         case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT:
         case VIR_DOMAIN_CONTROLLER_MODEL_PCI_LAST:
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 916358ea1e..ff7efa609e 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -3406,15 +3406,14 @@ qemuDomainControllerDefPostParse(virDomainControllerDefPtr cont,
             return -1;
         }
 
-        /* if a PCI expander bus has a NUMA node set, make sure
-         * that NUMA node is configured in the guest <cpu><numa>
-         * array. NUMA cell id's in this array are numbered
+        /* if a PCI expander bus or pci-root on Pseries has a NUMA node
+         * set, make sure that NUMA node is configured in the guest
+         * <cpu><numa> array. NUMA cell id's in this array are numbered
          * from 0 .. size-1.
          */
-        if ((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
-             cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) &&
-            (int) virDomainNumaGetNodeCount(def->numa)
-            <= cont->opts.pciopts.numaNode) {
+        if (cont->opts.pciopts.numaNode >= 0 &&
+            cont->opts.pciopts.numaNode >=
+            (int) virDomainNumaGetNodeCount(def->numa)) {
             virReportError(VIR_ERR_XML_ERROR,
                            _("%s with index %d is "
                              "configured for a NUMA node (%d) "
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml b/tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml
new file mode 100644
index 0000000000..12d277aaf8
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml
@@ -0,0 +1,29 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>1ccfd97d-5eb4-478a-bbe6-88d254c16db7</uuid>
+  <memory unit='KiB'>1048576</memory>
+  <vcpu placement='static'>24</vcpu>
+  <numatune>
+    <memnode cellid="0" mode="strict" nodeset="1"/>
+  </numatune>
+  <cpu>
+    <topology sockets='3' cores='1' threads='8'/>
+    <numa>
+      <cell id='0' cpus='0-23' memory='1048576' unit='KiB'/>
+    </numa>
+  </cpu>
+  <os>
+    <type arch='ppc64' machine='pseries'>hvm</type>
+  </os>
+  <devices>
+    <emulator>/usr/bin/qemu-system-ppc64</emulator>
+    <!-- The default PHB (controller index 0) shouldn't be assigned a NUMA node -->
+    <controller type='pci' index='0' model='pci-root'>
+      <target index='0'>
+        <node>0</node>
+      </target>
+    </controller>
+    <controller type='usb' model='none'/>
+    <memballoon model='none'/>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args
new file mode 100644
index 0000000000..e69ff16d0e
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args
@@ -0,0 +1,28 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-ppc64 \
+-name QEMUGuest1 \
+-S \
+-M pseries \
+-m 2048 \
+-smp 8,sockets=2,cores=1,threads=4 \
+-object memory-backend-ram,id=ram-node0,size=1073741824,host-nodes=1,\
+policy=bind \
+-numa node,nodeid=0,cpus=0-3,memdev=ram-node0 \
+-object memory-backend-ram,id=ram-node1,size=1073741824,host-nodes=2,\
+policy=bind \
+-numa node,nodeid=1,cpus=4-7,memdev=ram-node1 \
+-uuid 87eedafe-eedc-4336-8130-ed9fe5dc90c8 \
+-nographic \
+-nodefaults \
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\
+server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=readline \
+-boot c \
+-device spapr-pci-host-bridge,index=1,id=pci.1,numa_node=1 \
+-device spapr-pci-host-bridge,index=2,id=pci.2 \
+-device spapr-pci-host-bridge,index=3,id=pci.3,numa_node=0
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml
new file mode 100644
index 0000000000..aeccb14dfb
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml
@@ -0,0 +1,41 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+  <memory unit='KiB'>2097152</memory>
+  <vcpu placement='static'>8</vcpu>
+  <numatune>
+    <memnode cellid="0" mode="strict" nodeset="1"/>
+    <memnode cellid="1" mode="strict" nodeset="2"/>
+  </numatune>
+  <cpu>
+    <topology sockets='2' cores='1' threads='4'/>
+    <numa>
+      <cell id='0' cpus='0-3' memory='1048576' unit='KiB'/>
+      <cell id='1' cpus='4-7' memory='1048576' unit='KiB'/>
+    </numa>
+  </cpu>
+  <os>
+    <type arch='ppc64' machine='pseries'>hvm</type>
+  </os>
+  <devices>
+    <emulator>/usr/bin/qemu-system-ppc64</emulator>
+    <controller type='usb' model='none' index='0'/>
+    <controller type='pci' index='0' model='pci-root'>
+      <target index='0'/>
+    </controller>
+    <controller type='pci' index='1' model='pci-root'>
+      <target index='1'>
+        <node>1</node>
+      </target>
+    </controller>
+    <controller type='pci' index='2' model='pci-root'>
+      <target index='2'/>
+    </controller>
+    <controller type='pci' index='3' model='pci-root'>
+      <target index='3'>
+        <node>0</node>
+      </target>
+    </controller>
+    <memballoon model='none'/>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 2b9421cb7a..ee62e26309 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -1761,6 +1761,12 @@ mymain(void)
             QEMU_CAPS_NODEFCONFIG,
             QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE);
     DO_TEST_PARSE_ERROR("pseries-phb-wrong-target-index", NONE);
+    DO_TEST("pseries-phb-numa-node",
+            QEMU_CAPS_NUMA,
+            QEMU_CAPS_OBJECT_MEMORY_RAM,
+            QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE,
+            QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
+    DO_TEST_PARSE_ERROR("pseries-default-phb-numa-node", NONE);
 
     DO_TEST("pseries-many-devices",
             QEMU_CAPS_NODEFCONFIG,
diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml
new file mode 100644
index 0000000000..80b771e89d
--- /dev/null
+++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml
@@ -0,0 +1,52 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+  <memory unit='KiB'>2097152</memory>
+  <currentMemory unit='KiB'>2097152</currentMemory>
+  <vcpu placement='static'>8</vcpu>
+  <numatune>
+    <memnode cellid='0' mode='strict' nodeset='1'/>
+    <memnode cellid='1' mode='strict' nodeset='2'/>
+  </numatune>
+  <os>
+    <type arch='ppc64' machine='pseries'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <cpu>
+    <topology sockets='2' cores='1' threads='4'/>
+    <numa>
+      <cell id='0' cpus='0-3' memory='1048576' unit='KiB'/>
+      <cell id='1' cpus='4-7' memory='1048576' unit='KiB'/>
+    </numa>
+  </cpu>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-ppc64</emulator>
+    <controller type='usb' index='0' model='none'/>
+    <controller type='pci' index='0' model='pci-root'>
+      <model name='spapr-pci-host-bridge'/>
+      <target index='0'/>
+    </controller>
+    <controller type='pci' index='1' model='pci-root'>
+      <model name='spapr-pci-host-bridge'/>
+      <target index='1'>
+        <node>1</node>
+      </target>
+    </controller>
+    <controller type='pci' index='2' model='pci-root'>
+      <model name='spapr-pci-host-bridge'/>
+      <target index='2'/>
+    </controller>
+    <controller type='pci' index='3' model='pci-root'>
+      <model name='spapr-pci-host-bridge'/>
+      <target index='3'>
+        <node>0</node>
+      </target>
+    </controller>
+    <memballoon model='none'/>
+    <panic model='pseries'/>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index 6762145470..564da08707 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -672,6 +672,10 @@ mymain(void)
     DO_TEST("pseries-phb-default-missing",
             QEMU_CAPS_NODEFCONFIG,
             QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE);
+    DO_TEST("pseries-phb-numa-node",
+            QEMU_CAPS_NUMA,
+            QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE,
+            QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
 
     DO_TEST("pseries-many-devices",
             QEMU_CAPS_NODEFCONFIG,
-- 
2.13.3