404507
From 2ff472bc2ac9d01181c1dbd522153934de79907f Mon Sep 17 00:00:00 2001
404507
Message-Id: <2ff472bc2ac9d01181c1dbd522153934de79907f@dist-git>
404507
From: Wim ten Have <wim.ten.have@oracle.com>
404507
Date: Mon, 4 Dec 2017 13:38:47 +0100
404507
Subject: [PATCH] numa: describe siblings distances within cells
404507
404507
https://bugzilla.redhat.com/show_bug.cgi?id=1454889
404507
404507
Add support for describing NUMA distances in a domain's <numa> <cell>
404507
XML description.
404507
404507
Below is an example of a 4 node setup:
404507
404507
  <cpu>
404507
    <numa>
404507
      <cell id='0' cpus='0-3' memory='2097152' unit='KiB'>
404507
        <distances>
404507
          <sibling id='0' value='10'/>
404507
          <sibling id='1' value='21'/>
404507
          <sibling id='2' value='31'/>
404507
          <sibling id='3' value='21'/>
404507
        </distances>
404507
      </cell>
404507
      <cell id='1' cpus='4-7' memory='2097152' unit='KiB'>
404507
        <distances>
404507
          <sibling id='0' value='21'/>
404507
          <sibling id='1' value='10'/>
404507
          <sibling id='2' value='21'/>
404507
          <sibling id='3' value='31'/>
404507
        </distances>
404507
      </cell>
404507
      <cell id='2' cpus='8-11' memory='2097152' unit='KiB'>
404507
        <distances>
404507
          <sibling id='0' value='31'/>
404507
          <sibling id='1' value='21'/>
404507
          <sibling id='2' value='10'/>
404507
          <sibling id='3' value='21'/>
404507
        </distances>
404507
      </cell>
404507
      <cell id='3' cpus='12-15' memory='2097152' unit='KiB'>
404507
        <distances>
404507
          <sibling id='0' value='21'/>
404507
          <sibling id='1' value='31'/>
404507
          <sibling id='2' value='21'/>
404507
          <sibling id='3' value='10'/>
404507
        </distances>
404507
      </cell>
404507
    </numa>
404507
  </cpu>
404507
404507
A <cell> defines a NUMA node. <distances> describes the NUMA distance
404507
from the <cell> to the other NUMA nodes (the <sibling>s).  For example,
404507
in above XML description, the distance between NUMA node0 <cell id='0'
404507
...> and NUMA node2 <sibling id='2' ...> is 31.
404507
404507
Valid distance values are '10 <= value <= 255'.  A distance value of 10
404507
represents the distance to the node itself.  A distance value of 20
404507
represents the default value for remote nodes but other values are
404507
possible depending on the physical topology of the system.
404507
404507
When distances are not fully described, any missing sibling distance
404507
values will default to 10 for local nodes and 20 for remote nodes.
404507
404507
If distance is given for A -> B, then we default B -> A to the same
404507
value instead of 20.
404507
404507
Signed-off-by: Wim ten Have <wim.ten.have@oracle.com>
404507
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
404507
Signed-off-by: Jim Fehlig <jfehlig@suse.com>
404507
(cherry picked from commit 74119a03f184b79dcad28aa1e6f4ede6dc444998)
404507
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
404507
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
404507
---
404507
 docs/formatdomain.html.in   |  64 ++++++++++++++-
404507
 docs/schemas/basictypes.rng |   7 ++
404507
 docs/schemas/cputypes.rng   |  18 +++++
404507
 src/conf/numa_conf.c        | 191 +++++++++++++++++++++++++++++++++++++++++++-
404507
 4 files changed, 276 insertions(+), 4 deletions(-)
404507
404507
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
404507
index 62dd6e1ec4..a5adf5d9aa 100644
404507
--- a/docs/formatdomain.html.in
404507
+++ b/docs/formatdomain.html.in
404507
@@ -1529,7 +1529,69 @@
404507
     </p>
404507
 
404507
     <p>
404507
-      This guest NUMA specification is currently available only for QEMU/KVM.
404507
+      This guest NUMA specification is currently available only for
404507
+      QEMU/KVM and Xen.
404507
+    </p>
404507
+
404507
+    <p>
404507
+      A NUMA hardware architecture supports the notion of distances
404507
+      between NUMA cells. <span class="since">Since 3.10.0</span> it
404507
+      is possible to define the distance between NUMA cells using the
404507
+      <code>distances</code> element within a NUMA <code>cell</code>
404507
+      description. The <code>sibling</code> sub-element is used to
404507
+      specify the distance value between sibling NUMA cells. For more
404507
+      details, see the chapter explaining the system's SLIT (System
404507
+      Locality Information Table) within the ACPI (Advanced
404507
+      Configuration and Power Interface) specification.
404507
+    </p>
404507
+
404507
+
404507
+...
404507
+<cpu>
404507
+  ...
404507
+  <numa>
404507
+    <cell id='0' cpus='0,4-7' memory='512000' unit='KiB'>
404507
+      <distances>
404507
+        <sibling id='0' value='10'/>
404507
+        <sibling id='1' value='21'/>
404507
+        <sibling id='2' value='31'/>
404507
+        <sibling id='3' value='41'/>
404507
+      </distances>
404507
+    </cell>
404507
+    <cell id='1' cpus='1,8-10,12-15' memory='512000' unit='KiB' memAccess='shared'>
404507
+      <distances>
404507
+        <sibling id='0' value='21'/>
404507
+        <sibling id='1' value='10'/>
404507
+        <sibling id='2' value='21'/>
404507
+        <sibling id='3' value='31'/>
404507
+      </distances>
404507
+    </cell>
404507
+    <cell id='2' cpus='2,11' memory='512000' unit='KiB' memAccess='shared'>
404507
+      <distances>
404507
+        <sibling id='0' value='31'/>
404507
+        <sibling id='1' value='21'/>
404507
+        <sibling id='2' value='10'/>
404507
+        <sibling id='3' value='21'/>
404507
+      </distances>
404507
+    </cell>
404507
+    <cell id='3' cpus='3' memory='512000' unit='KiB'>
404507
+      <distances>
404507
+        <sibling id='0' value='41'/>
404507
+        <sibling id='1' value='31'/>
404507
+        <sibling id='2' value='21'/>
404507
+        <sibling id='3' value='10'/>
404507
+      </distances>
404507
+    </cell>
404507
+  </numa>
404507
+  ...
404507
+</cpu>
404507
+...
404507
+
404507
+    <p>
404507
+      Describing distances between NUMA cells is currently only supported
404507
+      by Xen. If no <code>distances</code> are given to describe
404507
+      the SLIT data between different cells, it will default to a scheme
404507
+      using 10 for local and 20 for remote distances.
404507
     </p>
404507
 
404507
     <h3><a id="elementsEvents">Events configuration</a></h3>
404507
diff --git a/docs/schemas/basictypes.rng b/docs/schemas/basictypes.rng
404507
index 1ea667cdf6..1a18cd31b1 100644
404507
--- a/docs/schemas/basictypes.rng
404507
+++ b/docs/schemas/basictypes.rng
404507
@@ -77,6 +77,13 @@
404507
     </choice>
404507
   </define>
404507
 
404507
+  <define name="numaDistanceValue">
404507
+    <data type="unsignedInt">
404507
+      <param name="minInclusive">10</param>
404507
+      <param name="maxInclusive">255</param>
404507
+    </data>
404507
+  </define>
404507
+
404507
   <define name="pciaddress">
404507
     <optional>
404507
       <attribute name="domain">
404507
diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng
404507
index 3eef16abce..c45b6dfb28 100644
404507
--- a/docs/schemas/cputypes.rng
404507
+++ b/docs/schemas/cputypes.rng
404507
@@ -129,6 +129,24 @@
404507
           </choice>
404507
         </attribute>
404507
       </optional>
404507
+      <optional>
404507
+        <element name="distances">
404507
+          <oneOrMore>
404507
+            <ref name="numaDistance"/>
404507
+          </oneOrMore>
404507
+        </element>
404507
+      </optional>
404507
+    </element>
404507
+  </define>
404507
+
404507
+  <define name="numaDistance">
404507
+    <element name="sibling">
404507
+      <attribute name="id">
404507
+        <ref name="unsignedInt"/>
404507
+      </attribute>
404507
+      <attribute name="value">
404507
+        <ref name="numaDistanceValue"/>
404507
+      </attribute>
404507
     </element>
404507
   </define>
404507
 
404507
diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c
404507
index b71dc012c5..5fbcc72041 100644
404507
--- a/src/conf/numa_conf.c
404507
+++ b/src/conf/numa_conf.c
404507
@@ -29,6 +29,15 @@
404507
 #include "virnuma.h"
404507
 #include "virstring.h"
404507
 
404507
+/*
404507
+ * Distance definitions conforming to the ACPI 2.0 SLIT specification.
404507
+ * See include/linux/topology.h
404507
+ */
404507
+#define LOCAL_DISTANCE          10
404507
+#define REMOTE_DISTANCE         20
404507
+/* SLIT entry value is a one-byte unsigned integer. */
404507
+#define UNREACHABLE            255
404507
+
404507
 #define VIR_FROM_THIS VIR_FROM_DOMAIN
404507
 
404507
 VIR_ENUM_IMPL(virDomainNumatuneMemMode,
404507
@@ -48,6 +57,8 @@ VIR_ENUM_IMPL(virDomainMemoryAccess, VIR_DOMAIN_MEMORY_ACCESS_LAST,
404507
               "shared",
404507
               "private")
404507
 
404507
+typedef struct _virDomainNumaDistance virDomainNumaDistance;
404507
+typedef virDomainNumaDistance *virDomainNumaDistancePtr;
404507
 
404507
 typedef struct _virDomainNumaNode virDomainNumaNode;
404507
 typedef virDomainNumaNode *virDomainNumaNodePtr;
404507
@@ -66,6 +77,12 @@ struct _virDomainNuma {
404507
         virBitmapPtr nodeset;   /* host memory nodes where this guest node resides */
404507
         virDomainNumatuneMemMode mode;  /* memory mode selection */
404507
         virDomainMemoryAccess memAccess; /* shared memory access configuration */
404507
+
404507
+        struct _virDomainNumaDistance {
404507
+            unsigned int value; /* locality value for node i->j or j->i */
404507
+            unsigned int cellid;
404507
+        } *distances;           /* remote node distances */
404507
+        size_t ndistances;
404507
     } *mem_nodes;           /* guest node configuration */
404507
     size_t nmem_nodes;
404507
 
404507
@@ -686,6 +703,144 @@ virDomainNumatuneNodesetIsAvailable(virDomainNumaPtr numatune,
404507
 }
404507
 
404507
 
404507
+static int
404507
+virDomainNumaDefNodeDistanceParseXML(virDomainNumaPtr def,
404507
+                                     xmlXPathContextPtr ctxt,
404507
+                                     unsigned int cur_cell)
404507
+{
404507
+    int ret = -1;
404507
+    int sibling;
404507
+    char *tmp = NULL;
404507
+    xmlNodePtr *nodes = NULL;
404507
+    size_t i, ndistances = def->nmem_nodes;
404507
+
404507
+    if (!ndistances)
404507
+        return 0;
404507
+
404507
+    /* check if NUMA distances definition is present */
404507
+    if (!virXPathNode("./distances[1]", ctxt))
404507
+        return 0;
404507
+
404507
+    if ((sibling = virXPathNodeSet("./distances[1]/sibling", ctxt, &nodes)) <= 0) {
404507
+        virReportError(VIR_ERR_XML_ERROR, "%s",
404507
+                       _("NUMA distances defined without siblings"));
404507
+        goto cleanup;
404507
+    }
404507
+
404507
+    for (i = 0; i < sibling; i++) {
404507
+        virDomainNumaDistancePtr ldist, rdist;
404507
+        unsigned int sibling_id, sibling_value;
404507
+
404507
+        /* siblings are in order of parsing or explicitly numbered */
404507
+        if (!(tmp = virXMLPropString(nodes[i], "id"))) {
404507
+            virReportError(VIR_ERR_XML_ERROR,
404507
+                           _("Missing 'id' attribute in NUMA "
404507
+                             "distances under 'cell id %d'"),
404507
+                           cur_cell);
404507
+            goto cleanup;
404507
+        }
404507
+
404507
+        /* The "id" needs to be applicable */
404507
+        if (virStrToLong_uip(tmp, NULL, 10, &sibling_id) < 0) {
404507
+            virReportError(VIR_ERR_XML_ERROR,
404507
+                           _("Invalid 'id' attribute in NUMA "
404507
+                             "distances for sibling: '%s'"),
404507
+                           tmp);
404507
+            goto cleanup;
404507
+        }
404507
+        VIR_FREE(tmp);
404507
+
404507
+        /* The "id" needs to be within numa/cell range */
404507
+        if (sibling_id >= ndistances) {
404507
+            virReportError(VIR_ERR_XML_ERROR,
404507
+                           _("'sibling_id %d' does not refer to a "
404507
+                             "valid cell within NUMA 'cell id %d'"),
404507
+                           sibling_id, cur_cell);
404507
+            goto cleanup;
404507
+        }
404507
+
404507
+        /* We need a locality value. Check and correct
404507
+         * distance to local and distance to remote node.
404507
+         */
404507
+        if (!(tmp = virXMLPropString(nodes[i], "value"))) {
404507
+            virReportError(VIR_ERR_XML_ERROR,
404507
+                           _("Missing 'value' attribute in NUMA distances "
404507
+                             "under 'cell id %d' for 'sibling id %d'"),
404507
+                           cur_cell, sibling_id);
404507
+            goto cleanup;
404507
+        }
404507
+
404507
+        /* The "value" needs to be applicable */
404507
+        if (virStrToLong_uip(tmp, NULL, 10, &sibling_value) < 0) {
404507
+            virReportError(VIR_ERR_XML_ERROR,
404507
+                           _("'value %s' is invalid for "
404507
+                             "'sibling id %d' under NUMA 'cell id %d'"),
404507
+                           tmp, sibling_id, cur_cell);
404507
+            goto cleanup;
404507
+        }
404507
+        VIR_FREE(tmp);
404507
+
404507
+        /* Assure LOCAL_DISTANCE <= "value" <= UNREACHABLE
404507
+         * and correct LOCAL_DISTANCE setting if such applies.
404507
+         */
404507
+        if ((sibling_value < LOCAL_DISTANCE ||
404507
+             sibling_value > UNREACHABLE) ||
404507
+            (sibling_id == cur_cell &&
404507
+             sibling_value != LOCAL_DISTANCE) ||
404507
+            (sibling_id != cur_cell &&
404507
+             sibling_value == LOCAL_DISTANCE)) {
404507
+            virReportError(VIR_ERR_XML_ERROR,
404507
+                           _("'value %d' is invalid for "
404507
+                             "'sibling id %d' under NUMA 'cell id %d'"),
404507
+                           sibling_value, sibling_id, cur_cell);
404507
+            goto cleanup;
404507
+        }
404507
+
404507
+        /* Apply the local / remote distance */
404507
+        ldist = def->mem_nodes[cur_cell].distances;
404507
+        if (!ldist) {
404507
+            if (VIR_ALLOC_N(ldist, ndistances) < 0)
404507
+                goto cleanup;
404507
+
404507
+            ldist[cur_cell].value = LOCAL_DISTANCE;
404507
+            ldist[cur_cell].cellid = cur_cell;
404507
+            def->mem_nodes[cur_cell].ndistances = ndistances;
404507
+        }
404507
+
404507
+        ldist[sibling_id].cellid = sibling_id;
404507
+        ldist[sibling_id].value = sibling_value;
404507
+        def->mem_nodes[cur_cell].distances = ldist;
404507
+
404507
+        /* Apply symmetry if none given */
404507
+        rdist = def->mem_nodes[sibling_id].distances;
404507
+        if (!rdist) {
404507
+            if (VIR_ALLOC_N(rdist, ndistances) < 0)
404507
+                goto cleanup;
404507
+
404507
+            rdist[sibling_id].value = LOCAL_DISTANCE;
404507
+            rdist[sibling_id].cellid = sibling_id;
404507
+            def->mem_nodes[sibling_id].ndistances = ndistances;
404507
+        }
404507
+
404507
+        rdist[cur_cell].cellid = cur_cell;
404507
+        if (!rdist[cur_cell].value)
404507
+            rdist[cur_cell].value = sibling_value;
404507
+        def->mem_nodes[sibling_id].distances = rdist;
404507
+    }
404507
+
404507
+    ret = 0;
404507
+
404507
+ cleanup:
404507
+    if (ret) {
404507
+        for (i = 0; i < ndistances; i++)
404507
+            VIR_FREE(def->mem_nodes[i].distances);
404507
+    }
404507
+    VIR_FREE(nodes);
404507
+    VIR_FREE(tmp);
404507
+
404507
+    return ret;
404507
+}
404507
+
404507
 int
404507
 virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
404507
                             xmlXPathContextPtr ctxt)
404507
@@ -694,7 +849,7 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
404507
     xmlNodePtr oldNode = ctxt->node;
404507
     char *tmp = NULL;
404507
     int n;
404507
-    size_t i;
404507
+    size_t i, j;
404507
     int ret = -1;
404507
 
404507
     /* check if NUMA definition is present */
404507
@@ -712,7 +867,6 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
404507
     def->nmem_nodes = n;
404507
 
404507
     for (i = 0; i < n; i++) {
404507
-        size_t j;
404507
         int rc;
404507
         unsigned int cur_cell = i;
404507
 
404507
@@ -788,6 +942,10 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
404507
             def->mem_nodes[cur_cell].memAccess = rc;
404507
             VIR_FREE(tmp);
404507
         }
404507
+
404507
+        /* Parse NUMA distances info */
404507
+        if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0)
404507
+                goto cleanup;
404507
     }
404507
 
404507
     ret = 0;
404507
@@ -815,6 +973,8 @@ virDomainNumaDefCPUFormatXML(virBufferPtr buf,
404507
     virBufferAddLit(buf, "<numa>\n");
404507
     virBufferAdjustIndent(buf, 2);
404507
     for (i = 0; i < ncells; i++) {
404507
+        int ndistances;
404507
+
404507
         memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
404507
 
404507
         if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i))))
404507
@@ -829,7 +989,32 @@ virDomainNumaDefCPUFormatXML(virBufferPtr buf,
404507
         if (memAccess)
404507
             virBufferAsprintf(buf, " memAccess='%s'",
404507
                               virDomainMemoryAccessTypeToString(memAccess));
404507
-        virBufferAddLit(buf, "/>\n");
404507
+
404507
+        ndistances = def->mem_nodes[i].ndistances;
404507
+        if (!ndistances) {
404507
+            virBufferAddLit(buf, "/>\n");
404507
+        } else {
404507
+            size_t j;
404507
+            virDomainNumaDistancePtr distances = def->mem_nodes[i].distances;
404507
+
404507
+            virBufferAddLit(buf, ">\n");
404507
+            virBufferAdjustIndent(buf, 2);
404507
+            virBufferAddLit(buf, "<distances>\n");
404507
+            virBufferAdjustIndent(buf, 2);
404507
+            for (j = 0; j < ndistances; j++) {
404507
+                if (distances[j].value) {
404507
+                    virBufferAddLit(buf, "<sibling");
404507
+                    virBufferAsprintf(buf, " id='%d'", distances[j].cellid);
404507
+                    virBufferAsprintf(buf, " value='%d'", distances[j].value);
404507
+                    virBufferAddLit(buf, "/>\n");
404507
+                }
404507
+            }
404507
+            virBufferAdjustIndent(buf, -2);
404507
+            virBufferAddLit(buf, "</distances>\n");
404507
+            virBufferAdjustIndent(buf, -2);
404507
+            virBufferAddLit(buf, "</cell>\n");
404507
+        }
404507
+
404507
         VIR_FREE(cpustr);
404507
     }
404507
     virBufferAdjustIndent(buf, -2);
404507
-- 
404507
2.15.1
404507