c1c534
From 2ff472bc2ac9d01181c1dbd522153934de79907f Mon Sep 17 00:00:00 2001
c1c534
Message-Id: <2ff472bc2ac9d01181c1dbd522153934de79907f@dist-git>
c1c534
From: Wim ten Have <wim.ten.have@oracle.com>
c1c534
Date: Mon, 4 Dec 2017 13:38:47 +0100
c1c534
Subject: [PATCH] numa: describe siblings distances within cells
c1c534
c1c534
https://bugzilla.redhat.com/show_bug.cgi?id=1454889
c1c534
c1c534
Add support for describing NUMA distances in a domain's <numa> <cell>
c1c534
XML description.
c1c534
c1c534
Below is an example of a 4 node setup:
c1c534
c1c534
  <cpu>
c1c534
    <numa>
c1c534
      <cell id='0' cpus='0-3' memory='2097152' unit='KiB'>
c1c534
        <distances>
c1c534
          <sibling id='0' value='10'/>
c1c534
          <sibling id='1' value='21'/>
c1c534
          <sibling id='2' value='31'/>
c1c534
          <sibling id='3' value='21'/>
c1c534
        </distances>
c1c534
      </cell>
c1c534
      <cell id='1' cpus='4-7' memory='2097152' unit='KiB'>
c1c534
        <distances>
c1c534
          <sibling id='0' value='21'/>
c1c534
          <sibling id='1' value='10'/>
c1c534
          <sibling id='2' value='21'/>
c1c534
          <sibling id='3' value='31'/>
c1c534
        </distances>
c1c534
      </cell>
c1c534
      <cell id='2' cpus='8-11' memory='2097152' unit='KiB'>
c1c534
        <distances>
c1c534
          <sibling id='0' value='31'/>
c1c534
          <sibling id='1' value='21'/>
c1c534
          <sibling id='2' value='10'/>
c1c534
          <sibling id='3' value='21'/>
c1c534
        </distances>
c1c534
      </cell>
c1c534
      <cell id='3' cpus='12-15' memory='2097152' unit='KiB'>
c1c534
        <distances>
c1c534
          <sibling id='0' value='21'/>
c1c534
          <sibling id='1' value='31'/>
c1c534
          <sibling id='2' value='21'/>
c1c534
          <sibling id='3' value='10'/>
c1c534
        </distances>
c1c534
      </cell>
c1c534
    </numa>
c1c534
  </cpu>
c1c534
c1c534
A <cell> defines a NUMA node. <distances> describes the NUMA distance
c1c534
from the <cell> to the other NUMA nodes (the <sibling>s).  For example,
c1c534
in the above XML description, the distance between NUMA node0 <cell id='0'
c1c534
...> and NUMA node2 <sibling id='2' ...> is 31.
c1c534
c1c534
Valid distance values are '10 <= value <= 255'.  A distance value of 10
c1c534
represents the distance to the node itself.  A distance value of 20
c1c534
represents the default value for remote nodes but other values are
c1c534
possible depending on the physical topology of the system.
c1c534
c1c534
When distances are not fully described, any missing sibling distance
c1c534
values will default to 10 for local nodes and 20 for remote nodes.
c1c534
c1c534
If distance is given for A -> B, then we default B -> A to the same
c1c534
value instead of 20.
c1c534
c1c534
Signed-off-by: Wim ten Have <wim.ten.have@oracle.com>
c1c534
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
c1c534
Signed-off-by: Jim Fehlig <jfehlig@suse.com>
c1c534
(cherry picked from commit 74119a03f184b79dcad28aa1e6f4ede6dc444998)
c1c534
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
c1c534
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
c1c534
---
c1c534
 docs/formatdomain.html.in   |  64 ++++++++++++++-
c1c534
 docs/schemas/basictypes.rng |   7 ++
c1c534
 docs/schemas/cputypes.rng   |  18 +++++
c1c534
 src/conf/numa_conf.c        | 191 +++++++++++++++++++++++++++++++++++++++++++-
c1c534
 4 files changed, 276 insertions(+), 4 deletions(-)
c1c534
c1c534
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
c1c534
index 62dd6e1ec4..a5adf5d9aa 100644
c1c534
--- a/docs/formatdomain.html.in
c1c534
+++ b/docs/formatdomain.html.in
c1c534
@@ -1529,7 +1529,69 @@
c1c534
     

c1c534
 
c1c534
     

c1c534
-      This guest NUMA specification is currently available only for QEMU/KVM.
c1c534
+      This guest NUMA specification is currently available only for
c1c534
+      QEMU/KVM and Xen.
c1c534
+    

c1c534
+
c1c534
+    

c1c534
+      A NUMA hardware architecture supports the notion of distances
c1c534
+      between NUMA cells. <span class="since">Since 3.10.0</span> it
c1c534
+      is possible to define the distance between NUMA cells using the
c1c534
+      <code>distances</code> element within a NUMA <code>cell</code>
c1c534
+      description. The <code>sibling</code> sub-element is used to
c1c534
+      specify the distance value between sibling NUMA cells. For more
c1c534
+      details, see the chapter explaining the system's SLIT (System
c1c534
+      Locality Information Table) within the ACPI (Advanced
c1c534
+      Configuration and Power Interface) specification.
c1c534
+    

c1c534
+
c1c534
+
c1c534
+...
c1c534
+<cpu>
c1c534
+  ...
c1c534
+  <numa>
c1c534
+    <cell id='0' cpus='0,4-7' memory='512000' unit='KiB'>
c1c534
+      <distances>
c1c534
+        <sibling id='0' value='10'/>
c1c534
+        <sibling id='1' value='21'/>
c1c534
+        <sibling id='2' value='31'/>
c1c534
+        <sibling id='3' value='41'/>
c1c534
+      </distances>
c1c534
+    </cell>
c1c534
+    <cell id='1' cpus='1,8-10,12-15' memory='512000' unit='KiB' memAccess='shared'>
c1c534
+      <distances>
c1c534
+        <sibling id='0' value='21'/>
c1c534
+        <sibling id='1' value='10'/>
c1c534
+        <sibling id='2' value='21'/>
c1c534
+        <sibling id='3' value='31'/>
c1c534
+      </distances>
c1c534
+    </cell>
c1c534
+    <cell id='2' cpus='2,11' memory='512000' unit='KiB' memAccess='shared'>
c1c534
+      <distances>
c1c534
+        <sibling id='0' value='31'/>
c1c534
+        <sibling id='1' value='21'/>
c1c534
+        <sibling id='2' value='10'/>
c1c534
+        <sibling id='3' value='21'/>
c1c534
+      </distances>
c1c534
+    </cell>
c1c534
+    <cell id='3' cpus='3' memory='512000' unit='KiB'>
c1c534
+      <distances>
c1c534
+        <sibling id='0' value='41'/>
c1c534
+        <sibling id='1' value='31'/>
c1c534
+        <sibling id='2' value='21'/>
c1c534
+        <sibling id='3' value='10'/>
c1c534
+      </distances>
c1c534
+    </cell>
c1c534
+  </numa>
c1c534
+  ...
c1c534
+</cpu>
c1c534
+...
c1c534
+
c1c534
+    

c1c534
+      Describing distances between NUMA cells is currently only supported
c1c534
+      by Xen. If no <code>distances</code> are given to describe
c1c534
+      the SLIT data between different cells, it will default to a scheme
c1c534
+      using 10 for local and 20 for remote distances.
c1c534
     

c1c534
 
c1c534
     

Events configuration

c1c534
diff --git a/docs/schemas/basictypes.rng b/docs/schemas/basictypes.rng
c1c534
index 1ea667cdf6..1a18cd31b1 100644
c1c534
--- a/docs/schemas/basictypes.rng
c1c534
+++ b/docs/schemas/basictypes.rng
c1c534
@@ -77,6 +77,13 @@
c1c534
     </choice>
c1c534
   </define>
c1c534
 
c1c534
+  <define name="numaDistanceValue">
c1c534
+    <data type="unsignedInt">
c1c534
+      <param name="minInclusive">10</param>
c1c534
+      <param name="maxInclusive">255</param>
c1c534
+    </data>
c1c534
+  </define>
c1c534
+
c1c534
   <define name="pciaddress">
c1c534
     <optional>
c1c534
       <attribute name="domain">
c1c534
diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng
c1c534
index 3eef16abce..c45b6dfb28 100644
c1c534
--- a/docs/schemas/cputypes.rng
c1c534
+++ b/docs/schemas/cputypes.rng
c1c534
@@ -129,6 +129,24 @@
c1c534
           </choice>
c1c534
         </attribute>
c1c534
       </optional>
c1c534
+      <optional>
c1c534
+        <element name="distances">
c1c534
+          <oneOrMore>
c1c534
+            <ref name="numaDistance"/>
c1c534
+          </oneOrMore>
c1c534
+        </element>
c1c534
+      </optional>
c1c534
+    </element>
c1c534
+  </define>
c1c534
+
c1c534
+  <define name="numaDistance">
c1c534
+    <element name="sibling">
c1c534
+      <attribute name="id">
c1c534
+        <ref name="unsignedInt"/>
c1c534
+      </attribute>
c1c534
+      <attribute name="value">
c1c534
+        <ref name="numaDistanceValue"/>
c1c534
+      </attribute>
c1c534
     </element>
c1c534
   </define>
c1c534
 
c1c534
diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c
c1c534
index b71dc012c5..5fbcc72041 100644
c1c534
--- a/src/conf/numa_conf.c
c1c534
+++ b/src/conf/numa_conf.c
c1c534
@@ -29,6 +29,15 @@
c1c534
 #include "virnuma.h"
c1c534
 #include "virstring.h"
c1c534
 
c1c534
+/*
c1c534
+ * Distance definitions conforming to the ACPI 2.0 SLIT.
c1c534
+ * See include/linux/topology.h
c1c534
+ */
c1c534
+#define LOCAL_DISTANCE          10
c1c534
+#define REMOTE_DISTANCE         20
c1c534
+/* SLIT entry value is a one-byte unsigned integer. */
c1c534
+#define UNREACHABLE            255
c1c534
+
c1c534
 #define VIR_FROM_THIS VIR_FROM_DOMAIN
c1c534
 
c1c534
 VIR_ENUM_IMPL(virDomainNumatuneMemMode,
c1c534
@@ -48,6 +57,8 @@ VIR_ENUM_IMPL(virDomainMemoryAccess, VIR_DOMAIN_MEMORY_ACCESS_LAST,
c1c534
               "shared",
c1c534
               "private")
c1c534
 
c1c534
+typedef struct _virDomainNumaDistance virDomainNumaDistance;
c1c534
+typedef virDomainNumaDistance *virDomainNumaDistancePtr;
c1c534
 
c1c534
 typedef struct _virDomainNumaNode virDomainNumaNode;
c1c534
 typedef virDomainNumaNode *virDomainNumaNodePtr;
c1c534
@@ -66,6 +77,12 @@ struct _virDomainNuma {
c1c534
         virBitmapPtr nodeset;   /* host memory nodes where this guest node resides */
c1c534
         virDomainNumatuneMemMode mode;  /* memory mode selection */
c1c534
         virDomainMemoryAccess memAccess; /* shared memory access configuration */
c1c534
+
c1c534
+        struct _virDomainNumaDistance {
c1c534
+            unsigned int value; /* locality value for node i->j or j->i */
c1c534
+            unsigned int cellid;
c1c534
+        } *distances;           /* remote node distances */
c1c534
+        size_t ndistances;
c1c534
     } *mem_nodes;           /* guest node configuration */
c1c534
     size_t nmem_nodes;
c1c534
 
c1c534
@@ -686,6 +703,144 @@ virDomainNumatuneNodesetIsAvailable(virDomainNumaPtr numatune,
c1c534
 }
c1c534
 
c1c534
 
c1c534
+static int
c1c534
+virDomainNumaDefNodeDistanceParseXML(virDomainNumaPtr def,
c1c534
+                                     xmlXPathContextPtr ctxt,
c1c534
+                                     unsigned int cur_cell)
c1c534
+{
c1c534
+    int ret = -1;
c1c534
+    int sibling;
c1c534
+    char *tmp = NULL;
c1c534
+    xmlNodePtr *nodes = NULL;
c1c534
+    size_t i, ndistances = def->nmem_nodes;
c1c534
+
c1c534
+    if (!ndistances)
c1c534
+        return 0;
c1c534
+
c1c534
+    /* check if NUMA distances definition is present */
c1c534
+    if (!virXPathNode("./distances[1]", ctxt))
c1c534
+        return 0;
c1c534
+
c1c534
+    if ((sibling = virXPathNodeSet("./distances[1]/sibling", ctxt, &nodes)) <= 0) {
c1c534
+        virReportError(VIR_ERR_XML_ERROR, "%s",
c1c534
+                       _("NUMA distances defined without siblings"));
c1c534
+        goto cleanup;
c1c534
+    }
c1c534
+
c1c534
+    for (i = 0; i < sibling; i++) {
c1c534
+        virDomainNumaDistancePtr ldist, rdist;
c1c534
+        unsigned int sibling_id, sibling_value;
c1c534
+
c1c534
+        /* siblings are in order of parsing or explicitly numbered */
c1c534
+        if (!(tmp = virXMLPropString(nodes[i], "id"))) {
c1c534
+            virReportError(VIR_ERR_XML_ERROR,
c1c534
+                           _("Missing 'id' attribute in NUMA "
c1c534
+                             "distances under 'cell id %d'"),
c1c534
+                           cur_cell);
c1c534
+            goto cleanup;
c1c534
+        }
c1c534
+
c1c534
+        /* The "id" needs to be applicable */
c1c534
+        if (virStrToLong_uip(tmp, NULL, 10, &sibling_id) < 0) {
c1c534
+            virReportError(VIR_ERR_XML_ERROR,
c1c534
+                           _("Invalid 'id' attribute in NUMA "
c1c534
+                             "distances for sibling: '%s'"),
c1c534
+                           tmp);
c1c534
+            goto cleanup;
c1c534
+        }
c1c534
+        VIR_FREE(tmp);
c1c534
+
c1c534
+        /* The "id" needs to be within numa/cell range */
c1c534
+        if (sibling_id >= ndistances) {
c1c534
+            virReportError(VIR_ERR_XML_ERROR,
c1c534
+                           _("'sibling_id %d' does not refer to a "
c1c534
+                             "valid cell within NUMA 'cell id %d'"),
c1c534
+                           sibling_id, cur_cell);
c1c534
+            goto cleanup;
c1c534
+        }
c1c534
+
c1c534
+        /* We need a locality value. Check and correct
c1c534
+         * distance to local and distance to remote node.
c1c534
+         */
c1c534
+        if (!(tmp = virXMLPropString(nodes[i], "value"))) {
c1c534
+            virReportError(VIR_ERR_XML_ERROR,
c1c534
+                           _("Missing 'value' attribute in NUMA distances "
c1c534
+                             "under 'cell id %d' for 'sibling id %d'"),
c1c534
+                           cur_cell, sibling_id);
c1c534
+            goto cleanup;
c1c534
+        }
c1c534
+
c1c534
+        /* The "value" needs to be applicable */
c1c534
+        if (virStrToLong_uip(tmp, NULL, 10, &sibling_value) < 0) {
c1c534
+            virReportError(VIR_ERR_XML_ERROR,
c1c534
+                           _("'value %s' is invalid for "
c1c534
+                             "'sibling id %d' under NUMA 'cell id %d'"),
c1c534
+                           tmp, sibling_id, cur_cell);
c1c534
+            goto cleanup;
c1c534
+        }
c1c534
+        VIR_FREE(tmp);
c1c534
+
c1c534
+        /* Assure LOCAL_DISTANCE <= "value" <= UNREACHABLE
c1c534
+         * and correct LOCAL_DISTANCE setting if such applies.
c1c534
+         */
c1c534
+        if ((sibling_value < LOCAL_DISTANCE ||
c1c534
+             sibling_value > UNREACHABLE) ||
c1c534
+            (sibling_id == cur_cell &&
c1c534
+             sibling_value != LOCAL_DISTANCE) ||
c1c534
+            (sibling_id != cur_cell &&
c1c534
+             sibling_value == LOCAL_DISTANCE)) {
c1c534
+            virReportError(VIR_ERR_XML_ERROR,
c1c534
+                           _("'value %d' is invalid for "
c1c534
+                             "'sibling id %d' under NUMA 'cell id %d'"),
c1c534
+                           sibling_value, sibling_id, cur_cell);
c1c534
+            goto cleanup;
c1c534
+        }
c1c534
+
c1c534
+        /* Apply the local / remote distance */
c1c534
+        ldist = def->mem_nodes[cur_cell].distances;
c1c534
+        if (!ldist) {
c1c534
+            if (VIR_ALLOC_N(ldist, ndistances) < 0)
c1c534
+                goto cleanup;
c1c534
+
c1c534
+            ldist[cur_cell].value = LOCAL_DISTANCE;
c1c534
+            ldist[cur_cell].cellid = cur_cell;
c1c534
+            def->mem_nodes[cur_cell].ndistances = ndistances;
c1c534
+        }
c1c534
+
c1c534
+        ldist[sibling_id].cellid = sibling_id;
c1c534
+        ldist[sibling_id].value = sibling_value;
c1c534
+        def->mem_nodes[cur_cell].distances = ldist;
c1c534
+
c1c534
+        /* Apply symmetry if none given */
c1c534
+        rdist = def->mem_nodes[sibling_id].distances;
c1c534
+        if (!rdist) {
c1c534
+            if (VIR_ALLOC_N(rdist, ndistances) < 0)
c1c534
+                goto cleanup;
c1c534
+
c1c534
+            rdist[sibling_id].value = LOCAL_DISTANCE;
c1c534
+            rdist[sibling_id].cellid = sibling_id;
c1c534
+            def->mem_nodes[sibling_id].ndistances = ndistances;
c1c534
+        }
c1c534
+
c1c534
+        rdist[cur_cell].cellid = cur_cell;
c1c534
+        if (!rdist[cur_cell].value)
c1c534
+            rdist[cur_cell].value = sibling_value;
c1c534
+        def->mem_nodes[sibling_id].distances = rdist;
c1c534
+    }
c1c534
+
c1c534
+    ret = 0;
c1c534
+
c1c534
+ cleanup:
c1c534
+    if (ret) {
c1c534
+        for (i = 0; i < ndistances; i++)
c1c534
+            VIR_FREE(def->mem_nodes[i].distances);
c1c534
+    }
c1c534
+    VIR_FREE(nodes);
c1c534
+    VIR_FREE(tmp);
c1c534
+
c1c534
+    return ret;
c1c534
+}
c1c534
+
c1c534
 int
c1c534
 virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
c1c534
                             xmlXPathContextPtr ctxt)
c1c534
@@ -694,7 +849,7 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
c1c534
     xmlNodePtr oldNode = ctxt->node;
c1c534
     char *tmp = NULL;
c1c534
     int n;
c1c534
-    size_t i;
c1c534
+    size_t i, j;
c1c534
     int ret = -1;
c1c534
 
c1c534
     /* check if NUMA definition is present */
c1c534
@@ -712,7 +867,6 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
c1c534
     def->nmem_nodes = n;
c1c534
 
c1c534
     for (i = 0; i < n; i++) {
c1c534
-        size_t j;
c1c534
         int rc;
c1c534
         unsigned int cur_cell = i;
c1c534
 
c1c534
@@ -788,6 +942,10 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
c1c534
             def->mem_nodes[cur_cell].memAccess = rc;
c1c534
             VIR_FREE(tmp);
c1c534
         }
c1c534
+
c1c534
+        /* Parse NUMA distances info */
c1c534
+        if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0)
c1c534
+                goto cleanup;
c1c534
     }
c1c534
 
c1c534
     ret = 0;
c1c534
@@ -815,6 +973,8 @@ virDomainNumaDefCPUFormatXML(virBufferPtr buf,
c1c534
     virBufferAddLit(buf, "<numa>\n");
c1c534
     virBufferAdjustIndent(buf, 2);
c1c534
     for (i = 0; i < ncells; i++) {
c1c534
+        int ndistances;
c1c534
+
c1c534
         memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
c1c534
 
c1c534
         if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i))))
c1c534
@@ -829,7 +989,32 @@ virDomainNumaDefCPUFormatXML(virBufferPtr buf,
c1c534
         if (memAccess)
c1c534
             virBufferAsprintf(buf, " memAccess='%s'",
c1c534
                               virDomainMemoryAccessTypeToString(memAccess));
c1c534
-        virBufferAddLit(buf, "/>\n");
c1c534
+
c1c534
+        ndistances = def->mem_nodes[i].ndistances;
c1c534
+        if (!ndistances) {
c1c534
+            virBufferAddLit(buf, "/>\n");
c1c534
+        } else {
c1c534
+            size_t j;
c1c534
+            virDomainNumaDistancePtr distances = def->mem_nodes[i].distances;
c1c534
+
c1c534
+            virBufferAddLit(buf, ">\n");
c1c534
+            virBufferAdjustIndent(buf, 2);
c1c534
+            virBufferAddLit(buf, "<distances>\n");
c1c534
+            virBufferAdjustIndent(buf, 2);
c1c534
+            for (j = 0; j < ndistances; j++) {
c1c534
+                if (distances[j].value) {
c1c534
+                    virBufferAddLit(buf, "<sibling");
c1c534
+                    virBufferAsprintf(buf, " id='%d'", distances[j].cellid);
c1c534
+                    virBufferAsprintf(buf, " value='%d'", distances[j].value);
c1c534
+                    virBufferAddLit(buf, "/>\n");
c1c534
+                }
c1c534
+            }
c1c534
+            virBufferAdjustIndent(buf, -2);
c1c534
+            virBufferAddLit(buf, "</distances>\n");
c1c534
+            virBufferAdjustIndent(buf, -2);
c1c534
+            virBufferAddLit(buf, "</cell>\n");
c1c534
+        }
c1c534
+
c1c534
         VIR_FREE(cpustr);
c1c534
     }
c1c534
     virBufferAdjustIndent(buf, -2);
c1c534
-- 
c1c534
2.15.1
c1c534