Blame SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch

ddf19c
From 32341d8cf680625def040b44d70b197f2399bbdb Mon Sep 17 00:00:00 2001
ddf19c
From: "plai@redhat.com" <plai@redhat.com>
ddf19c
Date: Thu, 21 May 2020 23:56:48 +0100
ddf19c
Subject: [PATCH 05/12] numa: Extend CLI to provide memory latency and
ddf19c
 bandwidth information
ddf19c
ddf19c
RH-Author: plai@redhat.com
ddf19c
Message-id: <20200521235655.27141-5-plai@redhat.com>
ddf19c
Patchwork-id: 96731
ddf19c
O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 04/11] numa: Extend CLI to provide memory latency and bandwidth information
ddf19c
Bugzilla: 1600217
ddf19c
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
ddf19c
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
ddf19c
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
ddf19c
ddf19c
From: Liu Jingqi <jingqi.liu@intel.com>
ddf19c
ddf19c
Add -numa hmat-lb option to provide System Locality Latency and
ddf19c
Bandwidth Information. These memory attributes help to build
ddf19c
System Locality Latency and Bandwidth Information Structure(s)
ddf19c
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
ddf19c
hmat-lb option, enable HMAT with -machine hmat=on.
ddf19c
ddf19c
Acked-by: Markus Armbruster <armbru@redhat.com>
ddf19c
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
ddf19c
Signed-off-by: Tao Xu <tao3.xu@intel.com>
ddf19c
Message-Id: <20191213011929.2520-3-tao3.xu@intel.com>
ddf19c
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
ddf19c
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
ddf19c
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
ddf19c
(cherry picked from commit 9b12dfa03a94d7f7a4b54eb67229a31e58193384)
ddf19c
Signed-off-by: Paul Lai <plai@redhat.com>
ddf19c
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ddf19c
---
ddf19c
 hw/core/numa.c        | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++
ddf19c
 include/sysemu/numa.h |  53 ++++++++++++++
ddf19c
 qapi/machine.json     |  93 +++++++++++++++++++++++-
ddf19c
 qemu-options.hx       |  47 +++++++++++-
ddf19c
 4 files changed, 384 insertions(+), 3 deletions(-)
ddf19c
ddf19c
diff --git a/hw/core/numa.c b/hw/core/numa.c
ddf19c
index a07eef9..58fe713 100644
ddf19c
--- a/hw/core/numa.c
ddf19c
+++ b/hw/core/numa.c
ddf19c
@@ -23,6 +23,7 @@
ddf19c
  */
ddf19c
 
ddf19c
 #include "qemu/osdep.h"
ddf19c
+#include "qemu/units.h"
ddf19c
 #include "sysemu/hostmem.h"
ddf19c
 #include "sysemu/numa.h"
ddf19c
 #include "sysemu/sysemu.h"
ddf19c
@@ -194,6 +195,186 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp)
ddf19c
     ms->numa_state->have_numa_distance = true;
ddf19c
 }
ddf19c
 
ddf19c
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
ddf19c
+                        Error **errp)
ddf19c
+{
ddf19c
+    int i, first_bit, last_bit;
ddf19c
+    uint64_t max_entry, temp_base, bitmap_copy;
ddf19c
+    NodeInfo *numa_info = numa_state->nodes;
ddf19c
+    HMAT_LB_Info *hmat_lb =
ddf19c
+        numa_state->hmat_lb[node->hierarchy][node->data_type];
ddf19c
+    HMAT_LB_Data lb_data = {};
ddf19c
+    HMAT_LB_Data *lb_temp;
ddf19c
+
ddf19c
+    /* Error checking */
ddf19c
+    if (node->initiator > numa_state->num_nodes) {
ddf19c
+        error_setg(errp, "Invalid initiator=%d, it should be less than %d",
ddf19c
+                   node->initiator, numa_state->num_nodes);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+    if (node->target > numa_state->num_nodes) {
ddf19c
+        error_setg(errp, "Invalid target=%d, it should be less than %d",
ddf19c
+                   node->target, numa_state->num_nodes);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+    if (!numa_info[node->initiator].has_cpu) {
ddf19c
+        error_setg(errp, "Invalid initiator=%d, it isn't an "
ddf19c
+                   "initiator proximity domain", node->initiator);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+    if (!numa_info[node->target].present) {
ddf19c
+        error_setg(errp, "The target=%d should point to an existing node",
ddf19c
+                   node->target);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+
ddf19c
+    if (!hmat_lb) {
ddf19c
+        hmat_lb = g_malloc0(sizeof(*hmat_lb));
ddf19c
+        numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
ddf19c
+        hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
ddf19c
+    }
ddf19c
+    hmat_lb->hierarchy = node->hierarchy;
ddf19c
+    hmat_lb->data_type = node->data_type;
ddf19c
+    lb_data.initiator = node->initiator;
ddf19c
+    lb_data.target = node->target;
ddf19c
+
ddf19c
+    if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
ddf19c
+        /* Input latency data */
ddf19c
+
ddf19c
+        if (!node->has_latency) {
ddf19c
+            error_setg(errp, "Missing 'latency' option");
ddf19c
+            return;
ddf19c
+        }
ddf19c
+        if (node->has_bandwidth) {
ddf19c
+            error_setg(errp, "Invalid option 'bandwidth' since "
ddf19c
+                       "the data type is latency");
ddf19c
+            return;
ddf19c
+        }
ddf19c
+
ddf19c
+        /* Detect duplicate configuration */
ddf19c
+        for (i = 0; i < hmat_lb->list->len; i++) {
ddf19c
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
ddf19c
+
ddf19c
+            if (node->initiator == lb_temp->initiator &&
ddf19c
+                node->target == lb_temp->target) {
ddf19c
+                error_setg(errp, "Duplicate configuration of the latency for "
ddf19c
+                    "initiator=%d and target=%d", node->initiator,
ddf19c
+                    node->target);
ddf19c
+                return;
ddf19c
+            }
ddf19c
+        }
ddf19c
+
ddf19c
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
ddf19c
+
ddf19c
+        if (node->latency) {
ddf19c
+            /* Calculate the temporary base and compressed latency */
ddf19c
+            max_entry = node->latency;
ddf19c
+            temp_base = 1;
ddf19c
+            while (QEMU_IS_ALIGNED(max_entry, 10)) {
ddf19c
+                max_entry /= 10;
ddf19c
+                temp_base *= 10;
ddf19c
+            }
ddf19c
+
ddf19c
+            /* Calculate the max compressed latency */
ddf19c
+            temp_base = MIN(hmat_lb->base, temp_base);
ddf19c
+            max_entry = node->latency / hmat_lb->base;
ddf19c
+            max_entry = MAX(hmat_lb->range_bitmap, max_entry);
ddf19c
+
ddf19c
+            /*
ddf19c
+             * For latency hmat_lb->range_bitmap record the max compressed
ddf19c
+             * latency which should be less than 0xFFFF (UINT16_MAX)
ddf19c
+             */
ddf19c
+            if (max_entry >= UINT16_MAX) {
ddf19c
+                error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
ddf19c
+                        "target=%d should not differ from previously entered "
ddf19c
+                        "min or max values on more than %d", node->latency,
ddf19c
+                        node->initiator, node->target, UINT16_MAX - 1);
ddf19c
+                return;
ddf19c
+            } else {
ddf19c
+                hmat_lb->base = temp_base;
ddf19c
+                hmat_lb->range_bitmap = max_entry;
ddf19c
+            }
ddf19c
+
ddf19c
+            /*
ddf19c
+             * Set lb_info_provided bit 0 as 1,
ddf19c
+             * latency information is provided
ddf19c
+             */
ddf19c
+            numa_info[node->target].lb_info_provided |= BIT(0);
ddf19c
+        }
ddf19c
+        lb_data.data = node->latency;
ddf19c
+    } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
ddf19c
+        /* Input bandwidth data */
ddf19c
+        if (!node->has_bandwidth) {
ddf19c
+            error_setg(errp, "Missing 'bandwidth' option");
ddf19c
+            return;
ddf19c
+        }
ddf19c
+        if (node->has_latency) {
ddf19c
+            error_setg(errp, "Invalid option 'latency' since "
ddf19c
+                       "the data type is bandwidth");
ddf19c
+            return;
ddf19c
+        }
ddf19c
+        if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
ddf19c
+            error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
ddf19c
+                       "target=%d should be 1MB aligned", node->bandwidth,
ddf19c
+                       node->initiator, node->target);
ddf19c
+            return;
ddf19c
+        }
ddf19c
+
ddf19c
+        /* Detect duplicate configuration */
ddf19c
+        for (i = 0; i < hmat_lb->list->len; i++) {
ddf19c
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
ddf19c
+
ddf19c
+            if (node->initiator == lb_temp->initiator &&
ddf19c
+                node->target == lb_temp->target) {
ddf19c
+                error_setg(errp, "Duplicate configuration of the bandwidth for "
ddf19c
+                    "initiator=%d and target=%d", node->initiator,
ddf19c
+                    node->target);
ddf19c
+                return;
ddf19c
+            }
ddf19c
+        }
ddf19c
+
ddf19c
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
ddf19c
+
ddf19c
+        if (node->bandwidth) {
ddf19c
+            /* Keep bitmap unchanged when bandwidth out of range */
ddf19c
+            bitmap_copy = hmat_lb->range_bitmap;
ddf19c
+            bitmap_copy |= node->bandwidth;
ddf19c
+            first_bit = ctz64(bitmap_copy);
ddf19c
+            temp_base = UINT64_C(1) << first_bit;
ddf19c
+            max_entry = node->bandwidth / temp_base;
ddf19c
+            last_bit = 64 - clz64(bitmap_copy);
ddf19c
+
ddf19c
+            /*
ddf19c
+             * For bandwidth, first_bit record the base unit of bandwidth bits,
ddf19c
+             * last_bit record the last bit of the max bandwidth. The max
ddf19c
+             * compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
ddf19c
+             */
ddf19c
+            if ((last_bit - first_bit) > UINT16_BITS ||
ddf19c
+                max_entry >= UINT16_MAX) {
ddf19c
+                error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
ddf19c
+                        "and target=%d should not differ from previously "
ddf19c
+                        "entered values on more than %d", node->bandwidth,
ddf19c
+                        node->initiator, node->target, UINT16_MAX - 1);
ddf19c
+                return;
ddf19c
+            } else {
ddf19c
+                hmat_lb->base = temp_base;
ddf19c
+                hmat_lb->range_bitmap = bitmap_copy;
ddf19c
+            }
ddf19c
+
ddf19c
+            /*
ddf19c
+             * Set lb_info_provided bit 1 as 1,
ddf19c
+             * bandwidth information is provided
ddf19c
+             */
ddf19c
+            numa_info[node->target].lb_info_provided |= BIT(1);
ddf19c
+        }
ddf19c
+        lb_data.data = node->bandwidth;
ddf19c
+    } else {
ddf19c
+        assert(0);
ddf19c
+    }
ddf19c
+
ddf19c
+    g_array_append_val(hmat_lb->list, lb_data);
ddf19c
+}
ddf19c
+
ddf19c
 void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
ddf19c
 {
ddf19c
     Error *err = NULL;
ddf19c
@@ -231,6 +412,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
ddf19c
         machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
ddf19c
                                   &err);
ddf19c
         break;
ddf19c
+    case NUMA_OPTIONS_TYPE_HMAT_LB:
ddf19c
+        if (!ms->numa_state->hmat_enabled) {
ddf19c
+            error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
ddf19c
+                       "(HMAT) is disabled, enable it with -machine hmat=on "
ddf19c
+                       "before using any of hmat specific options");
ddf19c
+            return;
ddf19c
+        }
ddf19c
+
ddf19c
+        parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
ddf19c
+        if (err) {
ddf19c
+            goto end;
ddf19c
+        }
ddf19c
+        break;
ddf19c
     default:
ddf19c
         abort();
ddf19c
     }
ddf19c
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
ddf19c
index 788cbec..70f93c8 100644
ddf19c
--- a/include/sysemu/numa.h
ddf19c
+++ b/include/sysemu/numa.h
ddf19c
@@ -14,11 +14,34 @@ struct CPUArchId;
ddf19c
 #define NUMA_DISTANCE_MAX         254
ddf19c
 #define NUMA_DISTANCE_UNREACHABLE 255
ddf19c
 
ddf19c
+/* the value of AcpiHmatLBInfo flags */
ddf19c
+enum {
ddf19c
+    HMAT_LB_MEM_MEMORY           = 0,
ddf19c
+    HMAT_LB_MEM_CACHE_1ST_LEVEL  = 1,
ddf19c
+    HMAT_LB_MEM_CACHE_2ND_LEVEL  = 2,
ddf19c
+    HMAT_LB_MEM_CACHE_3RD_LEVEL  = 3,
ddf19c
+    HMAT_LB_LEVELS   /* must be the last entry */
ddf19c
+};
ddf19c
+
ddf19c
+/* the value of AcpiHmatLBInfo data type */
ddf19c
+enum {
ddf19c
+    HMAT_LB_DATA_ACCESS_LATENCY   = 0,
ddf19c
+    HMAT_LB_DATA_READ_LATENCY     = 1,
ddf19c
+    HMAT_LB_DATA_WRITE_LATENCY    = 2,
ddf19c
+    HMAT_LB_DATA_ACCESS_BANDWIDTH = 3,
ddf19c
+    HMAT_LB_DATA_READ_BANDWIDTH   = 4,
ddf19c
+    HMAT_LB_DATA_WRITE_BANDWIDTH  = 5,
ddf19c
+    HMAT_LB_TYPES   /* must be the last entry */
ddf19c
+};
ddf19c
+
ddf19c
+#define UINT16_BITS       16
ddf19c
+
ddf19c
 struct NodeInfo {
ddf19c
     uint64_t node_mem;
ddf19c
     struct HostMemoryBackend *node_memdev;
ddf19c
     bool present;
ddf19c
     bool has_cpu;
ddf19c
+    uint8_t lb_info_provided;
ddf19c
     uint16_t initiator;
ddf19c
     uint8_t distance[MAX_NODES];
ddf19c
 };
ddf19c
@@ -28,6 +51,31 @@ struct NumaNodeMem {
ddf19c
     uint64_t node_plugged_mem;
ddf19c
 };
ddf19c
 
ddf19c
+struct HMAT_LB_Data {
ddf19c
+    uint8_t     initiator;
ddf19c
+    uint8_t     target;
ddf19c
+    uint64_t    data;
ddf19c
+};
ddf19c
+typedef struct HMAT_LB_Data HMAT_LB_Data;
ddf19c
+
ddf19c
+struct HMAT_LB_Info {
ddf19c
+    /* Indicates it's memory or the specified level memory side cache. */
ddf19c
+    uint8_t     hierarchy;
ddf19c
+
ddf19c
+    /* Present the type of data, access/read/write latency or bandwidth. */
ddf19c
+    uint8_t     data_type;
ddf19c
+
ddf19c
+    /* The range bitmap of bandwidth for calculating common base */
ddf19c
+    uint64_t    range_bitmap;
ddf19c
+
ddf19c
+    /* The common base unit for latencies or bandwidths */
ddf19c
+    uint64_t    base;
ddf19c
+
ddf19c
+    /* Array to store the latencies or bandwidths */
ddf19c
+    GArray      *list;
ddf19c
+};
ddf19c
+typedef struct HMAT_LB_Info HMAT_LB_Info;
ddf19c
+
ddf19c
 struct NumaState {
ddf19c
     /* Number of NUMA nodes */
ddf19c
     int num_nodes;
ddf19c
@@ -40,11 +88,16 @@ struct NumaState {
ddf19c
 
ddf19c
     /* NUMA nodes information */
ddf19c
     NodeInfo nodes[MAX_NODES];
ddf19c
+
ddf19c
+    /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
ddf19c
+    HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
ddf19c
 };
ddf19c
 typedef struct NumaState NumaState;
ddf19c
 
ddf19c
 void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
ddf19c
 void parse_numa_opts(MachineState *ms);
ddf19c
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
ddf19c
+                        Error **errp);
ddf19c
 void numa_complete_configuration(MachineState *ms);
ddf19c
 void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
ddf19c
 extern QemuOptsList qemu_numa_opts;
ddf19c
diff --git a/qapi/machine.json b/qapi/machine.json
ddf19c
index 27d0e37..cf8faf5 100644
ddf19c
--- a/qapi/machine.json
ddf19c
+++ b/qapi/machine.json
ddf19c
@@ -426,10 +426,12 @@
ddf19c
 #
ddf19c
 # @cpu: property based CPU(s) to node mapping (Since: 2.10)
ddf19c
 #
ddf19c
+# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
ddf19c
+#
ddf19c
 # Since: 2.1
ddf19c
 ##
ddf19c
 { 'enum': 'NumaOptionsType',
ddf19c
-  'data': [ 'node', 'dist', 'cpu' ] }
ddf19c
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
ddf19c
 
ddf19c
 ##
ddf19c
 # @NumaOptions:
ddf19c
@@ -444,7 +446,8 @@
ddf19c
   'data': {
ddf19c
     'node': 'NumaNodeOptions',
ddf19c
     'dist': 'NumaDistOptions',
ddf19c
-    'cpu': 'NumaCpuOptions' }}
ddf19c
+    'cpu': 'NumaCpuOptions',
ddf19c
+    'hmat-lb': 'NumaHmatLBOptions' }}
ddf19c
 
ddf19c
 ##
ddf19c
 # @NumaNodeOptions:
ddf19c
@@ -558,6 +561,92 @@
ddf19c
    'data' : {} }
ddf19c
 
ddf19c
 ##
ddf19c
+# @HmatLBMemoryHierarchy:
ddf19c
+#
ddf19c
+# The memory hierarchy in the System Locality Latency and Bandwidth
ddf19c
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
ddf19c
+#
ddf19c
+# For more information about @HmatLBMemoryHierarchy, see chapter
ddf19c
+# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec.
ddf19c
+#
ddf19c
+# @memory: the structure represents the memory performance
ddf19c
+#
ddf19c
+# @first-level: first level of memory side cache
ddf19c
+#
ddf19c
+# @second-level: second level of memory side cache
ddf19c
+#
ddf19c
+# @third-level: third level of memory side cache
ddf19c
+#
ddf19c
+# Since: 5.0
ddf19c
+##
ddf19c
+{ 'enum': 'HmatLBMemoryHierarchy',
ddf19c
+  'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] }
ddf19c
+
ddf19c
+##
ddf19c
+# @HmatLBDataType:
ddf19c
+#
ddf19c
+# Data type in the System Locality Latency and Bandwidth
ddf19c
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
ddf19c
+#
ddf19c
+# For more information about @HmatLBDataType, see chapter
ddf19c
+# 5.2.27.4: Table 5-146:  Field "Data Type" of ACPI 6.3 spec.
ddf19c
+#
ddf19c
+# @access-latency: access latency (nanoseconds)
ddf19c
+#
ddf19c
+# @read-latency: read latency (nanoseconds)
ddf19c
+#
ddf19c
+# @write-latency: write latency (nanoseconds)
ddf19c
+#
ddf19c
+# @access-bandwidth: access bandwidth (Bytes per second)
ddf19c
+#
ddf19c
+# @read-bandwidth: read bandwidth (Bytes per second)
ddf19c
+#
ddf19c
+# @write-bandwidth: write bandwidth (Bytes per second)
ddf19c
+#
ddf19c
+# Since: 5.0
ddf19c
+##
ddf19c
+{ 'enum': 'HmatLBDataType',
ddf19c
+  'data': [ 'access-latency', 'read-latency', 'write-latency',
ddf19c
+            'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] }
ddf19c
+
ddf19c
+##
ddf19c
+# @NumaHmatLBOptions:
ddf19c
+#
ddf19c
+# Set the system locality latency and bandwidth information
ddf19c
+# between Initiator and Target proximity Domains.
ddf19c
+#
ddf19c
+# For more information about @NumaHmatLBOptions, see chapter
ddf19c
+# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
ddf19c
+#
ddf19c
+# @initiator: the Initiator Proximity Domain.
ddf19c
+#
ddf19c
+# @target: the Target Proximity Domain.
ddf19c
+#
ddf19c
+# @hierarchy: the Memory Hierarchy. Indicates the performance
ddf19c
+#             of memory or side cache.
ddf19c
+#
ddf19c
+# @data-type: presents the type of data, access/read/write
ddf19c
+#             latency or bandwidth.
ddf19c
+#
ddf19c
+# @latency: the value of latency from @initiator to @target
ddf19c
+#           proximity domain, the latency unit is "ns(nanosecond)".
ddf19c
+#
ddf19c
+# @bandwidth: the value of bandwidth between @initiator and @target
ddf19c
+#             proximity domain, the bandwidth unit is
ddf19c
+#             "Bytes per second".
ddf19c
+#
ddf19c
+# Since: 5.0
ddf19c
+##
ddf19c
+{ 'struct': 'NumaHmatLBOptions',
ddf19c
+    'data': {
ddf19c
+    'initiator': 'uint16',
ddf19c
+    'target': 'uint16',
ddf19c
+    'hierarchy': 'HmatLBMemoryHierarchy',
ddf19c
+    'data-type': 'HmatLBDataType',
ddf19c
+    '*latency': 'uint64',
ddf19c
+    '*bandwidth': 'size' }}
ddf19c
+
ddf19c
+##
ddf19c
 # @HostMemPolicy:
ddf19c
 #
ddf19c
 # Host memory policy types
ddf19c
diff --git a/qemu-options.hx b/qemu-options.hx
ddf19c
index e2ce754..86d9d8a 100644
ddf19c
--- a/qemu-options.hx
ddf19c
+++ b/qemu-options.hx
ddf19c
@@ -168,16 +168,19 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
ddf19c
     "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
ddf19c
     "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
ddf19c
     "-numa dist,src=source,dst=destination,val=distance\n"
ddf19c
-    "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
ddf19c
+    "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
ddf19c
+    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
ddf19c
     QEMU_ARCH_ALL)
ddf19c
 STEXI
ddf19c
 @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
ddf19c
 @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
ddf19c
 @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
ddf19c
 @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
ddf19c
+@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{type}[,latency=@var{lat}][,bandwidth=@var{bw}]
ddf19c
 @findex -numa
ddf19c
 Define a NUMA node and assign RAM and VCPUs to it.
ddf19c
 Set the NUMA distance from a source node to a destination node.
ddf19c
+Set the ACPI Heterogeneous Memory Attributes for the given nodes.
ddf19c
 
ddf19c
 Legacy VCPU assignment uses @samp{cpus} option where
ddf19c
 @var{firstcpu} and @var{lastcpu} are CPU indexes. Each
ddf19c
@@ -256,6 +259,48 @@ specified resources, it just assigns existing resources to NUMA
ddf19c
 nodes. This means that one still has to use the @option{-m},
ddf19c
 @option{-smp} options to allocate RAM and VCPUs respectively.
ddf19c
 
ddf19c
+Use @samp{hmat-lb} to set System Locality Latency and Bandwidth Information
ddf19c
+between initiator and target NUMA nodes in ACPI Heterogeneous Memory Attribute Table (HMAT).
ddf19c
+Initiator NUMA node can create memory requests, usually it has one or more processors.
ddf19c
+Target NUMA node contains addressable memory.
ddf19c
+
ddf19c
+In @samp{hmat-lb} option, @var{node} are NUMA node IDs. @var{hierarchy} is the memory
ddf19c
+hierarchy of the target NUMA node: if @var{hierarchy} is 'memory', the structure
ddf19c
+represents the memory performance; if @var{hierarchy} is 'first-level|second-level|third-level',
ddf19c
+this structure represents aggregated performance of memory side caches for each domain.
ddf19c
+@var{type} of 'data-type' is type of data represented by this structure instance:
ddf19c
+if 'hierarchy' is 'memory', 'data-type' is 'access|read|write' latency or 'access|read|write'
ddf19c
+bandwidth of the target memory; if 'hierarchy' is 'first-level|second-level|third-level',
ddf19c
+'data-type' is 'access|read|write' hit latency or 'access|read|write' hit bandwidth of the
ddf19c
+target memory side cache.
ddf19c
+
ddf19c
+@var{lat} is latency value in nanoseconds. @var{bw} is bandwidth value,
ddf19c
+the possible values and units are NUM[M|G|T], meaning that the bandwidth value is
ddf19c
+NUM bytes per second (or MB/s, GB/s or TB/s depending on the suffix used).
ddf19c
+Note that a latency or bandwidth value of 0 means the corresponding latency or
ddf19c
+bandwidth information is not provided.
ddf19c
+
ddf19c
+For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
ddf19c
+a ram, node 1 has only a ram. The processors in node 0 access memory in node
ddf19c
+0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
ddf19c
+The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
ddf19c
+nanoseconds, access-bandwidth is 100 MB/s.
ddf19c
+@example
ddf19c
+-machine hmat=on \
ddf19c
+-m 2G \
ddf19c
+-object memory-backend-ram,size=1G,id=m0 \
ddf19c
+-object memory-backend-ram,size=1G,id=m1 \
ddf19c
+-smp 2 \
ddf19c
+-numa node,nodeid=0,memdev=m0 \
ddf19c
+-numa node,nodeid=1,memdev=m1,initiator=0 \
ddf19c
+-numa cpu,node-id=0,socket-id=0 \
ddf19c
+-numa cpu,node-id=0,socket-id=1 \
ddf19c
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
ddf19c
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
ddf19c
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
ddf19c
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M
ddf19c
+@end example
ddf19c
+
ddf19c
 ETEXI
ddf19c
 
ddf19c
 DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
ddf19c
-- 
ddf19c
1.8.3.1
ddf19c