Blame SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch

77c23f
From 32341d8cf680625def040b44d70b197f2399bbdb Mon Sep 17 00:00:00 2001
77c23f
From: "plai@redhat.com" <plai@redhat.com>
77c23f
Date: Thu, 21 May 2020 23:56:48 +0100
77c23f
Subject: [PATCH 05/12] numa: Extend CLI to provide memory latency and
77c23f
 bandwidth information
77c23f
77c23f
RH-Author: plai@redhat.com
77c23f
Message-id: <20200521235655.27141-5-plai@redhat.com>
77c23f
Patchwork-id: 96731
77c23f
O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 04/11] numa: Extend CLI to provide memory latency and bandwidth information
77c23f
Bugzilla: 1600217
77c23f
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
77c23f
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
77c23f
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
77c23f
77c23f
From: Liu Jingqi <jingqi.liu@intel.com>
77c23f
77c23f
Add -numa hmat-lb option to provide System Locality Latency and
77c23f
Bandwidth Information. These memory attributes help to build
77c23f
System Locality Latency and Bandwidth Information Structure(s)
77c23f
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
77c23f
hmat-lb option, enable HMAT with -machine hmat=on.
77c23f
77c23f
Acked-by: Markus Armbruster <armbru@redhat.com>
77c23f
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
77c23f
Signed-off-by: Tao Xu <tao3.xu@intel.com>
77c23f
Message-Id: <20191213011929.2520-3-tao3.xu@intel.com>
77c23f
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
77c23f
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
77c23f
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
77c23f
(cherry picked from commit 9b12dfa03a94d7f7a4b54eb67229a31e58193384)
77c23f
Signed-off-by: Paul Lai <plai@redhat.com>
77c23f
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
77c23f
---
77c23f
 hw/core/numa.c        | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++
77c23f
 include/sysemu/numa.h |  53 ++++++++++++++
77c23f
 qapi/machine.json     |  93 +++++++++++++++++++++++-
77c23f
 qemu-options.hx       |  47 +++++++++++-
77c23f
 4 files changed, 384 insertions(+), 3 deletions(-)
77c23f
77c23f
diff --git a/hw/core/numa.c b/hw/core/numa.c
77c23f
index a07eef9..58fe713 100644
77c23f
--- a/hw/core/numa.c
77c23f
+++ b/hw/core/numa.c
77c23f
@@ -23,6 +23,7 @@
77c23f
  */
77c23f
 
77c23f
 #include "qemu/osdep.h"
77c23f
+#include "qemu/units.h"
77c23f
 #include "sysemu/hostmem.h"
77c23f
 #include "sysemu/numa.h"
77c23f
 #include "sysemu/sysemu.h"
77c23f
@@ -194,6 +195,186 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp)
77c23f
     ms->numa_state->have_numa_distance = true;
77c23f
 }
77c23f
 
77c23f
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
77c23f
+                        Error **errp)
77c23f
+{
77c23f
+    int i, first_bit, last_bit;
77c23f
+    uint64_t max_entry, temp_base, bitmap_copy;
77c23f
+    NodeInfo *numa_info = numa_state->nodes;
77c23f
+    HMAT_LB_Info *hmat_lb =
77c23f
+        numa_state->hmat_lb[node->hierarchy][node->data_type];
77c23f
+    HMAT_LB_Data lb_data = {};
77c23f
+    HMAT_LB_Data *lb_temp;
77c23f
+
77c23f
+    /* Error checking */
77c23f
+    if (node->initiator > numa_state->num_nodes) {
77c23f
+        error_setg(errp, "Invalid initiator=%d, it should be less than %d",
77c23f
+                   node->initiator, numa_state->num_nodes);
77c23f
+        return;
77c23f
+    }
77c23f
+    if (node->target > numa_state->num_nodes) {
77c23f
+        error_setg(errp, "Invalid target=%d, it should be less than %d",
77c23f
+                   node->target, numa_state->num_nodes);
77c23f
+        return;
77c23f
+    }
77c23f
+    if (!numa_info[node->initiator].has_cpu) {
77c23f
+        error_setg(errp, "Invalid initiator=%d, it isn't an "
77c23f
+                   "initiator proximity domain", node->initiator);
77c23f
+        return;
77c23f
+    }
77c23f
+    if (!numa_info[node->target].present) {
77c23f
+        error_setg(errp, "The target=%d should point to an existing node",
77c23f
+                   node->target);
77c23f
+        return;
77c23f
+    }
77c23f
+
77c23f
+    if (!hmat_lb) {
77c23f
+        hmat_lb = g_malloc0(sizeof(*hmat_lb));
77c23f
+        numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
77c23f
+        hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
77c23f
+    }
77c23f
+    hmat_lb->hierarchy = node->hierarchy;
77c23f
+    hmat_lb->data_type = node->data_type;
77c23f
+    lb_data.initiator = node->initiator;
77c23f
+    lb_data.target = node->target;
77c23f
+
77c23f
+    if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
77c23f
+        /* Input latency data */
77c23f
+
77c23f
+        if (!node->has_latency) {
77c23f
+            error_setg(errp, "Missing 'latency' option");
77c23f
+            return;
77c23f
+        }
77c23f
+        if (node->has_bandwidth) {
77c23f
+            error_setg(errp, "Invalid option 'bandwidth' since "
77c23f
+                       "the data type is latency");
77c23f
+            return;
77c23f
+        }
77c23f
+
77c23f
+        /* Detect duplicate configuration */
77c23f
+        for (i = 0; i < hmat_lb->list->len; i++) {
77c23f
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
77c23f
+
77c23f
+            if (node->initiator == lb_temp->initiator &&
77c23f
+                node->target == lb_temp->target) {
77c23f
+                error_setg(errp, "Duplicate configuration of the latency for "
77c23f
+                    "initiator=%d and target=%d", node->initiator,
77c23f
+                    node->target);
77c23f
+                return;
77c23f
+            }
77c23f
+        }
77c23f
+
77c23f
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
77c23f
+
77c23f
+        if (node->latency) {
77c23f
+            /* Calculate the temporary base and compressed latency */
77c23f
+            max_entry = node->latency;
77c23f
+            temp_base = 1;
77c23f
+            while (QEMU_IS_ALIGNED(max_entry, 10)) {
77c23f
+                max_entry /= 10;
77c23f
+                temp_base *= 10;
77c23f
+            }
77c23f
+
77c23f
+            /* Calculate the max compressed latency */
77c23f
+            temp_base = MIN(hmat_lb->base, temp_base);
77c23f
+            max_entry = node->latency / hmat_lb->base;
77c23f
+            max_entry = MAX(hmat_lb->range_bitmap, max_entry);
77c23f
+
77c23f
+            /*
77c23f
+             * For latency hmat_lb->range_bitmap record the max compressed
77c23f
+             * latency which should be less than 0xFFFF (UINT16_MAX)
77c23f
+             */
77c23f
+            if (max_entry >= UINT16_MAX) {
77c23f
+                error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
77c23f
+                        "target=%d should not differ from previously entered "
77c23f
+                        "min or max values on more than %d", node->latency,
77c23f
+                        node->initiator, node->target, UINT16_MAX - 1);
77c23f
+                return;
77c23f
+            } else {
77c23f
+                hmat_lb->base = temp_base;
77c23f
+                hmat_lb->range_bitmap = max_entry;
77c23f
+            }
77c23f
+
77c23f
+            /*
77c23f
+             * Set lb_info_provided bit 0 as 1,
77c23f
+             * latency information is provided
77c23f
+             */
77c23f
+            numa_info[node->target].lb_info_provided |= BIT(0);
77c23f
+        }
77c23f
+        lb_data.data = node->latency;
77c23f
+    } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
77c23f
+        /* Input bandwidth data */
77c23f
+        if (!node->has_bandwidth) {
77c23f
+            error_setg(errp, "Missing 'bandwidth' option");
77c23f
+            return;
77c23f
+        }
77c23f
+        if (node->has_latency) {
77c23f
+            error_setg(errp, "Invalid option 'latency' since "
77c23f
+                       "the data type is bandwidth");
77c23f
+            return;
77c23f
+        }
77c23f
+        if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
77c23f
+            error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
77c23f
+                       "target=%d should be 1MB aligned", node->bandwidth,
77c23f
+                       node->initiator, node->target);
77c23f
+            return;
77c23f
+        }
77c23f
+
77c23f
+        /* Detect duplicate configuration */
77c23f
+        for (i = 0; i < hmat_lb->list->len; i++) {
77c23f
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
77c23f
+
77c23f
+            if (node->initiator == lb_temp->initiator &&
77c23f
+                node->target == lb_temp->target) {
77c23f
+                error_setg(errp, "Duplicate configuration of the bandwidth for "
77c23f
+                    "initiator=%d and target=%d", node->initiator,
77c23f
+                    node->target);
77c23f
+                return;
77c23f
+            }
77c23f
+        }
77c23f
+
77c23f
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
77c23f
+
77c23f
+        if (node->bandwidth) {
77c23f
+            /* Keep bitmap unchanged when bandwidth out of range */
77c23f
+            bitmap_copy = hmat_lb->range_bitmap;
77c23f
+            bitmap_copy |= node->bandwidth;
77c23f
+            first_bit = ctz64(bitmap_copy);
77c23f
+            temp_base = UINT64_C(1) << first_bit;
77c23f
+            max_entry = node->bandwidth / temp_base;
77c23f
+            last_bit = 64 - clz64(bitmap_copy);
77c23f
+
77c23f
+            /*
77c23f
+             * For bandwidth, first_bit record the base unit of bandwidth bits,
77c23f
+             * last_bit record the last bit of the max bandwidth. The max
77c23f
+             * compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
77c23f
+             */
77c23f
+            if ((last_bit - first_bit) > UINT16_BITS ||
77c23f
+                max_entry >= UINT16_MAX) {
77c23f
+                error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
77c23f
+                        "and target=%d should not differ from previously "
77c23f
+                        "entered values on more than %d", node->bandwidth,
77c23f
+                        node->initiator, node->target, UINT16_MAX - 1);
77c23f
+                return;
77c23f
+            } else {
77c23f
+                hmat_lb->base = temp_base;
77c23f
+                hmat_lb->range_bitmap = bitmap_copy;
77c23f
+            }
77c23f
+
77c23f
+            /*
77c23f
+             * Set lb_info_provided bit 1 as 1,
77c23f
+             * bandwidth information is provided
77c23f
+             */
77c23f
+            numa_info[node->target].lb_info_provided |= BIT(1);
77c23f
+        }
77c23f
+        lb_data.data = node->bandwidth;
77c23f
+    } else {
77c23f
+        assert(0);
77c23f
+    }
77c23f
+
77c23f
+    g_array_append_val(hmat_lb->list, lb_data);
77c23f
+}
77c23f
+
77c23f
 void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
77c23f
 {
77c23f
     Error *err = NULL;
77c23f
@@ -231,6 +412,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
77c23f
         machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
77c23f
                                   &err);
77c23f
         break;
77c23f
+    case NUMA_OPTIONS_TYPE_HMAT_LB:
77c23f
+        if (!ms->numa_state->hmat_enabled) {
77c23f
+            error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
77c23f
+                       "(HMAT) is disabled, enable it with -machine hmat=on "
77c23f
+                       "before using any of hmat specific options");
77c23f
+            return;
77c23f
+        }
77c23f
+
77c23f
+        parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
77c23f
+        if (err) {
77c23f
+            goto end;
77c23f
+        }
77c23f
+        break;
77c23f
     default:
77c23f
         abort();
77c23f
     }
77c23f
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
77c23f
index 788cbec..70f93c8 100644
77c23f
--- a/include/sysemu/numa.h
77c23f
+++ b/include/sysemu/numa.h
77c23f
@@ -14,11 +14,34 @@ struct CPUArchId;
77c23f
 #define NUMA_DISTANCE_MAX         254
77c23f
 #define NUMA_DISTANCE_UNREACHABLE 255
77c23f
 
77c23f
+/* the value of AcpiHmatLBInfo flags */
77c23f
+enum {
77c23f
+    HMAT_LB_MEM_MEMORY           = 0,
77c23f
+    HMAT_LB_MEM_CACHE_1ST_LEVEL  = 1,
77c23f
+    HMAT_LB_MEM_CACHE_2ND_LEVEL  = 2,
77c23f
+    HMAT_LB_MEM_CACHE_3RD_LEVEL  = 3,
77c23f
+    HMAT_LB_LEVELS   /* must be the last entry */
77c23f
+};
77c23f
+
77c23f
+/* the value of AcpiHmatLBInfo data type */
77c23f
+enum {
77c23f
+    HMAT_LB_DATA_ACCESS_LATENCY   = 0,
77c23f
+    HMAT_LB_DATA_READ_LATENCY     = 1,
77c23f
+    HMAT_LB_DATA_WRITE_LATENCY    = 2,
77c23f
+    HMAT_LB_DATA_ACCESS_BANDWIDTH = 3,
77c23f
+    HMAT_LB_DATA_READ_BANDWIDTH   = 4,
77c23f
+    HMAT_LB_DATA_WRITE_BANDWIDTH  = 5,
77c23f
+    HMAT_LB_TYPES   /* must be the last entry */
77c23f
+};
77c23f
+
77c23f
+#define UINT16_BITS       16
77c23f
+
77c23f
 struct NodeInfo {
77c23f
     uint64_t node_mem;
77c23f
     struct HostMemoryBackend *node_memdev;
77c23f
     bool present;
77c23f
     bool has_cpu;
77c23f
+    uint8_t lb_info_provided;
77c23f
     uint16_t initiator;
77c23f
     uint8_t distance[MAX_NODES];
77c23f
 };
77c23f
@@ -28,6 +51,31 @@ struct NumaNodeMem {
77c23f
     uint64_t node_plugged_mem;
77c23f
 };
77c23f
 
77c23f
+struct HMAT_LB_Data {
77c23f
+    uint8_t     initiator;
77c23f
+    uint8_t     target;
77c23f
+    uint64_t    data;
77c23f
+};
77c23f
+typedef struct HMAT_LB_Data HMAT_LB_Data;
77c23f
+
77c23f
+struct HMAT_LB_Info {
77c23f
+    /* Indicates it's memory or the specified level memory side cache. */
77c23f
+    uint8_t     hierarchy;
77c23f
+
77c23f
+    /* Present the type of data, access/read/write latency or bandwidth. */
77c23f
+    uint8_t     data_type;
77c23f
+
77c23f
+    /* The range bitmap of bandwidth for calculating common base */
77c23f
+    uint64_t    range_bitmap;
77c23f
+
77c23f
+    /* The common base unit for latencies or bandwidths */
77c23f
+    uint64_t    base;
77c23f
+
77c23f
+    /* Array to store the latencies or bandwidths */
77c23f
+    GArray      *list;
77c23f
+};
77c23f
+typedef struct HMAT_LB_Info HMAT_LB_Info;
77c23f
+
77c23f
 struct NumaState {
77c23f
     /* Number of NUMA nodes */
77c23f
     int num_nodes;
77c23f
@@ -40,11 +88,16 @@ struct NumaState {
77c23f
 
77c23f
     /* NUMA nodes information */
77c23f
     NodeInfo nodes[MAX_NODES];
77c23f
+
77c23f
+    /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
77c23f
+    HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
77c23f
 };
77c23f
 typedef struct NumaState NumaState;
77c23f
 
77c23f
 void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
77c23f
 void parse_numa_opts(MachineState *ms);
77c23f
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
77c23f
+                        Error **errp);
77c23f
 void numa_complete_configuration(MachineState *ms);
77c23f
 void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
77c23f
 extern QemuOptsList qemu_numa_opts;
77c23f
diff --git a/qapi/machine.json b/qapi/machine.json
77c23f
index 27d0e37..cf8faf5 100644
77c23f
--- a/qapi/machine.json
77c23f
+++ b/qapi/machine.json
77c23f
@@ -426,10 +426,12 @@
77c23f
 #
77c23f
 # @cpu: property based CPU(s) to node mapping (Since: 2.10)
77c23f
 #
77c23f
+# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
77c23f
+#
77c23f
 # Since: 2.1
77c23f
 ##
77c23f
 { 'enum': 'NumaOptionsType',
77c23f
-  'data': [ 'node', 'dist', 'cpu' ] }
77c23f
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
77c23f
 
77c23f
 ##
77c23f
 # @NumaOptions:
77c23f
@@ -444,7 +446,8 @@
77c23f
   'data': {
77c23f
     'node': 'NumaNodeOptions',
77c23f
     'dist': 'NumaDistOptions',
77c23f
-    'cpu': 'NumaCpuOptions' }}
77c23f
+    'cpu': 'NumaCpuOptions',
77c23f
+    'hmat-lb': 'NumaHmatLBOptions' }}
77c23f
 
77c23f
 ##
77c23f
 # @NumaNodeOptions:
77c23f
@@ -558,6 +561,92 @@
77c23f
    'data' : {} }
77c23f
 
77c23f
 ##
77c23f
+# @HmatLBMemoryHierarchy:
77c23f
+#
77c23f
+# The memory hierarchy in the System Locality Latency and Bandwidth
77c23f
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
77c23f
+#
77c23f
+# For more information about @HmatLBMemoryHierarchy, see chapter
77c23f
+# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec.
77c23f
+#
77c23f
+# @memory: the structure represents the memory performance
77c23f
+#
77c23f
+# @first-level: first level of memory side cache
77c23f
+#
77c23f
+# @second-level: second level of memory side cache
77c23f
+#
77c23f
+# @third-level: third level of memory side cache
77c23f
+#
77c23f
+# Since: 5.0
77c23f
+##
77c23f
+{ 'enum': 'HmatLBMemoryHierarchy',
77c23f
+  'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] }
77c23f
+
77c23f
+##
77c23f
+# @HmatLBDataType:
77c23f
+#
77c23f
+# Data type in the System Locality Latency and Bandwidth
77c23f
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
77c23f
+#
77c23f
+# For more information about @HmatLBDataType, see chapter
77c23f
+# 5.2.27.4: Table 5-146:  Field "Data Type" of ACPI 6.3 spec.
77c23f
+#
77c23f
+# @access-latency: access latency (nanoseconds)
77c23f
+#
77c23f
+# @read-latency: read latency (nanoseconds)
77c23f
+#
77c23f
+# @write-latency: write latency (nanoseconds)
77c23f
+#
77c23f
+# @access-bandwidth: access bandwidth (Bytes per second)
77c23f
+#
77c23f
+# @read-bandwidth: read bandwidth (Bytes per second)
77c23f
+#
77c23f
+# @write-bandwidth: write bandwidth (Bytes per second)
77c23f
+#
77c23f
+# Since: 5.0
77c23f
+##
77c23f
+{ 'enum': 'HmatLBDataType',
77c23f
+  'data': [ 'access-latency', 'read-latency', 'write-latency',
77c23f
+            'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] }
77c23f
+
77c23f
+##
77c23f
+# @NumaHmatLBOptions:
77c23f
+#
77c23f
+# Set the system locality latency and bandwidth information
77c23f
+# between Initiator and Target proximity Domains.
77c23f
+#
77c23f
+# For more information about @NumaHmatLBOptions, see chapter
77c23f
+# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
77c23f
+#
77c23f
+# @initiator: the Initiator Proximity Domain.
77c23f
+#
77c23f
+# @target: the Target Proximity Domain.
77c23f
+#
77c23f
+# @hierarchy: the Memory Hierarchy. Indicates the performance
77c23f
+#             of memory or side cache.
77c23f
+#
77c23f
+# @data-type: presents the type of data, access/read/write
77c23f
+#             latency or bandwidth.
77c23f
+#
77c23f
+# @latency: the value of latency from @initiator to @target
77c23f
+#           proximity domain, the latency unit is "ns(nanosecond)".
77c23f
+#
77c23f
+# @bandwidth: the value of bandwidth between @initiator and @target
77c23f
+#             proximity domain, the bandwidth unit is
77c23f
+#             "Bytes per second".
77c23f
+#
77c23f
+# Since: 5.0
77c23f
+##
77c23f
+{ 'struct': 'NumaHmatLBOptions',
77c23f
+    'data': {
77c23f
+    'initiator': 'uint16',
77c23f
+    'target': 'uint16',
77c23f
+    'hierarchy': 'HmatLBMemoryHierarchy',
77c23f
+    'data-type': 'HmatLBDataType',
77c23f
+    '*latency': 'uint64',
77c23f
+    '*bandwidth': 'size' }}
77c23f
+
77c23f
+##
77c23f
 # @HostMemPolicy:
77c23f
 #
77c23f
 # Host memory policy types
77c23f
diff --git a/qemu-options.hx b/qemu-options.hx
77c23f
index e2ce754..86d9d8a 100644
77c23f
--- a/qemu-options.hx
77c23f
+++ b/qemu-options.hx
77c23f
@@ -168,16 +168,19 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
77c23f
     "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
77c23f
     "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
77c23f
     "-numa dist,src=source,dst=destination,val=distance\n"
77c23f
-    "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
77c23f
+    "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
77c23f
+    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
77c23f
     QEMU_ARCH_ALL)
77c23f
 STEXI
77c23f
 @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
77c23f
 @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
77c23f
 @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
77c23f
 @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
77c23f
+@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{type}[,latency=@var{lat}][,bandwidth=@var{bw}]
77c23f
 @findex -numa
77c23f
 Define a NUMA node and assign RAM and VCPUs to it.
77c23f
 Set the NUMA distance from a source node to a destination node.
77c23f
+Set the ACPI Heterogeneous Memory Attributes for the given nodes.
77c23f
 
77c23f
 Legacy VCPU assignment uses @samp{cpus} option where
77c23f
 @var{firstcpu} and @var{lastcpu} are CPU indexes. Each
77c23f
@@ -256,6 +259,48 @@ specified resources, it just assigns existing resources to NUMA
77c23f
 nodes. This means that one still has to use the @option{-m},
77c23f
 @option{-smp} options to allocate RAM and VCPUs respectively.
77c23f
 
77c23f
+Use @samp{hmat-lb} to set System Locality Latency and Bandwidth Information
77c23f
+between initiator and target NUMA nodes in ACPI Heterogeneous Memory Attribute Table (HMAT).
77c23f
+Initiator NUMA node can create memory requests, usually it has one or more processors.
77c23f
+Target NUMA node contains addressable memory.
77c23f
+
77c23f
+In @samp{hmat-lb} option, @var{node} are NUMA node IDs. @var{hierarchy} is the memory
77c23f
+hierarchy of the target NUMA node: if @var{hierarchy} is 'memory', the structure
77c23f
+represents the memory performance; if @var{hierarchy} is 'first-level|second-level|third-level',
77c23f
+this structure represents aggregated performance of memory side caches for each domain.
77c23f
+@var{type} of 'data-type' is type of data represented by this structure instance:
77c23f
+if 'hierarchy' is 'memory', 'data-type' is 'access|read|write' latency or 'access|read|write'
77c23f
+bandwidth of the target memory; if 'hierarchy' is 'first-level|second-level|third-level',
77c23f
+'data-type' is 'access|read|write' hit latency or 'access|read|write' hit bandwidth of the
77c23f
+target memory side cache.
77c23f
+
77c23f
+@var{lat} is the latency value in nanoseconds. @var{bw} is the bandwidth value,
77c23f
+given as NUM[M|G|T], meaning that the bandwidth is
77c23f
+NUM bytes per second (or MB/s, GB/s or TB/s depending on the suffix used).
77c23f
+Note that a latency or bandwidth value of 0 means that the corresponding latency or
77c23f
+bandwidth information is not provided.
77c23f
+
77c23f
+For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
77c23f
+a ram, node 1 has only a ram. The processors in node 0 access memory in node
77c23f
+0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
77c23f
+The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
77c23f
+nanoseconds, access-bandwidth is 100 MB/s.
77c23f
+@example
77c23f
+-machine hmat=on \
77c23f
+-m 2G \
77c23f
+-object memory-backend-ram,size=1G,id=m0 \
77c23f
+-object memory-backend-ram,size=1G,id=m1 \
77c23f
+-smp 2 \
77c23f
+-numa node,nodeid=0,memdev=m0 \
77c23f
+-numa node,nodeid=1,memdev=m1,initiator=0 \
77c23f
+-numa cpu,node-id=0,socket-id=0 \
77c23f
+-numa cpu,node-id=0,socket-id=1 \
77c23f
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
77c23f
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
77c23f
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
77c23f
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M
77c23f
+@end example
77c23f
+
77c23f
 ETEXI
77c23f
 
77c23f
 DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
77c23f
-- 
77c23f
1.8.3.1
77c23f