Blame SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch

77c23f
From 8cd3544b1347b248b9d04eb3d6c9b9bde3a13655 Mon Sep 17 00:00:00 2001
77c23f
From: "plai@redhat.com" <plai@redhat.com>
77c23f
Date: Thu, 21 May 2020 23:56:49 +0100
77c23f
Subject: [PATCH 06/12] numa: Extend CLI to provide memory side cache
77c23f
 information
77c23f
77c23f
RH-Author: plai@redhat.com
77c23f
Message-id: <20200521235655.27141-6-plai@redhat.com>
77c23f
Patchwork-id: 96740
77c23f
O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 05/11] numa: Extend CLI to provide memory side cache information
77c23f
Bugzilla: 1600217
77c23f
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
77c23f
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
77c23f
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
77c23f
77c23f
From: Liu Jingqi <jingqi.liu@intel.com>
77c23f
77c23f
Add -numa hmat-cache option to provide Memory Side Cache Information.
77c23f
These memory attributes help to build Memory Side Cache Information
77c23f
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
77c23f
Before using hmat-cache option, enable HMAT with -machine hmat=on.
77c23f
77c23f
Acked-by: Markus Armbruster <armbru@redhat.com>
77c23f
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
77c23f
Signed-off-by: Tao Xu <tao3.xu@intel.com>
77c23f
Message-Id: <20191213011929.2520-4-tao3.xu@intel.com>
77c23f
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
77c23f
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
77c23f
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
77c23f
(cherry picked from commit c412a48d4d91e8f8b89aae02de0f44f1f0b729e5)
77c23f
Signed-off-by: Paul Lai <plai@redhat.com>
77c23f
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
77c23f
---
77c23f
 hw/core/numa.c        | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++
77c23f
 include/sysemu/numa.h |  5 ++++
77c23f
 qapi/machine.json     | 81 +++++++++++++++++++++++++++++++++++++++++++++++++--
77c23f
 qemu-options.hx       | 17 +++++++++--
77c23f
 4 files changed, 179 insertions(+), 4 deletions(-)
77c23f
77c23f
diff --git a/hw/core/numa.c b/hw/core/numa.c
77c23f
index 58fe713..0d1b4be 100644
77c23f
--- a/hw/core/numa.c
77c23f
+++ b/hw/core/numa.c
77c23f
@@ -375,6 +375,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
77c23f
     g_array_append_val(hmat_lb->list, lb_data);
77c23f
 }
77c23f
 
77c23f
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
77c23f
+                           Error **errp)
77c23f
+{
77c23f
+    int nb_numa_nodes = ms->numa_state->num_nodes;
77c23f
+    NodeInfo *numa_info = ms->numa_state->nodes;
77c23f
+    NumaHmatCacheOptions *hmat_cache = NULL;
77c23f
+
77c23f
+    if (node->node_id >= nb_numa_nodes) {
77c23f
+        error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
77c23f
+                   "than %d", node->node_id, nb_numa_nodes);
77c23f
+        return;
77c23f
+    }
77c23f
+
77c23f
+    if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
77c23f
+        error_setg(errp, "The latency and bandwidth information of "
77c23f
+                   "node-id=%" PRIu32 " should be provided before memory side "
77c23f
+                   "cache attributes", node->node_id);
77c23f
+        return;
77c23f
+    }
77c23f
+
77c23f
+    if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
77c23f
+        error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
77c23f
+                   "and less than or equal to %d", node->level,
77c23f
+                   HMAT_LB_LEVELS - 1);
77c23f
+        return;
77c23f
+    }
77c23f
+
77c23f
+    assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
77c23f
+    assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
77c23f
+    if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
77c23f
+        error_setg(errp, "Duplicate configuration of the side cache for "
77c23f
+                   "node-id=%" PRIu32 " and level=%" PRIu8,
77c23f
+                   node->node_id, node->level);
77c23f
+        return;
77c23f
+    }
77c23f
+
77c23f
+    if ((node->level > 1) &&
77c23f
+        ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
77c23f
+        (node->size >=
77c23f
+            ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
77c23f
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
77c23f
+                   " should be less than the size(%" PRIu64 ") of "
77c23f
+                   "level=%u", node->size, node->level,
77c23f
+                   ms->numa_state->hmat_cache[node->node_id]
77c23f
+                                             [node->level - 1]->size,
77c23f
+                   node->level - 1);
77c23f
+        return;
77c23f
+    }
77c23f
+
77c23f
+    if ((node->level < HMAT_LB_LEVELS - 1) &&
77c23f
+        ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
77c23f
+        (node->size <=
77c23f
+            ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
77c23f
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
77c23f
+                   " should be larger than the size(%" PRIu64 ") of "
77c23f
+                   "level=%u", node->size, node->level,
77c23f
+                   ms->numa_state->hmat_cache[node->node_id]
77c23f
+                                             [node->level + 1]->size,
77c23f
+                   node->level + 1);
77c23f
+        return;
77c23f
+    }
77c23f
+
77c23f
+    hmat_cache = g_malloc0(sizeof(*hmat_cache));
77c23f
+    memcpy(hmat_cache, node, sizeof(*hmat_cache));
77c23f
+    ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
77c23f
+}
77c23f
+
77c23f
 void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
77c23f
 {
77c23f
     Error *err = NULL;
77c23f
@@ -425,6 +492,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
77c23f
             goto end;
77c23f
         }
77c23f
         break;
77c23f
+    case NUMA_OPTIONS_TYPE_HMAT_CACHE:
77c23f
+        if (!ms->numa_state->hmat_enabled) {
77c23f
+            error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
77c23f
+                       "(HMAT) is disabled, enable it with -machine hmat=on "
77c23f
+                       "before using any of hmat specific options");
77c23f
+            return;
77c23f
+        }
77c23f
+
77c23f
+        parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
77c23f
+        if (err) {
77c23f
+            goto end;
77c23f
+        }
77c23f
+        break;
77c23f
     default:
77c23f
         abort();
77c23f
     }
77c23f
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
77c23f
index 70f93c8..ba693cc 100644
77c23f
--- a/include/sysemu/numa.h
77c23f
+++ b/include/sysemu/numa.h
77c23f
@@ -91,6 +91,9 @@ struct NumaState {
77c23f
 
77c23f
     /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
77c23f
     HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
77c23f
+
77c23f
+    /* Memory Side Cache Information Structure */
77c23f
+    NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
77c23f
 };
77c23f
 typedef struct NumaState NumaState;
77c23f
 
77c23f
@@ -98,6 +101,8 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
77c23f
 void parse_numa_opts(MachineState *ms);
77c23f
 void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
77c23f
                         Error **errp);
77c23f
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
77c23f
+                           Error **errp);
77c23f
 void numa_complete_configuration(MachineState *ms);
77c23f
 void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
77c23f
 extern QemuOptsList qemu_numa_opts;
77c23f
diff --git a/qapi/machine.json b/qapi/machine.json
77c23f
index cf8faf5..b3d30bc 100644
77c23f
--- a/qapi/machine.json
77c23f
+++ b/qapi/machine.json
77c23f
@@ -428,10 +428,12 @@
77c23f
 #
77c23f
 # @hmat-lb: memory latency and bandwidth information (Since: 5.0)
77c23f
 #
77c23f
+# @hmat-cache: memory side cache information (Since: 5.0)
77c23f
+#
77c23f
 # Since: 2.1
77c23f
 ##
77c23f
 { 'enum': 'NumaOptionsType',
77c23f
-  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
77c23f
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
77c23f
 
77c23f
 ##
77c23f
 # @NumaOptions:
77c23f
@@ -447,7 +449,8 @@
77c23f
     'node': 'NumaNodeOptions',
77c23f
     'dist': 'NumaDistOptions',
77c23f
     'cpu': 'NumaCpuOptions',
77c23f
-    'hmat-lb': 'NumaHmatLBOptions' }}
77c23f
+    'hmat-lb': 'NumaHmatLBOptions',
77c23f
+    'hmat-cache': 'NumaHmatCacheOptions' }}
77c23f
 
77c23f
 ##
77c23f
 # @NumaNodeOptions:
77c23f
@@ -647,6 +650,80 @@
77c23f
     '*bandwidth': 'size' }}
77c23f
 
77c23f
 ##
77c23f
+# @HmatCacheAssociativity:
77c23f
+#
77c23f
+# Cache associativity in the Memory Side Cache Information Structure
77c23f
+# of HMAT
77c23f
+#
77c23f
+# For more information of @HmatCacheAssociativity, see chapter
77c23f
+# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
77c23f
+#
77c23f
+# @none: None (no memory side cache in this proximity domain,
77c23f
+#              or cache associativity unknown)
77c23f
+#
77c23f
+# @direct: Direct Mapped
77c23f
+#
77c23f
+# @complex: Complex Cache Indexing (implementation specific)
77c23f
+#
77c23f
+# Since: 5.0
77c23f
+##
77c23f
+{ 'enum': 'HmatCacheAssociativity',
77c23f
+  'data': [ 'none', 'direct', 'complex' ] }
77c23f
+
77c23f
+##
77c23f
+# @HmatCacheWritePolicy:
77c23f
+#
77c23f
+# Cache write policy in the Memory Side Cache Information Structure
77c23f
+# of HMAT
77c23f
+#
77c23f
+# For more information of @HmatCacheWritePolicy, see chapter
77c23f
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
77c23f
+#
77c23f
+# @none: None (no memory side cache in this proximity domain,
77c23f
+#              or cache write policy unknown)
77c23f
+#
77c23f
+# @write-back: Write Back (WB)
77c23f
+#
77c23f
+# @write-through: Write Through (WT)
77c23f
+#
77c23f
+# Since: 5.0
77c23f
+##
77c23f
+{ 'enum': 'HmatCacheWritePolicy',
77c23f
+  'data': [ 'none', 'write-back', 'write-through' ] }
77c23f
+
77c23f
+##
77c23f
+# @NumaHmatCacheOptions:
77c23f
+#
77c23f
+# Set the memory side cache information for a given memory domain.
77c23f
+#
77c23f
+# For more information of @NumaHmatCacheOptions, see chapter
77c23f
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
77c23f
+#
77c23f
+# @node-id: the memory proximity domain to which the memory belongs.
77c23f
+#
77c23f
+# @size: the size of memory side cache in bytes.
77c23f
+#
77c23f
+# @level: the cache level described in this structure.
77c23f
+#
77c23f
+# @associativity: the cache associativity,
77c23f
+#         none/direct-mapped/complex(complex cache indexing).
77c23f
+#
77c23f
+# @policy: the write policy, none/write-back/write-through.
77c23f
+#
77c23f
+# @line: the cache Line size in bytes.
77c23f
+#
77c23f
+# Since: 5.0
77c23f
+##
77c23f
+{ 'struct': 'NumaHmatCacheOptions',
77c23f
+  'data': {
77c23f
+   'node-id': 'uint32',
77c23f
+   'size': 'size',
77c23f
+   'level': 'uint8',
77c23f
+   'associativity': 'HmatCacheAssociativity',
77c23f
+   'policy': 'HmatCacheWritePolicy',
77c23f
+   'line': 'uint16' }}
77c23f
+
77c23f
+##
77c23f
 # @HostMemPolicy:
77c23f
 #
77c23f
 # Host memory policy types
77c23f
diff --git a/qemu-options.hx b/qemu-options.hx
77c23f
index 86d9d8a..8fe05b6 100644
77c23f
--- a/qemu-options.hx
77c23f
+++ b/qemu-options.hx
77c23f
@@ -169,7 +169,8 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
77c23f
     "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
77c23f
     "-numa dist,src=source,dst=destination,val=distance\n"
77c23f
     "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
77c23f
-    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
77c23f
+    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
77c23f
+    "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
77c23f
     QEMU_ARCH_ALL)
77c23f
 STEXI
77c23f
 @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
77c23f
@@ -177,6 +178,7 @@ STEXI
77c23f
 @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
77c23f
 @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
77c23f
 @itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}]
77c23f
+@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}]
77c23f
 @findex -numa
77c23f
 Define a NUMA node and assign RAM and VCPUs to it.
77c23f
 Set the NUMA distance from a source node to a destination node.
77c23f
@@ -280,11 +282,20 @@ NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix).
77c23f
 Note that if latency or bandwidth value is 0, means the corresponding latency or
77c23f
 bandwidth information is not provided.
77c23f
 
77c23f
+In @samp{hmat-cache} option, @var{node-id} is the NUMA node to which the memory belongs.
77c23f
+@var{size} is the size of memory side cache in bytes. @var{level} is the cache
77c23f
+level described in this structure, note that the cache level 0 should not be used
77c23f
+with @samp{hmat-cache} option. @var{associativity} is the cache associativity,
77c23f
+the possible value is 'none/direct(direct-mapped)/complex(complex cache indexing)'.
77c23f
+@var{policy} is the write policy. @var{line} is the cache Line size in bytes.
77c23f
+
77c23f
 For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
77c23f
 a ram, node 1 has only a ram. The processors in node 0 access memory in node
77c23f
 0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
77c23f
 The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
77c23f
 nanoseconds, access-bandwidth is 100 MB/s.
77c23f
+And for memory side cache information, NUMA node 0 and 1 both have 1 level memory
77c23f
+cache, size is 10KB, policy is write-back, the cache Line size is 8 bytes:
77c23f
 @example
77c23f
 -machine hmat=on \
77c23f
 -m 2G \
77c23f
@@ -298,7 +309,9 @@ nanoseconds, access-bandwidth is 100 MB/s.
77c23f
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
77c23f
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
77c23f
 -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
77c23f
--numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M
77c23f
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
77c23f
+-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
77c23f
+-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
77c23f
 @end example
77c23f
 
77c23f
 ETEXI
77c23f
-- 
77c23f
1.8.3.1
77c23f