yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone

Blame SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch

902636
From 8cd3544b1347b248b9d04eb3d6c9b9bde3a13655 Mon Sep 17 00:00:00 2001
902636
From: "plai@redhat.com" <plai@redhat.com>
902636
Date: Thu, 21 May 2020 23:56:49 +0100
902636
Subject: [PATCH 06/12] numa: Extend CLI to provide memory side cache
902636
 information
902636
902636
RH-Author: plai@redhat.com
902636
Message-id: <20200521235655.27141-6-plai@redhat.com>
902636
Patchwork-id: 96740
902636
O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 05/11] numa: Extend CLI to provide memory side cache information
902636
Bugzilla: 1600217
902636
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
902636
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
902636
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
902636
902636
From: Liu Jingqi <jingqi.liu@intel.com>
902636
902636
Add -numa hmat-cache option to provide Memory Side Cache Information.
902636
These memory attributes help to build Memory Side Cache Information
902636
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
902636
Before using hmat-cache option, enable HMAT with -machine hmat=on.
902636
902636
Acked-by: Markus Armbruster <armbru@redhat.com>
902636
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
902636
Signed-off-by: Tao Xu <tao3.xu@intel.com>
902636
Message-Id: <20191213011929.2520-4-tao3.xu@intel.com>
902636
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
902636
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
902636
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
902636
(cherry picked from commit c412a48d4d91e8f8b89aae02de0f44f1f0b729e5)
902636
Signed-off-by: Paul Lai <plai@redhat.com>
902636
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
902636
---
902636
 hw/core/numa.c        | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++
902636
 include/sysemu/numa.h |  5 ++++
902636
 qapi/machine.json     | 81 +++++++++++++++++++++++++++++++++++++++++++++++++--
902636
 qemu-options.hx       | 17 +++++++++--
902636
 4 files changed, 179 insertions(+), 4 deletions(-)
902636
902636
diff --git a/hw/core/numa.c b/hw/core/numa.c
902636
index 58fe713..0d1b4be 100644
902636
--- a/hw/core/numa.c
902636
+++ b/hw/core/numa.c
902636
@@ -375,6 +375,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
902636
     g_array_append_val(hmat_lb->list, lb_data);
902636
 }
902636
 
902636
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
902636
+                           Error **errp)
902636
+{
902636
+    int nb_numa_nodes = ms->numa_state->num_nodes;
902636
+    NodeInfo *numa_info = ms->numa_state->nodes;
902636
+    NumaHmatCacheOptions *hmat_cache = NULL;
902636
+
902636
+    if (node->node_id >= nb_numa_nodes) {
902636
+        error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
902636
+                   "than %d", node->node_id, nb_numa_nodes);
902636
+        return;
902636
+    }
902636
+
902636
+    if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
902636
+        error_setg(errp, "The latency and bandwidth information of "
902636
+                   "node-id=%" PRIu32 " should be provided before memory side "
902636
+                   "cache attributes", node->node_id);
902636
+        return;
902636
+    }
902636
+
902636
+    if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
902636
+        error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
902636
+                   "and less than or equal to %d", node->level,
902636
+                   HMAT_LB_LEVELS - 1);
902636
+        return;
902636
+    }
902636
+
902636
+    assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
902636
+    assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
902636
+    if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
902636
+        error_setg(errp, "Duplicate configuration of the side cache for "
902636
+                   "node-id=%" PRIu32 " and level=%" PRIu8,
902636
+                   node->node_id, node->level);
902636
+        return;
902636
+    }
902636
+
902636
+    if ((node->level > 1) &&
902636
+        ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
902636
+        (node->size >=
902636
+            ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
902636
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
902636
+                   " should be less than the size(%" PRIu64 ") of "
902636
+                   "level=%u", node->size, node->level,
902636
+                   ms->numa_state->hmat_cache[node->node_id]
902636
+                                             [node->level - 1]->size,
902636
+                   node->level - 1);
902636
+        return;
902636
+    }
902636
+
902636
+    if ((node->level < HMAT_LB_LEVELS - 1) &&
902636
+        ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
902636
+        (node->size <=
902636
+            ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
902636
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
902636
+                   " should be larger than the size(%" PRIu64 ") of "
902636
+                   "level=%u", node->size, node->level,
902636
+                   ms->numa_state->hmat_cache[node->node_id]
902636
+                                             [node->level + 1]->size,
902636
+                   node->level + 1);
902636
+        return;
902636
+    }
902636
+
902636
+    hmat_cache = g_malloc0(sizeof(*hmat_cache));
902636
+    memcpy(hmat_cache, node, sizeof(*hmat_cache));
902636
+    ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
902636
+}
902636
+
902636
 void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
902636
 {
902636
     Error *err = NULL;
902636
@@ -425,6 +492,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
902636
             goto end;
902636
         }
902636
         break;
902636
+    case NUMA_OPTIONS_TYPE_HMAT_CACHE:
902636
+        if (!ms->numa_state->hmat_enabled) {
902636
+            error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
902636
+                       "(HMAT) is disabled, enable it with -machine hmat=on "
902636
+                       "before using any of hmat specific options");
902636
+            return;
902636
+        }
902636
+
902636
+        parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
902636
+        if (err) {
902636
+            goto end;
902636
+        }
902636
+        break;
902636
     default:
902636
         abort();
902636
     }
902636
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
902636
index 70f93c8..ba693cc 100644
902636
--- a/include/sysemu/numa.h
902636
+++ b/include/sysemu/numa.h
902636
@@ -91,6 +91,9 @@ struct NumaState {
902636
 
902636
     /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
902636
     HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
902636
+
902636
+    /* Memory Side Cache Information Structure */
902636
+    NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
902636
 };
902636
 typedef struct NumaState NumaState;
902636
 
902636
@@ -98,6 +101,8 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
902636
 void parse_numa_opts(MachineState *ms);
902636
 void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
902636
                         Error **errp);
902636
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
902636
+                           Error **errp);
902636
 void numa_complete_configuration(MachineState *ms);
902636
 void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
902636
 extern QemuOptsList qemu_numa_opts;
902636
diff --git a/qapi/machine.json b/qapi/machine.json
902636
index cf8faf5..b3d30bc 100644
902636
--- a/qapi/machine.json
902636
+++ b/qapi/machine.json
902636
@@ -428,10 +428,12 @@
902636
 #
902636
 # @hmat-lb: memory latency and bandwidth information (Since: 5.0)
902636
 #
902636
+# @hmat-cache: memory side cache information (Since: 5.0)
902636
+#
902636
 # Since: 2.1
902636
 ##
902636
 { 'enum': 'NumaOptionsType',
902636
-  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
902636
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
902636
 
902636
 ##
902636
 # @NumaOptions:
902636
@@ -447,7 +449,8 @@
902636
     'node': 'NumaNodeOptions',
902636
     'dist': 'NumaDistOptions',
902636
     'cpu': 'NumaCpuOptions',
902636
-    'hmat-lb': 'NumaHmatLBOptions' }}
902636
+    'hmat-lb': 'NumaHmatLBOptions',
902636
+    'hmat-cache': 'NumaHmatCacheOptions' }}
902636
 
902636
 ##
902636
 # @NumaNodeOptions:
902636
@@ -647,6 +650,80 @@
902636
     '*bandwidth': 'size' }}
902636
 
902636
 ##
902636
+# @HmatCacheAssociativity:
902636
+#
902636
+# Cache associativity in the Memory Side Cache Information Structure
902636
+# of HMAT
902636
+#
902636
+# For more information of @HmatCacheAssociativity, see chapter
902636
+# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
902636
+#
902636
+# @none: None (no memory side cache in this proximity domain,
902636
+#              or cache associativity unknown)
902636
+#
902636
+# @direct: Direct Mapped
902636
+#
902636
+# @complex: Complex Cache Indexing (implementation specific)
902636
+#
902636
+# Since: 5.0
902636
+##
902636
+{ 'enum': 'HmatCacheAssociativity',
902636
+  'data': [ 'none', 'direct', 'complex' ] }
902636
+
902636
+##
902636
+# @HmatCacheWritePolicy:
902636
+#
902636
+# Cache write policy in the Memory Side Cache Information Structure
902636
+# of HMAT
902636
+#
902636
+# For more information of @HmatCacheWritePolicy, see chapter
902636
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
902636
+#
902636
+# @none: None (no memory side cache in this proximity domain,
902636
+#              or cache write policy unknown)
902636
+#
902636
+# @write-back: Write Back (WB)
902636
+#
902636
+# @write-through: Write Through (WT)
902636
+#
902636
+# Since: 5.0
902636
+##
902636
+{ 'enum': 'HmatCacheWritePolicy',
902636
+  'data': [ 'none', 'write-back', 'write-through' ] }
902636
+
902636
+##
902636
+# @NumaHmatCacheOptions:
902636
+#
902636
+# Set the memory side cache information for a given memory domain.
902636
+#
902636
+# For more information of @NumaHmatCacheOptions, see chapter
902636
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
902636
+#
902636
+# @node-id: the memory proximity domain to which the memory belongs.
902636
+#
902636
+# @size: the size of memory side cache in bytes.
902636
+#
902636
+# @level: the cache level described in this structure.
902636
+#
902636
+# @associativity: the cache associativity,
902636
+#         none/direct-mapped/complex(complex cache indexing).
902636
+#
902636
+# @policy: the write policy, none/write-back/write-through.
902636
+#
902636
+# @line: the cache Line size in bytes.
902636
+#
902636
+# Since: 5.0
902636
+##
902636
+{ 'struct': 'NumaHmatCacheOptions',
902636
+  'data': {
902636
+   'node-id': 'uint32',
902636
+   'size': 'size',
902636
+   'level': 'uint8',
902636
+   'associativity': 'HmatCacheAssociativity',
902636
+   'policy': 'HmatCacheWritePolicy',
902636
+   'line': 'uint16' }}
902636
+
902636
+##
902636
 # @HostMemPolicy:
902636
 #
902636
 # Host memory policy types
902636
diff --git a/qemu-options.hx b/qemu-options.hx
902636
index 86d9d8a..8fe05b6 100644
902636
--- a/qemu-options.hx
902636
+++ b/qemu-options.hx
902636
@@ -169,7 +169,8 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
902636
     "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
902636
     "-numa dist,src=source,dst=destination,val=distance\n"
902636
     "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
902636
-    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
902636
+    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
902636
+    "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
902636
     QEMU_ARCH_ALL)
902636
 STEXI
902636
 @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
902636
@@ -177,6 +178,7 @@ STEXI
902636
 @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
902636
 @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
902636
 @itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}]
902636
+@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}]
902636
 @findex -numa
902636
 Define a NUMA node and assign RAM and VCPUs to it.
902636
 Set the NUMA distance from a source node to a destination node.
902636
@@ -280,11 +282,20 @@ NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix).
902636
 Note that if latency or bandwidth value is 0, means the corresponding latency or
902636
 bandwidth information is not provided.
902636
 
902636
+In @samp{hmat-cache} option, @var{node-id} is the NUMA node ID to which the memory belongs.
902636
+@var{size} is the size of memory side cache in bytes. @var{level} is the cache
902636
+level described in this structure, note that the cache level 0 should not be used
902636
+with @samp{hmat-cache} option. @var{associativity} is the cache associativity,
902636
+the possible value is 'none/direct(direct-mapped)/complex(complex cache indexing)'.
902636
+@var{policy} is the write policy. @var{line} is the cache Line size in bytes.
902636
+
902636
 For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
902636
 a ram, node 1 has only a ram. The processors in node 0 access memory in node
902636
 0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
902636
 The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
902636
 nanoseconds, access-bandwidth is 100 MB/s.
902636
+And for memory side cache information, NUMA node 0 and 1 both have 1 level memory
902636
+cache, size is 10KB, policy is write-back, the cache Line size is 8 bytes:
902636
 @example
902636
 -machine hmat=on \
902636
 -m 2G \
902636
@@ -298,7 +309,9 @@ nanoseconds, access-bandwidth is 100 MB/s.
902636
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
902636
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
902636
 -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
902636
--numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M
902636
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
902636
+-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
902636
+-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
902636
 @end example
902636
 
902636
 ETEXI
902636
-- 
902636
1.8.3.1
902636