yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone

Blame SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch

ddf19c
From 8cd3544b1347b248b9d04eb3d6c9b9bde3a13655 Mon Sep 17 00:00:00 2001
ddf19c
From: "plai@redhat.com" <plai@redhat.com>
ddf19c
Date: Thu, 21 May 2020 23:56:49 +0100
ddf19c
Subject: [PATCH 06/12] numa: Extend CLI to provide memory side cache
ddf19c
 information
ddf19c
ddf19c
RH-Author: plai@redhat.com
ddf19c
Message-id: <20200521235655.27141-6-plai@redhat.com>
ddf19c
Patchwork-id: 96740
ddf19c
O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 05/11] numa: Extend CLI to provide memory side cache information
ddf19c
Bugzilla: 1600217
ddf19c
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
ddf19c
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
ddf19c
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
ddf19c
ddf19c
From: Liu Jingqi <jingqi.liu@intel.com>
ddf19c
ddf19c
Add -numa hmat-cache option to provide Memory Side Cache Information.
ddf19c
These memory attributes help to build Memory Side Cache Information
ddf19c
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
ddf19c
Before using hmat-cache option, enable HMAT with -machine hmat=on.
ddf19c
ddf19c
Acked-by: Markus Armbruster <armbru@redhat.com>
ddf19c
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
ddf19c
Signed-off-by: Tao Xu <tao3.xu@intel.com>
ddf19c
Message-Id: <20191213011929.2520-4-tao3.xu@intel.com>
ddf19c
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
ddf19c
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
ddf19c
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
ddf19c
(cherry picked from commit c412a48d4d91e8f8b89aae02de0f44f1f0b729e5)
ddf19c
Signed-off-by: Paul Lai <plai@redhat.com>
ddf19c
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ddf19c
---
ddf19c
 hw/core/numa.c        | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++
ddf19c
 include/sysemu/numa.h |  5 ++++
ddf19c
 qapi/machine.json     | 81 +++++++++++++++++++++++++++++++++++++++++++++++++--
ddf19c
 qemu-options.hx       | 17 +++++++++--
ddf19c
 4 files changed, 179 insertions(+), 4 deletions(-)
ddf19c
ddf19c
diff --git a/hw/core/numa.c b/hw/core/numa.c
ddf19c
index 58fe713..0d1b4be 100644
ddf19c
--- a/hw/core/numa.c
ddf19c
+++ b/hw/core/numa.c
ddf19c
@@ -375,6 +375,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
ddf19c
     g_array_append_val(hmat_lb->list, lb_data);
ddf19c
 }
ddf19c
 
ddf19c
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
ddf19c
+                           Error **errp)
ddf19c
+{
ddf19c
+    int nb_numa_nodes = ms->numa_state->num_nodes;
ddf19c
+    NodeInfo *numa_info = ms->numa_state->nodes;
ddf19c
+    NumaHmatCacheOptions *hmat_cache = NULL;
ddf19c
+
ddf19c
+    if (node->node_id >= nb_numa_nodes) {
ddf19c
+        error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
ddf19c
+                   "than %d", node->node_id, nb_numa_nodes);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+
ddf19c
+    if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
ddf19c
+        error_setg(errp, "The latency and bandwidth information of "
ddf19c
+                   "node-id=%" PRIu32 " should be provided before memory side "
ddf19c
+                   "cache attributes", node->node_id);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+
ddf19c
+    if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
ddf19c
+        error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
ddf19c
+                   "and less than or equal to %d", node->level,
ddf19c
+                   HMAT_LB_LEVELS - 1);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+
ddf19c
+    assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
ddf19c
+    assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
ddf19c
+    if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
ddf19c
+        error_setg(errp, "Duplicate configuration of the side cache for "
ddf19c
+                   "node-id=%" PRIu32 " and level=%" PRIu8,
ddf19c
+                   node->node_id, node->level);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+
ddf19c
+    if ((node->level > 1) &&
ddf19c
+        ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
ddf19c
+        (node->size >=
ddf19c
+            ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
ddf19c
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
ddf19c
+                   " should be less than the size(%" PRIu64 ") of "
ddf19c
+                   "level=%u", node->size, node->level,
ddf19c
+                   ms->numa_state->hmat_cache[node->node_id]
ddf19c
+                                             [node->level - 1]->size,
ddf19c
+                   node->level - 1);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+
ddf19c
+    if ((node->level < HMAT_LB_LEVELS - 1) &&
ddf19c
+        ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
ddf19c
+        (node->size <=
ddf19c
+            ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
ddf19c
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
ddf19c
+                   " should be larger than the size(%" PRIu64 ") of "
ddf19c
+                   "level=%u", node->size, node->level,
ddf19c
+                   ms->numa_state->hmat_cache[node->node_id]
ddf19c
+                                             [node->level + 1]->size,
ddf19c
+                   node->level + 1);
ddf19c
+        return;
ddf19c
+    }
ddf19c
+
ddf19c
+    hmat_cache = g_malloc0(sizeof(*hmat_cache));
ddf19c
+    memcpy(hmat_cache, node, sizeof(*hmat_cache));
ddf19c
+    ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
ddf19c
+}
ddf19c
+
ddf19c
 void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
ddf19c
 {
ddf19c
     Error *err = NULL;
ddf19c
@@ -425,6 +492,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
ddf19c
             goto end;
ddf19c
         }
ddf19c
         break;
ddf19c
+    case NUMA_OPTIONS_TYPE_HMAT_CACHE:
ddf19c
+        if (!ms->numa_state->hmat_enabled) {
ddf19c
+            error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
ddf19c
+                       "(HMAT) is disabled, enable it with -machine hmat=on "
ddf19c
+                       "before using any of hmat specific options");
ddf19c
+            return;
ddf19c
+        }
ddf19c
+
ddf19c
+        parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
ddf19c
+        if (err) {
ddf19c
+            goto end;
ddf19c
+        }
ddf19c
+        break;
ddf19c
     default:
ddf19c
         abort();
ddf19c
     }
ddf19c
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
ddf19c
index 70f93c8..ba693cc 100644
ddf19c
--- a/include/sysemu/numa.h
ddf19c
+++ b/include/sysemu/numa.h
ddf19c
@@ -91,6 +91,9 @@ struct NumaState {
ddf19c
 
ddf19c
     /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
ddf19c
     HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
ddf19c
+
ddf19c
+    /* Memory Side Cache Information Structure */
ddf19c
+    NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
ddf19c
 };
ddf19c
 typedef struct NumaState NumaState;
ddf19c
 
ddf19c
@@ -98,6 +101,8 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
ddf19c
 void parse_numa_opts(MachineState *ms);
ddf19c
 void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
ddf19c
                         Error **errp);
ddf19c
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
ddf19c
+                           Error **errp);
ddf19c
 void numa_complete_configuration(MachineState *ms);
ddf19c
 void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
ddf19c
 extern QemuOptsList qemu_numa_opts;
ddf19c
diff --git a/qapi/machine.json b/qapi/machine.json
ddf19c
index cf8faf5..b3d30bc 100644
ddf19c
--- a/qapi/machine.json
ddf19c
+++ b/qapi/machine.json
ddf19c
@@ -428,10 +428,12 @@
ddf19c
 #
ddf19c
 # @hmat-lb: memory latency and bandwidth information (Since: 5.0)
ddf19c
 #
ddf19c
+# @hmat-cache: memory side cache information (Since: 5.0)
ddf19c
+#
ddf19c
 # Since: 2.1
ddf19c
 ##
ddf19c
 { 'enum': 'NumaOptionsType',
ddf19c
-  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
ddf19c
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
ddf19c
 
ddf19c
 ##
ddf19c
 # @NumaOptions:
ddf19c
@@ -447,7 +449,8 @@
ddf19c
     'node': 'NumaNodeOptions',
ddf19c
     'dist': 'NumaDistOptions',
ddf19c
     'cpu': 'NumaCpuOptions',
ddf19c
-    'hmat-lb': 'NumaHmatLBOptions' }}
ddf19c
+    'hmat-lb': 'NumaHmatLBOptions',
ddf19c
+    'hmat-cache': 'NumaHmatCacheOptions' }}
ddf19c
 
ddf19c
 ##
ddf19c
 # @NumaNodeOptions:
ddf19c
@@ -647,6 +650,80 @@
ddf19c
     '*bandwidth': 'size' }}
ddf19c
 
ddf19c
 ##
ddf19c
+# @HmatCacheAssociativity:
ddf19c
+#
ddf19c
+# Cache associativity in the Memory Side Cache Information Structure
ddf19c
+# of HMAT
ddf19c
+#
ddf19c
+# For more information of @HmatCacheAssociativity, see chapter
ddf19c
+# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
ddf19c
+#
ddf19c
+# @none: None (no memory side cache in this proximity domain,
ddf19c
+#              or cache associativity unknown)
ddf19c
+#
ddf19c
+# @direct: Direct Mapped
ddf19c
+#
ddf19c
+# @complex: Complex Cache Indexing (implementation specific)
ddf19c
+#
ddf19c
+# Since: 5.0
ddf19c
+##
ddf19c
+{ 'enum': 'HmatCacheAssociativity',
ddf19c
+  'data': [ 'none', 'direct', 'complex' ] }
ddf19c
+
ddf19c
+##
ddf19c
+# @HmatCacheWritePolicy:
ddf19c
+#
ddf19c
+# Cache write policy in the Memory Side Cache Information Structure
ddf19c
+# of HMAT
ddf19c
+#
ddf19c
+# For more information of @HmatCacheWritePolicy, see chapter
ddf19c
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
ddf19c
+#
ddf19c
+# @none: None (no memory side cache in this proximity domain,
ddf19c
+#              or cache write policy unknown)
ddf19c
+#
ddf19c
+# @write-back: Write Back (WB)
ddf19c
+#
ddf19c
+# @write-through: Write Through (WT)
ddf19c
+#
ddf19c
+# Since: 5.0
ddf19c
+##
ddf19c
+{ 'enum': 'HmatCacheWritePolicy',
ddf19c
+  'data': [ 'none', 'write-back', 'write-through' ] }
ddf19c
+
ddf19c
+##
ddf19c
+# @NumaHmatCacheOptions:
ddf19c
+#
ddf19c
+# Set the memory side cache information for a given memory domain.
ddf19c
+#
ddf19c
+# For more information of @NumaHmatCacheOptions, see chapter
ddf19c
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
ddf19c
+#
ddf19c
+# @node-id: the memory proximity domain to which the memory belongs.
ddf19c
+#
ddf19c
+# @size: the size of memory side cache in bytes.
ddf19c
+#
ddf19c
+# @level: the cache level described in this structure.
ddf19c
+#
ddf19c
+# @associativity: the cache associativity,
ddf19c
+#         none/direct-mapped/complex(complex cache indexing).
ddf19c
+#
ddf19c
+# @policy: the write policy, none/write-back/write-through.
ddf19c
+#
ddf19c
+# @line: the cache Line size in bytes.
ddf19c
+#
ddf19c
+# Since: 5.0
ddf19c
+##
ddf19c
+{ 'struct': 'NumaHmatCacheOptions',
ddf19c
+  'data': {
ddf19c
+   'node-id': 'uint32',
ddf19c
+   'size': 'size',
ddf19c
+   'level': 'uint8',
ddf19c
+   'associativity': 'HmatCacheAssociativity',
ddf19c
+   'policy': 'HmatCacheWritePolicy',
ddf19c
+   'line': 'uint16' }}
ddf19c
+
ddf19c
+##
ddf19c
 # @HostMemPolicy:
ddf19c
 #
ddf19c
 # Host memory policy types
ddf19c
diff --git a/qemu-options.hx b/qemu-options.hx
ddf19c
index 86d9d8a..8fe05b6 100644
ddf19c
--- a/qemu-options.hx
ddf19c
+++ b/qemu-options.hx
ddf19c
@@ -169,7 +169,8 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
ddf19c
     "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
ddf19c
     "-numa dist,src=source,dst=destination,val=distance\n"
ddf19c
     "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
ddf19c
-    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
ddf19c
+    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
ddf19c
+    "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
ddf19c
     QEMU_ARCH_ALL)
ddf19c
 STEXI
ddf19c
 @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
ddf19c
@@ -177,6 +178,7 @@ STEXI
ddf19c
 @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
ddf19c
 @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
ddf19c
 @itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}]
ddf19c
+@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}]
ddf19c
 @findex -numa
ddf19c
 Define a NUMA node and assign RAM and VCPUs to it.
ddf19c
 Set the NUMA distance from a source node to a destination node.
ddf19c
@@ -280,11 +282,20 @@ NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix).
ddf19c
 Note that if latency or bandwidth value is 0, means the corresponding latency or
ddf19c
 bandwidth information is not provided.
ddf19c
 
ddf19c
+In @samp{hmat-cache} option, @var{node-id} is the NUMA-id of the node to which the memory belongs.
ddf19c
+@var{size} is the size of memory side cache in bytes. @var{level} is the cache
ddf19c
+level described in this structure, note that the cache level 0 should not be used
ddf19c
+with @samp{hmat-cache} option. @var{associativity} is the cache associativity,
ddf19c
+the possible values are 'none/direct(direct-mapped)/complex(complex cache indexing)'.
ddf19c
+@var{policy} is the write policy. @var{line} is the cache Line size in bytes.
ddf19c
+
ddf19c
 For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
ddf19c
 a ram, node 1 has only a ram. The processors in node 0 access memory in node
ddf19c
 0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
ddf19c
 The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
ddf19c
 nanoseconds, access-bandwidth is 100 MB/s.
ddf19c
+And for memory side cache information, NUMA node 0 and 1 both have 1 level memory
ddf19c
+cache, size is 10KB, policy is write-back, the cache Line size is 8 bytes:
ddf19c
 @example
ddf19c
 -machine hmat=on \
ddf19c
 -m 2G \
ddf19c
@@ -298,7 +309,9 @@ nanoseconds, access-bandwidth is 100 MB/s.
ddf19c
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
ddf19c
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
ddf19c
 -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
ddf19c
--numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M
ddf19c
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
ddf19c
+-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
ddf19c
+-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
ddf19c
 @end example
ddf19c
 
ddf19c
 ETEXI
ddf19c
-- 
ddf19c
1.8.3.1
ddf19c