Blame SOURCES/kvm-pc-acpi-revert-back-to-1-SRAT-entry-for-hotpluggable.patch

1bdc94
From cffbabd734488678614832ff26222aaa10920472 Mon Sep 17 00:00:00 2001
1bdc94
From: Igor Mammedov <imammedo@redhat.com>
1bdc94
Date: Wed, 12 Sep 2018 15:21:41 +0200
1bdc94
Subject: [PATCH 06/49] pc: acpi: revert back to 1 SRAT entry for hotpluggable
1bdc94
 area
1bdc94
1bdc94
RH-Author: Igor Mammedov <imammedo@redhat.com>
1bdc94
Message-id: <1536765701-266415-1-git-send-email-imammedo@redhat.com>
1bdc94
Patchwork-id: 82145
1bdc94
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area
1bdc94
Bugzilla: 1626059
1bdc94
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
1bdc94
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
1bdc94
1bdc94
Commit
1bdc94
  10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT entry size"
1bdc94
attempted to fix hotplug regression introduced by
1bdc94
  848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM devices"
1bdc94
1bdc94
It fixed the issue for Windows/3.0+ Linux kernels, however it regressed 2.6 based
1bdc94
kernels (RHEL6) to the point where guest might crash at boot.
1bdc94
The reason is that the 2.6 kernel discards the SRAT table due to a too small last entry
1bdc94
which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
1bdc94
not ACPI spec compliant according to which whole possible RAM should be
1bdc94
described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based kernels.
1bdc94
1bdc94
With 10efd7e108 reverted, I've also tried splitting SRAT table statically
1bdc94
in different ways %/node and %/slot but Windows still fails to online
1bdc94
2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
1bdc94
sometimes even coldplugged pc-dimms were affected with static SRAT
1bdc94
partitioning.
1bdc94
The only way known so far to keep Windows happy is when we have 1
1bdc94
SRAT entry in the last node covering all hotplug area.
1bdc94
1bdc94
Revert 848a1cc1e until we come up with a way to avoid regression
1bdc94
on Windows with hotplug area split in several entries.
1bdc94
Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16].
1bdc94
1bdc94
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
1bdc94
Acked-by: Laszlo Ersek <lersek@redhat.com>
1bdc94
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
1bdc94
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
(pull request  https://www.mail-archive.com/qemu-devel@nongnu.org/msg560638.html)
1bdc94
1bdc94
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
1bdc94
Conflicts:
1bdc94
  hw/i386/acpi-build.c
1bdc94
    - contextual conflict since we do not have
1bdc94
         (d471bf3e hw/i386: Use the IEC binary prefix definitions)
1bdc94
         (b0c14ec4 machine: make MemoryHotplugState accessible via the machine)
1bdc94
1bdc94
Since it's a blocker and urgent, sending patch without waiting for upstream
1bdc94
commit id (pull req is out there but it looks like Peter is not available to merge it)
1bdc94
---
1bdc94
 hw/i386/acpi-build.c | 74 +++++++++-------------------------------------------
1bdc94
 1 file changed, 13 insertions(+), 61 deletions(-)
1bdc94
1bdc94
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
1bdc94
index 683e5f4..2bc8117 100644
1bdc94
--- a/hw/i386/acpi-build.c
1bdc94
+++ b/hw/i386/acpi-build.c
1bdc94
@@ -2253,64 +2253,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
1bdc94
 #define HOLE_640K_START  (640 * 1024)
1bdc94
 #define HOLE_640K_END   (1024 * 1024)
1bdc94
 
1bdc94
-static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
1bdc94
-                                           uint64_t len, int default_node)
1bdc94
-{
1bdc94
-    MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list();
1bdc94
-    MemoryDeviceInfoList *info;
1bdc94
-    MemoryDeviceInfo *mi;
1bdc94
-    PCDIMMDeviceInfo *di;
1bdc94
-    uint64_t end = base + len, cur, size;
1bdc94
-    bool is_nvdimm;
1bdc94
-    AcpiSratMemoryAffinity *numamem;
1bdc94
-    MemoryAffinityFlags flags;
1bdc94
-
1bdc94
-    for (cur = base, info = info_list;
1bdc94
-         cur < end;
1bdc94
-         cur += size, info = info->next) {
1bdc94
-        numamem = acpi_data_push(table_data, sizeof *numamem);
1bdc94
-
1bdc94
-        if (!info) {
1bdc94
-            /*
1bdc94
-             * Entry is required for Windows to enable memory hotplug in OS
1bdc94
-             * and for Linux to enable SWIOTLB when booted with less than
1bdc94
-             * 4G of RAM. Windows works better if the entry sets proximity
1bdc94
-             * to the highest NUMA node in the machine at the end of the
1bdc94
-             * reserved space.
1bdc94
-             * Memory devices may override proximity set by this entry,
1bdc94
-             * providing _PXM method if necessary.
1bdc94
-             */
1bdc94
-            build_srat_memory(numamem, end - 1, 1, default_node,
1bdc94
-                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
1bdc94
-            break;
1bdc94
-        }
1bdc94
-
1bdc94
-        mi = info->value;
1bdc94
-        is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
1bdc94
-        di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
1bdc94
-
1bdc94
-        if (cur < di->addr) {
1bdc94
-            build_srat_memory(numamem, cur, di->addr - cur, default_node,
1bdc94
-                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
1bdc94
-            numamem = acpi_data_push(table_data, sizeof *numamem);
1bdc94
-        }
1bdc94
-
1bdc94
-        size = di->size;
1bdc94
-
1bdc94
-        flags = MEM_AFFINITY_ENABLED;
1bdc94
-        if (di->hotpluggable) {
1bdc94
-            flags |= MEM_AFFINITY_HOTPLUGGABLE;
1bdc94
-        }
1bdc94
-        if (is_nvdimm) {
1bdc94
-            flags |= MEM_AFFINITY_NON_VOLATILE;
1bdc94
-        }
1bdc94
-
1bdc94
-        build_srat_memory(numamem, di->addr, size, di->node, flags);
1bdc94
-    }
1bdc94
-
1bdc94
-    qapi_free_MemoryDeviceInfoList(info_list);
1bdc94
-}
1bdc94
-
1bdc94
 static void
1bdc94
 build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
1bdc94
 {
1bdc94
@@ -2413,10 +2355,20 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
1bdc94
         build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
1bdc94
     }
1bdc94
 
1bdc94
+    /*
1bdc94
+     * Entry is required for Windows to enable memory hotplug in OS
1bdc94
+     * and for Linux to enable SWIOTLB when booted with less than
1bdc94
+     * 4G of RAM. Windows works better if the entry sets proximity
1bdc94
+     * to the highest NUMA node in the machine at the end of the
1bdc94
+     * reserved space.
1bdc94
+     * Memory devices may override proximity set by this entry,
1bdc94
+     * providing _PXM method if necessary.
1bdc94
+     */
1bdc94
     if (hotplugabble_address_space_size) {
1bdc94
-        build_srat_hotpluggable_memory(table_data, pcms->hotplug_memory.base,
1bdc94
-                                       hotplugabble_address_space_size,
1bdc94
-                                       pcms->numa_nodes - 1);
1bdc94
+        numamem = acpi_data_push(table_data, sizeof *numamem);
1bdc94
+        build_srat_memory(numamem, pcms->hotplug_memory.base,
1bdc94
+                          hotplugabble_address_space_size, pcms->numa_nodes - 1,
1bdc94
+                          MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
1bdc94
     }
1bdc94
 
1bdc94
     build_header(linker, table_data,
1bdc94
-- 
1bdc94
1.8.3.1
1bdc94