|
|
703296 |
From 70c9d989107c6ac964bb437c5a4ea6ffe3214e45 Mon Sep 17 00:00:00 2001
|
|
|
703296 |
From: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
703296 |
Date: Mon, 10 Aug 2020 07:52:28 +0200
|
|
|
703296 |
Subject: [PATCH] UefiCpuPkg/PiSmmCpuDxeSmm: pause in WaitForSemaphore() before
|
|
|
703296 |
re-fetch
|
|
|
703296 |
MIME-Version: 1.0
|
|
|
703296 |
Content-Type: text/plain; charset=UTF-8
|
|
|
703296 |
Content-Transfer-Encoding: 8bit
|
|
|
703296 |
|
|
|
703296 |
RH-Author: Laszlo Ersek <lersek@redhat.com>
|
|
|
703296 |
Message-id: <20200731141037.1941-2-lersek@redhat.com>
|
|
|
703296 |
Patchwork-id: 98121
|
|
|
703296 |
O-Subject: [RHEL-8.3.0 edk2 PATCH 1/1] UefiCpuPkg/PiSmmCpuDxeSmm: pause in WaitForSemaphore() before re-fetch
|
|
|
703296 |
Bugzilla: 1861718
|
|
|
703296 |
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
|
|
703296 |
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
|
|
|
703296 |
|
|
|
703296 |
Most busy waits (spinlocks) in "UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c"
|
|
|
703296 |
already call CpuPause() in their loop bodies; see SmmWaitForApArrival(),
|
|
|
703296 |
APHandler(), and SmiRendezvous(). However, the "main wait" within
|
|
|
703296 |
APHandler():
|
|
|
703296 |
|
|
|
703296 |
> //
|
|
|
703296 |
> // Wait for something to happen
|
|
|
703296 |
> //
|
|
|
703296 |
> WaitForSemaphore (mSmmMpSyncData->CpuData[CpuIndex].Run);
|
|
|
703296 |
|
|
|
703296 |
doesn't do so, as WaitForSemaphore() keeps trying to acquire the semaphore
|
|
|
703296 |
without pausing.
|
|
|
703296 |
|
|
|
703296 |
The performance impact is especially notable in QEMU/KVM + OVMF
|
|
|
703296 |
virtualization with CPU overcommit (that is, when the guest has
|
|
|
703296 |
significantly more VCPUs than the host has physical CPUs). The guest BSP
|
|
|
703296 |
is working heavily in:
|
|
|
703296 |
|
|
|
703296 |
BSPHandler() [MpService.c]
|
|
|
703296 |
PerformRemainingTasks() [PiSmmCpuDxeSmm.c]
|
|
|
703296 |
SetUefiMemMapAttributes() [SmmCpuMemoryManagement.c]
|
|
|
703296 |
|
|
|
703296 |
while the many guest APs are spinning in the "Wait for something to
|
|
|
703296 |
happen" semaphore acquisition, in APHandler(). The guest APs are
|
|
|
703296 |
generating useless memory traffic and saturating host CPUs, hindering the
|
|
|
703296 |
guest BSP's progress in SetUefiMemMapAttributes().
|
|
|
703296 |
|
|
|
703296 |
Rework the loop in WaitForSemaphore(): call CpuPause() in every iteration
|
|
|
703296 |
whose acquisition attempt fails. Due to Pause Loop Exiting (known as Pause
|
|
|
703296 |
Filter on AMD), the host scheduler can favor the guest BSP over the guest
|
|
|
703296 |
APs.
|
|
|
703296 |
|
|
|
703296 |
Running a 16 GB RAM + 512 VCPU guest on a 448 PCPU host, this patch
|
|
|
703296 |
reduces OVMF boot time (counted until reaching GRUB) from 20-30 minutes to
|
|
|
703296 |
less than 4 minutes.
|
|
|
703296 |
|
|
|
703296 |
The patch should benefit physical machines as well -- according to the
|
|
|
703296 |
Intel SDM, PAUSE "Improves the performance of spin-wait loops". Adding
|
|
|
703296 |
PAUSE to the generic WaitForSemaphore() function is considered a general
|
|
|
703296 |
improvement.
|
|
|
703296 |
|
|
|
703296 |
Cc: Eric Dong <eric.dong@intel.com>
|
|
|
703296 |
Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
|
|
|
703296 |
Cc: Rahul Kumar <rahul1.kumar@intel.com>
|
|
|
703296 |
Cc: Ray Ni <ray.ni@intel.com>
|
|
|
703296 |
Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1861718
|
|
|
703296 |
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
|
|
|
703296 |
Message-Id: <20200729185217.10084-1-lersek@redhat.com>
|
|
|
703296 |
Reviewed-by: Eric Dong <eric.dong@intel.com>
|
|
|
703296 |
(cherry picked from commit 9001b750df64b25b14ec45a2efa1361a7b96c00a)
|
|
|
703296 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
703296 |
---
|
|
|
703296 |
UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 18 +++++++++++-------
|
|
|
703296 |
1 file changed, 11 insertions(+), 7 deletions(-)
|
|
|
703296 |
|
|
|
703296 |
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
|
|
|
703296 |
index 57e788c..4bcd217 100644
|
|
|
703296 |
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
|
|
|
703296 |
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
|
|
|
703296 |
@@ -40,14 +40,18 @@ WaitForSemaphore (
|
|
|
703296 |
{
|
|
|
703296 |
UINT32 Value;
|
|
|
703296 |
|
|
|
703296 |
- do {
|
|
|
703296 |
+ for (;;) {
|
|
|
703296 |
Value = *Sem;
|
|
|
703296 |
- } while (Value == 0 ||
|
|
|
703296 |
- InterlockedCompareExchange32 (
|
|
|
703296 |
- (UINT32*)Sem,
|
|
|
703296 |
- Value,
|
|
|
703296 |
- Value - 1
|
|
|
703296 |
- ) != Value);
|
|
|
703296 |
+ if (Value != 0 &&
|
|
|
703296 |
+ InterlockedCompareExchange32 (
|
|
|
703296 |
+ (UINT32*)Sem,
|
|
|
703296 |
+ Value,
|
|
|
703296 |
+ Value - 1
|
|
|
703296 |
+ ) == Value) {
|
|
|
703296 |
+ break;
|
|
|
703296 |
+ }
|
|
|
703296 |
+ CpuPause ();
|
|
|
703296 |
+ }
|
|
|
703296 |
return Value - 1;
|
|
|
703296 |
}
|
|
|
703296 |
|
|
|
703296 |
--
|
|
|
703296 |
1.8.3.1
|
|
|
703296 |
|