|
Pablo Greco |
40546a |
From 051451a1b9cefa42ecfd6d27dcb6a12ef49de072 Mon Sep 17 00:00:00 2001
|
|
Pablo Greco |
40546a |
Message-Id: <051451a1b9cefa42ecfd6d27dcb6a12ef49de072@dist-git>
|
|
Pablo Greco |
40546a |
From: Daniel Henrique Barboza <danielhb413@gmail.com>
|
|
Pablo Greco |
40546a |
Date: Fri, 3 May 2019 13:54:51 +0200
|
|
Pablo Greco |
40546a |
Subject: [PATCH] qemu_domain: add a PPC64 memLockLimit helper
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
There is a lot of documentation in the comments about how PPC64 handles
|
|
Pablo Greco |
40546a |
passthrough VFIO devices to calculate the @memLockLimit. And more will
|
|
Pablo Greco |
40546a |
be added with the PPC64 NVLink2 support code.
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
Let's remove the PPC64 code from qemuDomainGetMemLockLimitBytes()
|
|
Pablo Greco |
40546a |
body and put it into a helper function. This will simplify the
|
|
Pablo Greco |
40546a |
flow of qemuDomainGetMemLockLimitBytes() that handles all the other
|
|
Pablo Greco |
40546a |
platforms and improve readability of the PPC64 specifics.
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
|
|
Pablo Greco |
40546a |
Reviewed-by: Erik Skultety <eskultet@redhat.com>
|
|
Pablo Greco |
40546a |
(cherry picked from commit 7a686fd2eae8d5674bb1213d8517dc5814fa6bf3)
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
https://bugzilla.redhat.com/show_bug.cgi?id=1505998
|
|
Pablo Greco |
40546a |
Signed-off-by: Erik Skultety <eskultet@redhat.com>
|
|
Pablo Greco |
40546a |
Message-Id: <ccdf9fefeb624585559606d00b6ac19b574733b3.1556884443.git.eskultet@redhat.com>
|
|
Pablo Greco |
40546a |
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
|
|
Pablo Greco |
40546a |
---
|
|
Pablo Greco |
40546a |
src/qemu/qemu_domain.c | 171 ++++++++++++++++++++++-------------------
|
|
Pablo Greco |
40546a |
1 file changed, 93 insertions(+), 78 deletions(-)
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
|
Pablo Greco |
40546a |
index d936090d87..f91de0b743 100644
|
|
Pablo Greco |
40546a |
--- a/src/qemu/qemu_domain.c
|
|
Pablo Greco |
40546a |
+++ b/src/qemu/qemu_domain.c
|
|
Pablo Greco |
40546a |
@@ -9805,6 +9805,97 @@ qemuDomainUpdateCurrentMemorySize(virQEMUDriverPtr driver,
|
|
Pablo Greco |
40546a |
}
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
+/**
|
|
Pablo Greco |
40546a |
+ * getPPC64MemLockLimitBytes:
|
|
Pablo Greco |
40546a |
+ * @def: domain definition
|
|
Pablo Greco |
40546a |
+ *
|
|
Pablo Greco |
40546a |
+ * A PPC64 helper that calculates the memory locking limit in order for
|
|
Pablo Greco |
40546a |
+ * the guest to operate properly.
|
|
Pablo Greco |
40546a |
+ */
|
|
Pablo Greco |
40546a |
+static unsigned long long
|
|
Pablo Greco |
40546a |
+getPPC64MemLockLimitBytes(virDomainDefPtr def)
|
|
Pablo Greco |
40546a |
+{
|
|
Pablo Greco |
40546a |
+ unsigned long long memKB = 0;
|
|
Pablo Greco |
40546a |
+ unsigned long long baseLimit = 0;
|
|
Pablo Greco |
40546a |
+ unsigned long long memory = 0;
|
|
Pablo Greco |
40546a |
+ unsigned long long maxMemory = 0;
|
|
Pablo Greco |
40546a |
+ unsigned long long passthroughLimit = 0;
|
|
Pablo Greco |
40546a |
+ size_t i, nPCIHostBridges = 0;
|
|
Pablo Greco |
40546a |
+ bool usesVFIO = false;
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ for (i = 0; i < def->ncontrollers; i++) {
|
|
Pablo Greco |
40546a |
+ virDomainControllerDefPtr cont = def->controllers[i];
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ if (!virDomainControllerIsPSeriesPHB(cont))
|
|
Pablo Greco |
40546a |
+ continue;
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ nPCIHostBridges++;
|
|
Pablo Greco |
40546a |
+ }
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ for (i = 0; i < def->nhostdevs; i++) {
|
|
Pablo Greco |
40546a |
+ virDomainHostdevDefPtr dev = def->hostdevs[i];
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
|
|
Pablo Greco |
40546a |
+ dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
|
|
Pablo Greco |
40546a |
+ dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
|
|
Pablo Greco |
40546a |
+ usesVFIO = true;
|
|
Pablo Greco |
40546a |
+ break;
|
|
Pablo Greco |
40546a |
+ }
|
|
Pablo Greco |
40546a |
+ }
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ memory = virDomainDefGetMemoryTotal(def);
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ if (def->mem.max_memory)
|
|
Pablo Greco |
40546a |
+ maxMemory = def->mem.max_memory;
|
|
Pablo Greco |
40546a |
+ else
|
|
Pablo Greco |
40546a |
+ maxMemory = memory;
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ /* baseLimit := maxMemory / 128 (a)
|
|
Pablo Greco |
40546a |
+ * + 4 MiB * #PHBs + 8 MiB (b)
|
|
Pablo Greco |
40546a |
+ *
|
|
Pablo Greco |
40546a |
+ * (a) is the hash table
|
|
Pablo Greco |
40546a |
+ *
|
|
Pablo Greco |
40546a |
+ * (b) is accounting for the 32-bit DMA window - it could be either the
|
|
Pablo Greco |
40546a |
+ * KVM accelerated TCE tables for emulated devices, or the VFIO
|
|
Pablo Greco |
40546a |
+ * userspace view. The 4 MiB per-PHB (including the default one) covers
|
|
Pablo Greco |
40546a |
+ * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
|
|
Pablo Greco |
40546a |
+ * increased to help performance. The 8 MiB extra should be plenty for
|
|
Pablo Greco |
40546a |
+ * the TCE table index for any reasonable number of PHBs and several
|
|
Pablo Greco |
40546a |
+ * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
|
|
Pablo Greco |
40546a |
+ baseLimit = maxMemory / 128 +
|
|
Pablo Greco |
40546a |
+ 4096 * nPCIHostBridges +
|
|
Pablo Greco |
40546a |
+ 8192;
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ /* passthroughLimit := max( 2 GiB * #PHBs, (c)
|
|
Pablo Greco |
40546a |
+ * memory (d)
|
|
Pablo Greco |
40546a |
+ * + memory * 1/512 * #PHBs + 8 MiB ) (e)
|
|
Pablo Greco |
40546a |
+ *
|
|
Pablo Greco |
40546a |
+ * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
|
|
Pablo Greco |
40546a |
+ * rather than 1 GiB
|
|
Pablo Greco |
40546a |
+ *
|
|
Pablo Greco |
40546a |
+ * (d) is the with-DDW (and memory pre-registration and related
|
|
Pablo Greco |
40546a |
+ * features) DMA window accounting - assuming that we only account RAM
|
|
Pablo Greco |
40546a |
+ * once, even if mapped to multiple PHBs
|
|
Pablo Greco |
40546a |
+ *
|
|
Pablo Greco |
40546a |
+ * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
|
|
Pablo Greco |
40546a |
+ * window. This is based a bit on expected guest behaviour, but there
|
|
Pablo Greco |
40546a |
+ * really isn't a way to completely avoid that. We assume the guest
|
|
Pablo Greco |
40546a |
+ * requests a 64-bit DMA window (per PHB) just big enough to map all
|
|
Pablo Greco |
40546a |
+ * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
|
|
Pablo Greco |
40546a |
+ * kiB pages, less still if the guest is mapped with hugepages (unlike
|
|
Pablo Greco |
40546a |
+ * the default 32-bit DMA window, DDW windows can use large IOMMU
|
|
Pablo Greco |
40546a |
+ * pages). 8 MiB is for second and further level overheads, like (b) */
|
|
Pablo Greco |
40546a |
+ if (usesVFIO)
|
|
Pablo Greco |
40546a |
+ passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
|
|
Pablo Greco |
40546a |
+ memory +
|
|
Pablo Greco |
40546a |
+ memory / 512 * nPCIHostBridges + 8192);
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ memKB = baseLimit + passthroughLimit;
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+ return memKB << 10;
|
|
Pablo Greco |
40546a |
+}
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
+
|
|
Pablo Greco |
40546a |
/**
|
|
Pablo Greco |
40546a |
* qemuDomainGetMemLockLimitBytes:
|
|
Pablo Greco |
40546a |
* @def: domain definition
|
|
Pablo Greco |
40546a |
@@ -9836,84 +9927,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def)
|
|
Pablo Greco |
40546a |
if (def->mem.locked)
|
|
Pablo Greco |
40546a |
return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
- if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) {
|
|
Pablo Greco |
40546a |
- unsigned long long maxMemory;
|
|
Pablo Greco |
40546a |
- unsigned long long memory;
|
|
Pablo Greco |
40546a |
- unsigned long long baseLimit;
|
|
Pablo Greco |
40546a |
- unsigned long long passthroughLimit = 0;
|
|
Pablo Greco |
40546a |
- size_t nPCIHostBridges = 0;
|
|
Pablo Greco |
40546a |
- bool usesVFIO = false;
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- for (i = 0; i < def->ncontrollers; i++) {
|
|
Pablo Greco |
40546a |
- virDomainControllerDefPtr cont = def->controllers[i];
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- if (!virDomainControllerIsPSeriesPHB(cont))
|
|
Pablo Greco |
40546a |
- continue;
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- nPCIHostBridges++;
|
|
Pablo Greco |
40546a |
- }
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- for (i = 0; i < def->nhostdevs; i++) {
|
|
Pablo Greco |
40546a |
- virDomainHostdevDefPtr dev = def->hostdevs[i];
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
|
|
Pablo Greco |
40546a |
- dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
|
|
Pablo Greco |
40546a |
- dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
|
|
Pablo Greco |
40546a |
- usesVFIO = true;
|
|
Pablo Greco |
40546a |
- break;
|
|
Pablo Greco |
40546a |
- }
|
|
Pablo Greco |
40546a |
- }
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- memory = virDomainDefGetMemoryTotal(def);
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- if (def->mem.max_memory)
|
|
Pablo Greco |
40546a |
- maxMemory = def->mem.max_memory;
|
|
Pablo Greco |
40546a |
- else
|
|
Pablo Greco |
40546a |
- maxMemory = memory;
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- /* baseLimit := maxMemory / 128 (a)
|
|
Pablo Greco |
40546a |
- * + 4 MiB * #PHBs + 8 MiB (b)
|
|
Pablo Greco |
40546a |
- *
|
|
Pablo Greco |
40546a |
- * (a) is the hash table
|
|
Pablo Greco |
40546a |
- *
|
|
Pablo Greco |
40546a |
- * (b) is accounting for the 32-bit DMA window - it could be either the
|
|
Pablo Greco |
40546a |
- * KVM accelerated TCE tables for emulated devices, or the VFIO
|
|
Pablo Greco |
40546a |
- * userspace view. The 4 MiB per-PHB (including the default one) covers
|
|
Pablo Greco |
40546a |
- * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
|
|
Pablo Greco |
40546a |
- * increased to help performance. The 8 MiB extra should be plenty for
|
|
Pablo Greco |
40546a |
- * the TCE table index for any reasonable number of PHBs and several
|
|
Pablo Greco |
40546a |
- * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
|
|
Pablo Greco |
40546a |
- baseLimit = maxMemory / 128 +
|
|
Pablo Greco |
40546a |
- 4096 * nPCIHostBridges +
|
|
Pablo Greco |
40546a |
- 8192;
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- /* passthroughLimit := max( 2 GiB * #PHBs, (c)
|
|
Pablo Greco |
40546a |
- * memory (d)
|
|
Pablo Greco |
40546a |
- * + memory * 1/512 * #PHBs + 8 MiB ) (e)
|
|
Pablo Greco |
40546a |
- *
|
|
Pablo Greco |
40546a |
- * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
|
|
Pablo Greco |
40546a |
- * rather than 1 GiB
|
|
Pablo Greco |
40546a |
- *
|
|
Pablo Greco |
40546a |
- * (d) is the with-DDW (and memory pre-registration and related
|
|
Pablo Greco |
40546a |
- * features) DMA window accounting - assuming that we only account RAM
|
|
Pablo Greco |
40546a |
- * once, even if mapped to multiple PHBs
|
|
Pablo Greco |
40546a |
- *
|
|
Pablo Greco |
40546a |
- * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
|
|
Pablo Greco |
40546a |
- * window. This is based a bit on expected guest behaviour, but there
|
|
Pablo Greco |
40546a |
- * really isn't a way to completely avoid that. We assume the guest
|
|
Pablo Greco |
40546a |
- * requests a 64-bit DMA window (per PHB) just big enough to map all
|
|
Pablo Greco |
40546a |
- * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
|
|
Pablo Greco |
40546a |
- * kiB pages, less still if the guest is mapped with hugepages (unlike
|
|
Pablo Greco |
40546a |
- * the default 32-bit DMA window, DDW windows can use large IOMMU
|
|
Pablo Greco |
40546a |
- * pages). 8 MiB is for second and further level overheads, like (b) */
|
|
Pablo Greco |
40546a |
- if (usesVFIO)
|
|
Pablo Greco |
40546a |
- passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
|
|
Pablo Greco |
40546a |
- memory +
|
|
Pablo Greco |
40546a |
- memory / 512 * nPCIHostBridges + 8192);
|
|
Pablo Greco |
40546a |
-
|
|
Pablo Greco |
40546a |
- memKB = baseLimit + passthroughLimit;
|
|
Pablo Greco |
40546a |
- goto done;
|
|
Pablo Greco |
40546a |
- }
|
|
Pablo Greco |
40546a |
+ if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
|
|
Pablo Greco |
40546a |
+ return getPPC64MemLockLimitBytes(def);
|
|
Pablo Greco |
40546a |
|
|
Pablo Greco |
40546a |
/* For device passthrough using VFIO the guest memory and MMIO memory
|
|
Pablo Greco |
40546a |
* regions need to be locked persistent in order to allow DMA.
|
|
Pablo Greco |
40546a |
--
|
|
Pablo Greco |
40546a |
2.21.0
|
|
Pablo Greco |
40546a |
|