From 051451a1b9cefa42ecfd6d27dcb6a12ef49de072 Mon Sep 17 00:00:00 2001 Message-Id: <051451a1b9cefa42ecfd6d27dcb6a12ef49de072@dist-git> From: Daniel Henrique Barboza Date: Fri, 3 May 2019 13:54:51 +0200 Subject: [PATCH] qemu_domain: add a PPC64 memLockLimit helper There is a lot of documentation in the comments about how PPC64 handles passthrough VFIO devices to calculate the @memLockLimit. And more will be added with the PPC64 NVLink2 support code. Let's remove the PPC64 code from qemuDomainGetMemLockLimitBytes() body and put it into a helper function. This will simplify the flow of qemuDomainGetMemLockLimitBytes() that handles all the other platforms and improve readability of the PPC64 specifics. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Erik Skultety (cherry picked from commit 7a686fd2eae8d5674bb1213d8517dc5814fa6bf3) https://bugzilla.redhat.com/show_bug.cgi?id=1505998 Signed-off-by: Erik Skultety Message-Id: Reviewed-by: Andrea Bolognani --- src/qemu/qemu_domain.c | 171 ++++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 78 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index d936090d87..f91de0b743 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -9805,6 +9805,97 @@ qemuDomainUpdateCurrentMemorySize(virQEMUDriverPtr driver, } +/** + * getPPC64MemLockLimitBytes: + * @def: domain definition + * + * A PPC64 helper that calculates the memory locking limit in order for + * the guest to operate properly. 
+ */ +static unsigned long long +getPPC64MemLockLimitBytes(virDomainDefPtr def) +{ + unsigned long long memKB = 0; + unsigned long long baseLimit = 0; + unsigned long long memory = 0; + unsigned long long maxMemory = 0; + unsigned long long passthroughLimit = 0; + size_t i, nPCIHostBridges = 0; + bool usesVFIO = false; + + for (i = 0; i < def->ncontrollers; i++) { + virDomainControllerDefPtr cont = def->controllers[i]; + + if (!virDomainControllerIsPSeriesPHB(cont)) + continue; + + nPCIHostBridges++; + } + + for (i = 0; i < def->nhostdevs; i++) { + virDomainHostdevDefPtr dev = def->hostdevs[i]; + + if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { + usesVFIO = true; + break; + } + } + + memory = virDomainDefGetMemoryTotal(def); + + if (def->mem.max_memory) + maxMemory = def->mem.max_memory; + else + maxMemory = memory; + + /* baseLimit := maxMemory / 128 (a) + * + 4 MiB * #PHBs + 8 MiB (b) + * + * (a) is the hash table + * + * (b) is accounting for the 32-bit DMA window - it could be either the + * KVM accelerated TCE tables for emulated devices, or the VFIO + * userspace view. The 4 MiB per-PHB (including the default one) covers + * a 2GiB DMA window: default is 1GiB, but it's possible it'll be + * increased to help performance. The 8 MiB extra should be plenty for + * the TCE table index for any reasonable number of PHBs and several + * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */ + baseLimit = maxMemory / 128 + + 4096 * nPCIHostBridges + + 8192; + + /* passthroughLimit := max( 2 GiB * #PHBs, (c) + * memory (d) + * + memory * 1/512 * #PHBs + 8 MiB ) (e) + * + * (c) is the pre-DDW VFIO DMA window accounting. 
We're allowing 2 GiB + * rather than 1 GiB + * + * (d) is the with-DDW (and memory pre-registration and related + * features) DMA window accounting - assuming that we only account RAM + * once, even if mapped to multiple PHBs + * + * (e) is the with-DDW userspace view and overhead for the 64-bit DMA + * window. This is based a bit on expected guest behaviour, but there + * really isn't a way to completely avoid that. We assume the guest + * requests a 64-bit DMA window (per PHB) just big enough to map all + * its RAM. 4 kiB page size gives the 1/512; it will be less with 64 + * kiB pages, less still if the guest is mapped with hugepages (unlike + * the default 32-bit DMA window, DDW windows can use large IOMMU + * pages). 8 MiB is for second and further level overheads, like (b) */ + if (usesVFIO) + passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges, + memory + + memory / 512 * nPCIHostBridges + 8192); + + memKB = baseLimit + passthroughLimit; + + return memKB << 10; +} + + /** * qemuDomainGetMemLockLimitBytes: * @def: domain definition @@ -9836,84 +9927,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def) if (def->mem.locked) return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED; - if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) { - unsigned long long maxMemory; - unsigned long long memory; - unsigned long long baseLimit; - unsigned long long passthroughLimit = 0; - size_t nPCIHostBridges = 0; - bool usesVFIO = false; - - for (i = 0; i < def->ncontrollers; i++) { - virDomainControllerDefPtr cont = def->controllers[i]; - - if (!virDomainControllerIsPSeriesPHB(cont)) - continue; - - nPCIHostBridges++; - } - - for (i = 0; i < def->nhostdevs; i++) { - virDomainHostdevDefPtr dev = def->hostdevs[i]; - - if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && - dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && - dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { - usesVFIO = true; - break; - } - } - - memory = 
virDomainDefGetMemoryTotal(def); - - if (def->mem.max_memory) - maxMemory = def->mem.max_memory; - else - maxMemory = memory; - - /* baseLimit := maxMemory / 128 (a) - * + 4 MiB * #PHBs + 8 MiB (b) - * - * (a) is the hash table - * - * (b) is accounting for the 32-bit DMA window - it could be either the - * KVM accelerated TCE tables for emulated devices, or the VFIO - * userspace view. The 4 MiB per-PHB (including the default one) covers - * a 2GiB DMA window: default is 1GiB, but it's possible it'll be - * increased to help performance. The 8 MiB extra should be plenty for - * the TCE table index for any reasonable number of PHBs and several - * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */ - baseLimit = maxMemory / 128 + - 4096 * nPCIHostBridges + - 8192; - - /* passthroughLimit := max( 2 GiB * #PHBs, (c) - * memory (d) - * + memory * 1/512 * #PHBs + 8 MiB ) (e) - * - * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB - * rather than 1 GiB - * - * (d) is the with-DDW (and memory pre-registration and related - * features) DMA window accounting - assuming that we only account RAM - * once, even if mapped to multiple PHBs - * - * (e) is the with-DDW userspace view and overhead for the 64-bit DMA - * window. This is based a bit on expected guest behaviour, but there - * really isn't a way to completely avoid that. We assume the guest - * requests a 64-bit DMA window (per PHB) just big enough to map all - * its RAM. 4 kiB page size gives the 1/512; it will be less with 64 - * kiB pages, less still if the guest is mapped with hugepages (unlike - * the default 32-bit DMA window, DDW windows can use large IOMMU - * pages). 
8 MiB is for second and further level overheads, like (b) */ - if (usesVFIO) - passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges, - memory + - memory / 512 * nPCIHostBridges + 8192); - - memKB = baseLimit + passthroughLimit; - goto done; - } + if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) + return getPPC64MemLockLimitBytes(def); /* For device passthrough using VFIO the guest memory and MMIO memory * regions need to be locked persistent in order to allow DMA. -- 2.21.0