Pablo Greco 40546a
From 051451a1b9cefa42ecfd6d27dcb6a12ef49de072 Mon Sep 17 00:00:00 2001
Pablo Greco 40546a
Message-Id: <051451a1b9cefa42ecfd6d27dcb6a12ef49de072@dist-git>
Pablo Greco 40546a
From: Daniel Henrique Barboza <danielhb413@gmail.com>
Pablo Greco 40546a
Date: Fri, 3 May 2019 13:54:51 +0200
Pablo Greco 40546a
Subject: [PATCH] qemu_domain: add a PPC64 memLockLimit helper
Pablo Greco 40546a
Pablo Greco 40546a
There is a lot of documentation in the comments about how PPC64 handles
Pablo Greco 40546a
passthrough VFIO devices to calculate the @memLockLimit. And more will
Pablo Greco 40546a
be added with the PPC64 NVLink2 support code.
Pablo Greco 40546a
Pablo Greco 40546a
Let's remove the PPC64 code from qemuDomainGetMemLockLimitBytes()
Pablo Greco 40546a
body and put it into a helper function. This will simplify the
Pablo Greco 40546a
flow of qemuDomainGetMemLockLimitBytes() that handles all the other
Pablo Greco 40546a
platforms and improve readability of the PPC64 specifics.
Pablo Greco 40546a
Pablo Greco 40546a
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Pablo Greco 40546a
Reviewed-by: Erik Skultety <eskultet@redhat.com>
Pablo Greco 40546a
(cherry picked from commit 7a686fd2eae8d5674bb1213d8517dc5814fa6bf3)
Pablo Greco 40546a
Pablo Greco 40546a
https://bugzilla.redhat.com/show_bug.cgi?id=1505998
Pablo Greco 40546a
Signed-off-by: Erik Skultety <eskultet@redhat.com>
Pablo Greco 40546a
Message-Id: <ccdf9fefeb624585559606d00b6ac19b574733b3.1556884443.git.eskultet@redhat.com>
Pablo Greco 40546a
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
Pablo Greco 40546a
---
Pablo Greco 40546a
 src/qemu/qemu_domain.c | 171 ++++++++++++++++++++++-------------------
Pablo Greco 40546a
 1 file changed, 93 insertions(+), 78 deletions(-)
Pablo Greco 40546a
Pablo Greco 40546a
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
Pablo Greco 40546a
index d936090d87..f91de0b743 100644
Pablo Greco 40546a
--- a/src/qemu/qemu_domain.c
Pablo Greco 40546a
+++ b/src/qemu/qemu_domain.c
Pablo Greco 40546a
@@ -9805,6 +9805,97 @@ qemuDomainUpdateCurrentMemorySize(virQEMUDriverPtr driver,
Pablo Greco 40546a
 }
Pablo Greco 40546a
 
Pablo Greco 40546a
 
Pablo Greco 40546a
+/**
Pablo Greco 40546a
+ * getPPC64MemLockLimitBytes:
Pablo Greco 40546a
+ * @def: domain definition
Pablo Greco 40546a
+ *
Pablo Greco 40546a
+ * A PPC64 helper that calculates the memory locking limit in order for
Pablo Greco 40546a
+ * the guest to operate properly.
Pablo Greco 40546a
+ */
Pablo Greco 40546a
+static unsigned long long
Pablo Greco 40546a
+getPPC64MemLockLimitBytes(virDomainDefPtr def)
Pablo Greco 40546a
+{
Pablo Greco 40546a
+    unsigned long long memKB = 0;
Pablo Greco 40546a
+    unsigned long long baseLimit = 0;
Pablo Greco 40546a
+    unsigned long long memory = 0;
Pablo Greco 40546a
+    unsigned long long maxMemory = 0;
Pablo Greco 40546a
+    unsigned long long passthroughLimit = 0;
Pablo Greco 40546a
+    size_t i, nPCIHostBridges = 0;
Pablo Greco 40546a
+    bool usesVFIO = false;
Pablo Greco 40546a
+
Pablo Greco 40546a
+    for (i = 0; i < def->ncontrollers; i++) {
Pablo Greco 40546a
+        virDomainControllerDefPtr cont = def->controllers[i];
Pablo Greco 40546a
+
Pablo Greco 40546a
+        if (!virDomainControllerIsPSeriesPHB(cont))
Pablo Greco 40546a
+            continue;
Pablo Greco 40546a
+
Pablo Greco 40546a
+        nPCIHostBridges++;
Pablo Greco 40546a
+    }
Pablo Greco 40546a
+
Pablo Greco 40546a
+    for (i = 0; i < def->nhostdevs; i++) {
Pablo Greco 40546a
+        virDomainHostdevDefPtr dev = def->hostdevs[i];
Pablo Greco 40546a
+
Pablo Greco 40546a
+        if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
Pablo Greco 40546a
+            dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
Pablo Greco 40546a
+            dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
Pablo Greco 40546a
+            usesVFIO = true;
Pablo Greco 40546a
+            break;
Pablo Greco 40546a
+        }
Pablo Greco 40546a
+    }
Pablo Greco 40546a
+
Pablo Greco 40546a
+    memory = virDomainDefGetMemoryTotal(def);
Pablo Greco 40546a
+
Pablo Greco 40546a
+    if (def->mem.max_memory)
Pablo Greco 40546a
+        maxMemory = def->mem.max_memory;
Pablo Greco 40546a
+    else
Pablo Greco 40546a
+        maxMemory = memory;
Pablo Greco 40546a
+
Pablo Greco 40546a
+    /* baseLimit := maxMemory / 128                                  (a)
Pablo Greco 40546a
+     *              + 4 MiB * #PHBs + 8 MiB                          (b)
Pablo Greco 40546a
+     *
Pablo Greco 40546a
+     * (a) is the hash table
Pablo Greco 40546a
+     *
Pablo Greco 40546a
+     * (b) is accounting for the 32-bit DMA window - it could be either the
Pablo Greco 40546a
+     * KVM accelerated TCE tables for emulated devices, or the VFIO
Pablo Greco 40546a
+     * userspace view. The 4 MiB per-PHB (including the default one) covers
Pablo Greco 40546a
+     * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
Pablo Greco 40546a
+     * increased to help performance. The 8 MiB extra should be plenty for
Pablo Greco 40546a
+     * the TCE table index for any reasonable number of PHBs and several
Pablo Greco 40546a
+     * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
Pablo Greco 40546a
+    baseLimit = maxMemory / 128 +
Pablo Greco 40546a
+                4096 * nPCIHostBridges +
Pablo Greco 40546a
+                8192;
Pablo Greco 40546a
+
Pablo Greco 40546a
+    /* passthroughLimit := max( 2 GiB * #PHBs,                       (c)
Pablo Greco 40546a
+     *                          memory                               (d)
Pablo Greco 40546a
+     *                          + memory * 1/512 * #PHBs + 8 MiB )   (e)
Pablo Greco 40546a
+     *
Pablo Greco 40546a
+     * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
Pablo Greco 40546a
+     * rather than 1 GiB
Pablo Greco 40546a
+     *
Pablo Greco 40546a
+     * (d) is the with-DDW (and memory pre-registration and related
Pablo Greco 40546a
+     * features) DMA window accounting - assuming that we only account RAM
Pablo Greco 40546a
+     * once, even if mapped to multiple PHBs
Pablo Greco 40546a
+     *
Pablo Greco 40546a
+     * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
Pablo Greco 40546a
+     * window. This is based a bit on expected guest behaviour, but there
Pablo Greco 40546a
+     * really isn't a way to completely avoid that. We assume the guest
Pablo Greco 40546a
+     * requests a 64-bit DMA window (per PHB) just big enough to map all
Pablo Greco 40546a
+     * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
Pablo Greco 40546a
+     * kiB pages, less still if the guest is mapped with hugepages (unlike
Pablo Greco 40546a
+     * the default 32-bit DMA window, DDW windows can use large IOMMU
Pablo Greco 40546a
+     * pages). 8 MiB is for second and further level overheads, like (b) */
Pablo Greco 40546a
+    if (usesVFIO)
Pablo Greco 40546a
+        passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
Pablo Greco 40546a
+                               memory +
Pablo Greco 40546a
+                               memory / 512 * nPCIHostBridges + 8192);
Pablo Greco 40546a
+
Pablo Greco 40546a
+    memKB = baseLimit + passthroughLimit;
Pablo Greco 40546a
+
Pablo Greco 40546a
+    return memKB << 10;
Pablo Greco 40546a
+}
Pablo Greco 40546a
+
Pablo Greco 40546a
+
Pablo Greco 40546a
 /**
Pablo Greco 40546a
  * qemuDomainGetMemLockLimitBytes:
Pablo Greco 40546a
  * @def: domain definition
Pablo Greco 40546a
@@ -9836,84 +9927,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def)
Pablo Greco 40546a
     if (def->mem.locked)
Pablo Greco 40546a
         return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
Pablo Greco 40546a
 
Pablo Greco 40546a
-    if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) {
Pablo Greco 40546a
-        unsigned long long maxMemory;
Pablo Greco 40546a
-        unsigned long long memory;
Pablo Greco 40546a
-        unsigned long long baseLimit;
Pablo Greco 40546a
-        unsigned long long passthroughLimit = 0;
Pablo Greco 40546a
-        size_t nPCIHostBridges = 0;
Pablo Greco 40546a
-        bool usesVFIO = false;
Pablo Greco 40546a
-
Pablo Greco 40546a
-        for (i = 0; i < def->ncontrollers; i++) {
Pablo Greco 40546a
-            virDomainControllerDefPtr cont = def->controllers[i];
Pablo Greco 40546a
-
Pablo Greco 40546a
-            if (!virDomainControllerIsPSeriesPHB(cont))
Pablo Greco 40546a
-                continue;
Pablo Greco 40546a
-
Pablo Greco 40546a
-            nPCIHostBridges++;
Pablo Greco 40546a
-        }
Pablo Greco 40546a
-
Pablo Greco 40546a
-        for (i = 0; i < def->nhostdevs; i++) {
Pablo Greco 40546a
-            virDomainHostdevDefPtr dev = def->hostdevs[i];
Pablo Greco 40546a
-
Pablo Greco 40546a
-            if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
Pablo Greco 40546a
-                dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
Pablo Greco 40546a
-                dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
Pablo Greco 40546a
-                usesVFIO = true;
Pablo Greco 40546a
-                break;
Pablo Greco 40546a
-            }
Pablo Greco 40546a
-        }
Pablo Greco 40546a
-
Pablo Greco 40546a
-        memory = virDomainDefGetMemoryTotal(def);
Pablo Greco 40546a
-
Pablo Greco 40546a
-        if (def->mem.max_memory)
Pablo Greco 40546a
-            maxMemory = def->mem.max_memory;
Pablo Greco 40546a
-        else
Pablo Greco 40546a
-            maxMemory = memory;
Pablo Greco 40546a
-
Pablo Greco 40546a
-        /* baseLimit := maxMemory / 128                                  (a)
Pablo Greco 40546a
-         *              + 4 MiB * #PHBs + 8 MiB                          (b)
Pablo Greco 40546a
-         *
Pablo Greco 40546a
-         * (a) is the hash table
Pablo Greco 40546a
-         *
Pablo Greco 40546a
-         * (b) is accounting for the 32-bit DMA window - it could be either the
Pablo Greco 40546a
-         * KVM accelerated TCE tables for emulated devices, or the VFIO
Pablo Greco 40546a
-         * userspace view. The 4 MiB per-PHB (including the default one) covers
Pablo Greco 40546a
-         * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
Pablo Greco 40546a
-         * increased to help performance. The 8 MiB extra should be plenty for
Pablo Greco 40546a
-         * the TCE table index for any reasonable number of PHBs and several
Pablo Greco 40546a
-         * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
Pablo Greco 40546a
-        baseLimit = maxMemory / 128 +
Pablo Greco 40546a
-                    4096 * nPCIHostBridges +
Pablo Greco 40546a
-                    8192;
Pablo Greco 40546a
-
Pablo Greco 40546a
-        /* passthroughLimit := max( 2 GiB * #PHBs,                       (c)
Pablo Greco 40546a
-         *                          memory                               (d)
Pablo Greco 40546a
-         *                          + memory * 1/512 * #PHBs + 8 MiB )   (e)
Pablo Greco 40546a
-         *
Pablo Greco 40546a
-         * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
Pablo Greco 40546a
-         * rather than 1 GiB
Pablo Greco 40546a
-         *
Pablo Greco 40546a
-         * (d) is the with-DDW (and memory pre-registration and related
Pablo Greco 40546a
-         * features) DMA window accounting - assuming that we only account RAM
Pablo Greco 40546a
-         * once, even if mapped to multiple PHBs
Pablo Greco 40546a
-         *
Pablo Greco 40546a
-         * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
Pablo Greco 40546a
-         * window. This is based a bit on expected guest behaviour, but there
Pablo Greco 40546a
-         * really isn't a way to completely avoid that. We assume the guest
Pablo Greco 40546a
-         * requests a 64-bit DMA window (per PHB) just big enough to map all
Pablo Greco 40546a
-         * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
Pablo Greco 40546a
-         * kiB pages, less still if the guest is mapped with hugepages (unlike
Pablo Greco 40546a
-         * the default 32-bit DMA window, DDW windows can use large IOMMU
Pablo Greco 40546a
-         * pages). 8 MiB is for second and further level overheads, like (b) */
Pablo Greco 40546a
-        if (usesVFIO)
Pablo Greco 40546a
-            passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
Pablo Greco 40546a
-                                   memory +
Pablo Greco 40546a
-                                   memory / 512 * nPCIHostBridges + 8192);
Pablo Greco 40546a
-
Pablo Greco 40546a
-        memKB = baseLimit + passthroughLimit;
Pablo Greco 40546a
-        goto done;
Pablo Greco 40546a
-    }
Pablo Greco 40546a
+    if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
Pablo Greco 40546a
+        return getPPC64MemLockLimitBytes(def);
Pablo Greco 40546a
 
Pablo Greco 40546a
     /* For device passthrough using VFIO the guest memory and MMIO memory
Pablo Greco 40546a
      * regions need to be locked persistent in order to allow DMA.
Pablo Greco 40546a
-- 
Pablo Greco 40546a
2.21.0
Pablo Greco 40546a