9c6c51
From 051451a1b9cefa42ecfd6d27dcb6a12ef49de072 Mon Sep 17 00:00:00 2001
9c6c51
Message-Id: <051451a1b9cefa42ecfd6d27dcb6a12ef49de072@dist-git>
9c6c51
From: Daniel Henrique Barboza <danielhb413@gmail.com>
9c6c51
Date: Fri, 3 May 2019 13:54:51 +0200
9c6c51
Subject: [PATCH] qemu_domain: add a PPC64 memLockLimit helper
9c6c51
9c6c51
There is a lot of documentation in the comments about how PPC64 handles
9c6c51
passthrough VFIO devices to calculate the @memLockLimit. And more will
9c6c51
be added with the PPC64 NVLink2 support code.
9c6c51
9c6c51
Let's remove the PPC64 code from qemuDomainGetMemLockLimitBytes()
9c6c51
body and put it into a helper function. This will simplify the
9c6c51
flow of qemuDomainGetMemLockLimitBytes() that handles all the other
9c6c51
platforms and improve readability of the PPC64 specifics.
9c6c51
9c6c51
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
9c6c51
Reviewed-by: Erik Skultety <eskultet@redhat.com>
9c6c51
(cherry picked from commit 7a686fd2eae8d5674bb1213d8517dc5814fa6bf3)
9c6c51
9c6c51
https://bugzilla.redhat.com/show_bug.cgi?id=1505998
9c6c51
Signed-off-by: Erik Skultety <eskultet@redhat.com>
9c6c51
Message-Id: <ccdf9fefeb624585559606d00b6ac19b574733b3.1556884443.git.eskultet@redhat.com>
9c6c51
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
9c6c51
---
9c6c51
 src/qemu/qemu_domain.c | 171 ++++++++++++++++++++++-------------------
9c6c51
 1 file changed, 93 insertions(+), 78 deletions(-)
9c6c51
9c6c51
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
9c6c51
index d936090d87..f91de0b743 100644
9c6c51
--- a/src/qemu/qemu_domain.c
9c6c51
+++ b/src/qemu/qemu_domain.c
9c6c51
@@ -9805,6 +9805,97 @@ qemuDomainUpdateCurrentMemorySize(virQEMUDriverPtr driver,
9c6c51
 }
9c6c51
 
9c6c51
 
9c6c51
+/**
9c6c51
+ * getPPC64MemLockLimitBytes:
9c6c51
+ * @def: domain definition
9c6c51
+ *
9c6c51
+ * A PPC64 helper that calculates the memory locking limit in order for
9c6c51
+ * the guest to operate properly.
9c6c51
+ */
9c6c51
+static unsigned long long
9c6c51
+getPPC64MemLockLimitBytes(virDomainDefPtr def)
9c6c51
+{
9c6c51
+    unsigned long long memKB = 0;
9c6c51
+    unsigned long long baseLimit = 0;
9c6c51
+    unsigned long long memory = 0;
9c6c51
+    unsigned long long maxMemory = 0;
9c6c51
+    unsigned long long passthroughLimit = 0;
9c6c51
+    size_t i, nPCIHostBridges = 0;
9c6c51
+    bool usesVFIO = false;
9c6c51
+
9c6c51
+    for (i = 0; i < def->ncontrollers; i++) {
9c6c51
+        virDomainControllerDefPtr cont = def->controllers[i];
9c6c51
+
9c6c51
+        if (!virDomainControllerIsPSeriesPHB(cont))
9c6c51
+            continue;
9c6c51
+
9c6c51
+        nPCIHostBridges++;
9c6c51
+    }
9c6c51
+
9c6c51
+    for (i = 0; i < def->nhostdevs; i++) {
9c6c51
+        virDomainHostdevDefPtr dev = def->hostdevs[i];
9c6c51
+
9c6c51
+        if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
9c6c51
+            dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
9c6c51
+            dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
9c6c51
+            usesVFIO = true;
9c6c51
+            break;
9c6c51
+        }
9c6c51
+    }
9c6c51
+
9c6c51
+    memory = virDomainDefGetMemoryTotal(def);
9c6c51
+
9c6c51
+    if (def->mem.max_memory)
9c6c51
+        maxMemory = def->mem.max_memory;
9c6c51
+    else
9c6c51
+        maxMemory = memory;
9c6c51
+
9c6c51
+    /* baseLimit := maxMemory / 128                                  (a)
9c6c51
+     *              + 4 MiB * #PHBs + 8 MiB                          (b)
9c6c51
+     *
9c6c51
+     * (a) is the hash table
9c6c51
+     *
9c6c51
+     * (b) is accounting for the 32-bit DMA window - it could be either the
9c6c51
+     * KVM accelerated TCE tables for emulated devices, or the VFIO
9c6c51
+     * userspace view. The 4 MiB per-PHB (including the default one) covers
9c6c51
+     * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
9c6c51
+     * increased to help performance. The 8 MiB extra should be plenty for
9c6c51
+     * the TCE table index for any reasonable number of PHBs and several
9c6c51
+     * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
9c6c51
+    baseLimit = maxMemory / 128 +
9c6c51
+                4096 * nPCIHostBridges +
9c6c51
+                8192;
9c6c51
+
9c6c51
+    /* passthroughLimit := max( 2 GiB * #PHBs,                       (c)
9c6c51
+     *                          memory                               (d)
9c6c51
+     *                          + memory * 1/512 * #PHBs + 8 MiB )   (e)
9c6c51
+     *
9c6c51
+     * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
9c6c51
+     * rather than 1 GiB
9c6c51
+     *
9c6c51
+     * (d) is the with-DDW (and memory pre-registration and related
9c6c51
+     * features) DMA window accounting - assuming that we only account RAM
9c6c51
+     * once, even if mapped to multiple PHBs
9c6c51
+     *
9c6c51
+     * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
9c6c51
+     * window. This is based a bit on expected guest behaviour, but there
9c6c51
+     * really isn't a way to completely avoid that. We assume the guest
9c6c51
+     * requests a 64-bit DMA window (per PHB) just big enough to map all
9c6c51
+     * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
9c6c51
+     * kiB pages, less still if the guest is mapped with hugepages (unlike
9c6c51
+     * the default 32-bit DMA window, DDW windows can use large IOMMU
9c6c51
+     * pages). 8 MiB is for second and further level overheads, like (b) */
9c6c51
+    if (usesVFIO)
9c6c51
+        passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
9c6c51
+                               memory +
9c6c51
+                               memory / 512 * nPCIHostBridges + 8192);
9c6c51
+
9c6c51
+    memKB = baseLimit + passthroughLimit;
9c6c51
+
9c6c51
+    return memKB << 10;
9c6c51
+}
9c6c51
+
9c6c51
+
9c6c51
 /**
9c6c51
  * qemuDomainGetMemLockLimitBytes:
9c6c51
  * @def: domain definition
9c6c51
@@ -9836,84 +9927,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def)
9c6c51
     if (def->mem.locked)
9c6c51
         return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
9c6c51
 
9c6c51
-    if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) {
9c6c51
-        unsigned long long maxMemory;
9c6c51
-        unsigned long long memory;
9c6c51
-        unsigned long long baseLimit;
9c6c51
-        unsigned long long passthroughLimit = 0;
9c6c51
-        size_t nPCIHostBridges = 0;
9c6c51
-        bool usesVFIO = false;
9c6c51
-
9c6c51
-        for (i = 0; i < def->ncontrollers; i++) {
9c6c51
-            virDomainControllerDefPtr cont = def->controllers[i];
9c6c51
-
9c6c51
-            if (!virDomainControllerIsPSeriesPHB(cont))
9c6c51
-                continue;
9c6c51
-
9c6c51
-            nPCIHostBridges++;
9c6c51
-        }
9c6c51
-
9c6c51
-        for (i = 0; i < def->nhostdevs; i++) {
9c6c51
-            virDomainHostdevDefPtr dev = def->hostdevs[i];
9c6c51
-
9c6c51
-            if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
9c6c51
-                dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
9c6c51
-                dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
9c6c51
-                usesVFIO = true;
9c6c51
-                break;
9c6c51
-            }
9c6c51
-        }
9c6c51
-
9c6c51
-        memory = virDomainDefGetMemoryTotal(def);
9c6c51
-
9c6c51
-        if (def->mem.max_memory)
9c6c51
-            maxMemory = def->mem.max_memory;
9c6c51
-        else
9c6c51
-            maxMemory = memory;
9c6c51
-
9c6c51
-        /* baseLimit := maxMemory / 128                                  (a)
9c6c51
-         *              + 4 MiB * #PHBs + 8 MiB                          (b)
9c6c51
-         *
9c6c51
-         * (a) is the hash table
9c6c51
-         *
9c6c51
-         * (b) is accounting for the 32-bit DMA window - it could be either the
9c6c51
-         * KVM accelerated TCE tables for emulated devices, or the VFIO
9c6c51
-         * userspace view. The 4 MiB per-PHB (including the default one) covers
9c6c51
-         * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
9c6c51
-         * increased to help performance. The 8 MiB extra should be plenty for
9c6c51
-         * the TCE table index for any reasonable number of PHBs and several
9c6c51
-         * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
9c6c51
-        baseLimit = maxMemory / 128 +
9c6c51
-                    4096 * nPCIHostBridges +
9c6c51
-                    8192;
9c6c51
-
9c6c51
-        /* passthroughLimit := max( 2 GiB * #PHBs,                       (c)
9c6c51
-         *                          memory                               (d)
9c6c51
-         *                          + memory * 1/512 * #PHBs + 8 MiB )   (e)
9c6c51
-         *
9c6c51
-         * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
9c6c51
-         * rather than 1 GiB
9c6c51
-         *
9c6c51
-         * (d) is the with-DDW (and memory pre-registration and related
9c6c51
-         * features) DMA window accounting - assuming that we only account RAM
9c6c51
-         * once, even if mapped to multiple PHBs
9c6c51
-         *
9c6c51
-         * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
9c6c51
-         * window. This is based a bit on expected guest behaviour, but there
9c6c51
-         * really isn't a way to completely avoid that. We assume the guest
9c6c51
-         * requests a 64-bit DMA window (per PHB) just big enough to map all
9c6c51
-         * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
9c6c51
-         * kiB pages, less still if the guest is mapped with hugepages (unlike
9c6c51
-         * the default 32-bit DMA window, DDW windows can use large IOMMU
9c6c51
-         * pages). 8 MiB is for second and further level overheads, like (b) */
9c6c51
-        if (usesVFIO)
9c6c51
-            passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
9c6c51
-                                   memory +
9c6c51
-                                   memory / 512 * nPCIHostBridges + 8192);
9c6c51
-
9c6c51
-        memKB = baseLimit + passthroughLimit;
9c6c51
-        goto done;
9c6c51
-    }
9c6c51
+    if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
9c6c51
+        return getPPC64MemLockLimitBytes(def);
9c6c51
 
9c6c51
     /* For device passthrough using VFIO the guest memory and MMIO memory
9c6c51
      * regions need to be locked persistent in order to allow DMA.
9c6c51
-- 
9c6c51
2.21.0
9c6c51