a19a21
From 7ef9b9c593da98ad32ad20c28d17bb2700a35c29 Mon Sep 17 00:00:00 2001
a19a21
From: Cornelia Huck <cohuck@redhat.com>
a19a21
Date: Tue, 19 Jan 2021 12:50:45 -0500
a19a21
Subject: [PATCH 6/7] s390x/pci: Honor DMA limits set by vfio
a19a21
a19a21
RH-Author: Cornelia Huck <cohuck@redhat.com>
a19a21
Message-id: <20210119125046.472811-7-cohuck@redhat.com>
a19a21
Patchwork-id: 100680
a19a21
O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 6/7] s390x/pci: Honor DMA limits set by vfio
a19a21
Bugzilla: 1905391
a19a21
RH-Acked-by: David Hildenbrand <david@redhat.com>
a19a21
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
a19a21
RH-Acked-by: Thomas Huth <thuth@redhat.com>
a19a21
a19a21
From: Matthew Rosato <mjrosato@linux.ibm.com>
a19a21
a19a21
When an s390 guest is using lazy unmapping, it can result in a very
a19a21
large number of outstanding DMA requests, far beyond the default
a19a21
limit configured for vfio.  Let's track DMA usage similar to vfio
a19a21
in the host, and trigger the guest to flush their DMA mappings
a19a21
before vfio runs out.
a19a21
a19a21
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
a19a21
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
a19a21
[aw: non-Linux build fixes]
a19a21
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
a19a21
(cherry picked from commit 37fa32de707340f3a93959ad5a1ebc41ba1520ee)
a19a21
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
a19a21
a19a21
 Conflicts:
a19a21
	hw/s390x/s390-pci-bus.c
a19a21
        --> adapt to missing 981c3dcd9489 ("qdev: Convert to
a19a21
            qdev_unrealize() with Coccinelle")
a19a21
	hw/s390x/s390-pci-inst.c
a19a21
        --> adapt to out of order inclusion of 5039caf3c449 ("memory:
a19a21
            Add IOMMUTLBEvent")
a19a21
	include/hw/s390x/s390-pci-bus.h
a19a21
        --> adapt to missing db1015e92e04 ("Move QOM typedefs and
a19a21
            add missing includes")
a19a21
a19a21
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
a19a21
---
a19a21
 hw/s390x/s390-pci-bus.c          | 16 ++++++++----
a19a21
 hw/s390x/s390-pci-inst.c         | 45 +++++++++++++++++++++++++++-----
a19a21
 hw/s390x/s390-pci-vfio.c         | 42 +++++++++++++++++++++++++++++
a19a21
 include/hw/s390x/s390-pci-bus.h  |  9 +++++++
a19a21
 include/hw/s390x/s390-pci-inst.h |  3 +++
a19a21
 include/hw/s390x/s390-pci-vfio.h | 12 +++++++++
a19a21
 6 files changed, 116 insertions(+), 11 deletions(-)
a19a21
a19a21
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
a19a21
index 6daef2b6d57..a9f6f550472 100644
a19a21
--- a/hw/s390x/s390-pci-bus.c
a19a21
+++ b/hw/s390x/s390-pci-bus.c
a19a21
@@ -17,6 +17,7 @@
a19a21
 #include "cpu.h"
a19a21
 #include "hw/s390x/s390-pci-bus.h"
a19a21
 #include "hw/s390x/s390-pci-inst.h"
a19a21
+#include "hw/s390x/s390-pci-vfio.h"
a19a21
 #include "hw/pci/pci_bus.h"
a19a21
 #include "hw/qdev-properties.h"
a19a21
 #include "hw/pci/pci_bridge.h"
a19a21
@@ -771,6 +772,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
a19a21
     s->bus_no = 0;
a19a21
     QTAILQ_INIT(&s->pending_sei);
a19a21
     QTAILQ_INIT(&s->zpci_devs);
a19a21
+    QTAILQ_INIT(&s->zpci_dma_limit);
a19a21
 
a19a21
     css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
a19a21
                              S390_ADAPTER_SUPPRESSIBLE, &local_err);
a19a21
@@ -951,17 +953,18 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
a19a21
             }
a19a21
         }
a19a21
 
a19a21
+        pbdev->pdev = pdev;
a19a21
+        pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
a19a21
+        pbdev->iommu->pbdev = pbdev;
a19a21
+        pbdev->state = ZPCI_FS_DISABLED;
a19a21
+
a19a21
         if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
a19a21
             pbdev->fh |= FH_SHM_VFIO;
a19a21
+            pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
a19a21
         } else {
a19a21
             pbdev->fh |= FH_SHM_EMUL;
a19a21
         }
a19a21
 
a19a21
-        pbdev->pdev = pdev;
a19a21
-        pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
a19a21
-        pbdev->iommu->pbdev = pbdev;
a19a21
-        pbdev->state = ZPCI_FS_DISABLED;
a19a21
-
a19a21
         if (s390_pci_msix_init(pbdev)) {
a19a21
             error_setg(errp, "MSI-X support is mandatory "
a19a21
                        "in the S390 architecture");
a19a21
@@ -1014,6 +1017,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
a19a21
         pbdev->fid = 0;
a19a21
         QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
a19a21
         g_hash_table_remove(s->zpci_table, &pbdev->idx);
a19a21
+        if (pbdev->iommu->dma_limit) {
a19a21
+            s390_pci_end_dma_count(s, pbdev->iommu->dma_limit);
a19a21
+        }
a19a21
         object_property_set_bool(OBJECT(dev), false, "realized", NULL);
a19a21
     }
a19a21
 }
a19a21
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
a19a21
index b1885344f18..edbdf727984 100644
a19a21
--- a/hw/s390x/s390-pci-inst.c
a19a21
+++ b/hw/s390x/s390-pci-inst.c
a19a21
@@ -32,6 +32,20 @@
a19a21
         }                                                          \
a19a21
     } while (0)
a19a21
 
a19a21
+static inline void inc_dma_avail(S390PCIIOMMU *iommu)
a19a21
+{
a19a21
+    if (iommu->dma_limit) {
a19a21
+        iommu->dma_limit->avail++;
a19a21
+    }
a19a21
+}
a19a21
+
a19a21
+static inline void dec_dma_avail(S390PCIIOMMU *iommu)
a19a21
+{
a19a21
+    if (iommu->dma_limit) {
a19a21
+        iommu->dma_limit->avail--;
a19a21
+    }
a19a21
+}
a19a21
+
a19a21
 static void s390_set_status_code(CPUS390XState *env,
a19a21
                                  uint8_t r, uint64_t status_code)
a19a21
 {
a19a21
@@ -572,7 +586,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
a19a21
     return 0;
a19a21
 }
a19a21
 
a19a21
-static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
a19a21
+static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
a19a21
+                                      S390IOTLBEntry *entry)
a19a21
 {
a19a21
     S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova);
a19a21
     IOMMUTLBEvent event = {
a19a21
@@ -588,14 +603,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
a19a21
 
a19a21
     if (event.type == IOMMU_NOTIFIER_UNMAP) {
a19a21
         if (!cache) {
a19a21
-            return;
a19a21
+            goto out;
a19a21
         }
a19a21
         g_hash_table_remove(iommu->iotlb, &entry->iova);
a19a21
+        inc_dma_avail(iommu);
a19a21
     } else {
a19a21
         if (cache) {
a19a21
             if (cache->perm == entry->perm &&
a19a21
                 cache->translated_addr == entry->translated_addr) {
a19a21
-                return;
a19a21
+                goto out;
a19a21
             }
a19a21
 
a19a21
             event.type = IOMMU_NOTIFIER_UNMAP;
a19a21
@@ -611,9 +627,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
a19a21
         cache->len = PAGE_SIZE;
a19a21
         cache->perm = entry->perm;
a19a21
         g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
a19a21
+        dec_dma_avail(iommu);
a19a21
     }
a19a21
 
a19a21
     memory_region_notify_iommu(&iommu->iommu_mr, 0, event);
a19a21
+
a19a21
+out:
a19a21
+    return iommu->dma_limit ? iommu->dma_limit->avail : 1;
a19a21
 }
a19a21
 
a19a21
 int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
a19a21
@@ -625,6 +645,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
a19a21
     S390PCIIOMMU *iommu;
a19a21
     S390IOTLBEntry entry;
a19a21
     hwaddr start, end;
a19a21
+    uint32_t dma_avail;
a19a21
 
a19a21
     if (env->psw.mask & PSW_MASK_PSTATE) {
a19a21
         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
a19a21
@@ -663,6 +684,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
a19a21
     }
a19a21
 
a19a21
     iommu = pbdev->iommu;
a19a21
+    if (iommu->dma_limit) {
a19a21
+        dma_avail = iommu->dma_limit->avail;
a19a21
+    } else {
a19a21
+        dma_avail = 1;
a19a21
+    }
a19a21
     if (!iommu->g_iota) {
a19a21
         error = ERR_EVENT_INVALAS;
a19a21
         goto err;
a19a21
@@ -680,8 +706,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
a19a21
         }
a19a21
 
a19a21
         start += entry.len;
a19a21
-        while (entry.iova < start && entry.iova < end) {
a19a21
-            s390_pci_update_iotlb(iommu, &entry);
a19a21
+        while (entry.iova < start && entry.iova < end &&
a19a21
+               (dma_avail > 0 || entry.perm == IOMMU_NONE)) {
a19a21
+            dma_avail = s390_pci_update_iotlb(iommu, &entry);
a19a21
             entry.iova += PAGE_SIZE;
a19a21
             entry.translated_addr += PAGE_SIZE;
a19a21
         }
a19a21
@@ -694,7 +721,13 @@ err:
a19a21
         s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
a19a21
     } else {
a19a21
         pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++;
a19a21
-        setcc(cpu, ZPCI_PCI_LS_OK);
a19a21
+        if (dma_avail > 0) {
a19a21
+            setcc(cpu, ZPCI_PCI_LS_OK);
a19a21
+        } else {
a19a21
+            /* vfio DMA mappings are exhausted, trigger a RPCIT */
a19a21
+            setcc(cpu, ZPCI_PCI_LS_ERR);
a19a21
+            s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES);
a19a21
+        }
a19a21
     }
a19a21
     return 0;
a19a21
 }
a19a21
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
a19a21
index 0eb22ffec4c..01c1e8ac89a 100644
a19a21
--- a/hw/s390x/s390-pci-vfio.c
a19a21
+++ b/hw/s390x/s390-pci-vfio.c
a19a21
@@ -12,7 +12,9 @@
a19a21
 #include <sys/ioctl.h>
a19a21
 
a19a21
 #include "qemu/osdep.h"
a19a21
+#include "hw/s390x/s390-pci-bus.h"
a19a21
 #include "hw/s390x/s390-pci-vfio.h"
a19a21
+#include "hw/vfio/pci.h"
a19a21
 #include "hw/vfio/vfio-common.h"
a19a21
 
a19a21
 /*
a19a21
@@ -52,3 +54,43 @@ retry:
a19a21
     return vfio_get_info_dma_avail(info, avail);
a19a21
 }
a19a21
 
a19a21
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
a19a21
+                                          S390PCIBusDevice *pbdev)
a19a21
+{
a19a21
+    S390PCIDMACount *cnt;
a19a21
+    uint32_t avail;
a19a21
+    VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
a19a21
+    int id;
a19a21
+
a19a21
+    assert(vpdev);
a19a21
+
a19a21
+    id = vpdev->vbasedev.group->container->fd;
a19a21
+
a19a21
+    if (!s390_pci_update_dma_avail(id, &avail)) {
a19a21
+        return NULL;
a19a21
+    }
a19a21
+
a19a21
+    QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
a19a21
+        if (cnt->id  == id) {
a19a21
+            cnt->users++;
a19a21
+            return cnt;
a19a21
+        }
a19a21
+    }
a19a21
+
a19a21
+    cnt = g_new0(S390PCIDMACount, 1);
a19a21
+    cnt->id = id;
a19a21
+    cnt->users = 1;
a19a21
+    cnt->avail = avail;
a19a21
+    QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
a19a21
+    return cnt;
a19a21
+}
a19a21
+
a19a21
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
a19a21
+{
a19a21
+    assert(cnt);
a19a21
+
a19a21
+    cnt->users--;
a19a21
+    if (cnt->users == 0) {
a19a21
+        QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
a19a21
+    }
a19a21
+}
a19a21
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
a19a21
index 550f3cc5e92..2f2edbd0bf3 100644
a19a21
--- a/include/hw/s390x/s390-pci-bus.h
a19a21
+++ b/include/hw/s390x/s390-pci-bus.h
a19a21
@@ -266,6 +266,13 @@ typedef struct S390IOTLBEntry {
a19a21
 } S390IOTLBEntry;
a19a21
 
a19a21
 typedef struct S390PCIBusDevice S390PCIBusDevice;
a19a21
+typedef struct S390PCIDMACount {
a19a21
+    int id;
a19a21
+    int users;
a19a21
+    uint32_t avail;
a19a21
+    QTAILQ_ENTRY(S390PCIDMACount) link;
a19a21
+} S390PCIDMACount;
a19a21
+
a19a21
 typedef struct S390PCIIOMMU {
a19a21
     Object parent_obj;
a19a21
     S390PCIBusDevice *pbdev;
a19a21
@@ -277,6 +284,7 @@ typedef struct S390PCIIOMMU {
a19a21
     uint64_t pba;
a19a21
     uint64_t pal;
a19a21
     GHashTable *iotlb;
a19a21
+    S390PCIDMACount *dma_limit;
a19a21
 } S390PCIIOMMU;
a19a21
 
a19a21
 typedef struct S390PCIIOMMUTable {
a19a21
@@ -352,6 +360,7 @@ typedef struct S390pciState {
a19a21
     GHashTable *zpci_table;
a19a21
     QTAILQ_HEAD(, SeiContainer) pending_sei;
a19a21
     QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
a19a21
+    QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit;
a19a21
 } S390pciState;
a19a21
 
a19a21
 S390pciState *s390_get_phb(void);
a19a21
diff --git a/include/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h
a19a21
index fa3bf8b5aad..8ee3a3c2375 100644
a19a21
--- a/include/hw/s390x/s390-pci-inst.h
a19a21
+++ b/include/hw/s390x/s390-pci-inst.h
a19a21
@@ -254,6 +254,9 @@ typedef struct ClpReqRspQueryPciGrp {
a19a21
 #define ZPCI_STPCIFC_ST_INVAL_DMAAS   28
a19a21
 #define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
a19a21
 
a19a21
+/* Refresh PCI Translations status codes */
a19a21
+#define ZPCI_RPCIT_ST_INSUFF_RES      16
a19a21
+
a19a21
 /* FIB function controls */
a19a21
 #define ZPCI_FIB_FC_ENABLED     0x80
a19a21
 #define ZPCI_FIB_FC_ERROR       0x40
a19a21
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
a19a21
index 1727292e9b5..539bcf04eb5 100644
a19a21
--- a/include/hw/s390x/s390-pci-vfio.h
a19a21
+++ b/include/hw/s390x/s390-pci-vfio.h
a19a21
@@ -12,13 +12,25 @@
a19a21
 #ifndef HW_S390_PCI_VFIO_H
a19a21
 #define HW_S390_PCI_VFIO_H
a19a21
 
a19a21
+#include "hw/s390x/s390-pci-bus.h"
a19a21
+
a19a21
 #ifdef CONFIG_LINUX
a19a21
 bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
a19a21
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
a19a21
+                                          S390PCIBusDevice *pbdev);
a19a21
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
a19a21
 #else
a19a21
 static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
a19a21
 {
a19a21
     return false;
a19a21
 }
a19a21
+static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
a19a21
+                                                        S390PCIBusDevice *pbdev)
a19a21
+{
a19a21
+    return NULL;
a19a21
+}
a19a21
+static inline void s390_pci_end_dma_count(S390pciState *s,
a19a21
+                                          S390PCIDMACount *cnt) { }
a19a21
 #endif
a19a21
 
a19a21
 #endif
a19a21
-- 
a19a21
2.27.0
a19a21