c687bc
From 7ef9b9c593da98ad32ad20c28d17bb2700a35c29 Mon Sep 17 00:00:00 2001
c687bc
From: Cornelia Huck <cohuck@redhat.com>
c687bc
Date: Tue, 19 Jan 2021 12:50:45 -0500
c687bc
Subject: [PATCH 6/7] s390x/pci: Honor DMA limits set by vfio
c687bc
c687bc
RH-Author: Cornelia Huck <cohuck@redhat.com>
c687bc
Message-id: <20210119125046.472811-7-cohuck@redhat.com>
c687bc
Patchwork-id: 100680
c687bc
O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 6/7] s390x/pci: Honor DMA limits set by vfio
c687bc
Bugzilla: 1905391
c687bc
RH-Acked-by: David Hildenbrand <david@redhat.com>
c687bc
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
c687bc
RH-Acked-by: Thomas Huth <thuth@redhat.com>
c687bc
c687bc
From: Matthew Rosato <mjrosato@linux.ibm.com>
c687bc
c687bc
When an s390 guest is using lazy unmapping, it can result in a very
c687bc
large number of outstanding DMA requests, far beyond the default
c687bc
limit configured for vfio.  Let's track DMA usage similar to vfio
c687bc
in the host, and trigger the guest to flush their DMA mappings
c687bc
before vfio runs out.
c687bc
c687bc
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
c687bc
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
c687bc
[aw: non-Linux build fixes]
c687bc
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
c687bc
(cherry picked from commit 37fa32de707340f3a93959ad5a1ebc41ba1520ee)
c687bc
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
c687bc
c687bc
 Conflicts:
c687bc
	hw/s390x/s390-pci-bus.c
c687bc
        --> adapt to missing 981c3dcd9489 ("qdev: Convert to
c687bc
            qdev_unrealize() with Coccinelle")
c687bc
	hw/s390x/s390-pci-inst.c
c687bc
        --> adapt to out of order inclusion of 5039caf3c449 ("memory:
c687bc
            Add IOMMUTLBEvent")
c687bc
	include/hw/s390x/s390-pci-bus.h
c687bc
        --> adapt to missing db1015e92e04 ("Move QOM typedefs and
c687bc
            add missing includes")
c687bc
c687bc
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
c687bc
---
c687bc
 hw/s390x/s390-pci-bus.c          | 16 ++++++++----
c687bc
 hw/s390x/s390-pci-inst.c         | 45 +++++++++++++++++++++++++++-----
c687bc
 hw/s390x/s390-pci-vfio.c         | 42 +++++++++++++++++++++++++++++
c687bc
 include/hw/s390x/s390-pci-bus.h  |  9 +++++++
c687bc
 include/hw/s390x/s390-pci-inst.h |  3 +++
c687bc
 include/hw/s390x/s390-pci-vfio.h | 12 +++++++++
c687bc
 6 files changed, 116 insertions(+), 11 deletions(-)
c687bc
c687bc
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
c687bc
index 6daef2b6d57..a9f6f550472 100644
c687bc
--- a/hw/s390x/s390-pci-bus.c
c687bc
+++ b/hw/s390x/s390-pci-bus.c
c687bc
@@ -17,6 +17,7 @@
c687bc
 #include "cpu.h"
c687bc
 #include "hw/s390x/s390-pci-bus.h"
c687bc
 #include "hw/s390x/s390-pci-inst.h"
c687bc
+#include "hw/s390x/s390-pci-vfio.h"
c687bc
 #include "hw/pci/pci_bus.h"
c687bc
 #include "hw/qdev-properties.h"
c687bc
 #include "hw/pci/pci_bridge.h"
c687bc
@@ -771,6 +772,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
c687bc
     s->bus_no = 0;
c687bc
     QTAILQ_INIT(&s->pending_sei);
c687bc
     QTAILQ_INIT(&s->zpci_devs);
c687bc
+    QTAILQ_INIT(&s->zpci_dma_limit);
c687bc
 
c687bc
     css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
c687bc
                              S390_ADAPTER_SUPPRESSIBLE, &local_err);
c687bc
@@ -951,17 +953,18 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
c687bc
             }
c687bc
         }
c687bc
 
c687bc
+        pbdev->pdev = pdev;
c687bc
+        pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
c687bc
+        pbdev->iommu->pbdev = pbdev;
c687bc
+        pbdev->state = ZPCI_FS_DISABLED;
c687bc
+
c687bc
         if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
c687bc
             pbdev->fh |= FH_SHM_VFIO;
c687bc
+            pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
c687bc
         } else {
c687bc
             pbdev->fh |= FH_SHM_EMUL;
c687bc
         }
c687bc
 
c687bc
-        pbdev->pdev = pdev;
c687bc
-        pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
c687bc
-        pbdev->iommu->pbdev = pbdev;
c687bc
-        pbdev->state = ZPCI_FS_DISABLED;
c687bc
-
c687bc
         if (s390_pci_msix_init(pbdev)) {
c687bc
             error_setg(errp, "MSI-X support is mandatory "
c687bc
                        "in the S390 architecture");
c687bc
@@ -1014,6 +1017,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
c687bc
         pbdev->fid = 0;
c687bc
         QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
c687bc
         g_hash_table_remove(s->zpci_table, &pbdev->idx);
c687bc
+        if (pbdev->iommu->dma_limit) {
c687bc
+            s390_pci_end_dma_count(s, pbdev->iommu->dma_limit);
c687bc
+        }
c687bc
         object_property_set_bool(OBJECT(dev), false, "realized", NULL);
c687bc
     }
c687bc
 }
c687bc
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
c687bc
index b1885344f18..edbdf727984 100644
c687bc
--- a/hw/s390x/s390-pci-inst.c
c687bc
+++ b/hw/s390x/s390-pci-inst.c
c687bc
@@ -32,6 +32,20 @@
c687bc
         }                                                          \
c687bc
     } while (0)
c687bc
 
c687bc
+static inline void inc_dma_avail(S390PCIIOMMU *iommu)
c687bc
+{
c687bc
+    if (iommu->dma_limit) {
c687bc
+        iommu->dma_limit->avail++;
c687bc
+    }
c687bc
+}
c687bc
+
c687bc
+static inline void dec_dma_avail(S390PCIIOMMU *iommu)
c687bc
+{
c687bc
+    if (iommu->dma_limit) {
c687bc
+        iommu->dma_limit->avail--;
c687bc
+    }
c687bc
+}
c687bc
+
c687bc
 static void s390_set_status_code(CPUS390XState *env,
c687bc
                                  uint8_t r, uint64_t status_code)
c687bc
 {
c687bc
@@ -572,7 +586,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
c687bc
     return 0;
c687bc
 }
c687bc
 
c687bc
-static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
c687bc
+static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
c687bc
+                                      S390IOTLBEntry *entry)
c687bc
 {
c687bc
     S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova);
c687bc
     IOMMUTLBEvent event = {
c687bc
@@ -588,14 +603,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
c687bc
 
c687bc
     if (event.type == IOMMU_NOTIFIER_UNMAP) {
c687bc
         if (!cache) {
c687bc
-            return;
c687bc
+            goto out;
c687bc
         }
c687bc
         g_hash_table_remove(iommu->iotlb, &entry->iova);
c687bc
+        inc_dma_avail(iommu);
c687bc
     } else {
c687bc
         if (cache) {
c687bc
             if (cache->perm == entry->perm &&
c687bc
                 cache->translated_addr == entry->translated_addr) {
c687bc
-                return;
c687bc
+                goto out;
c687bc
             }
c687bc
 
c687bc
             event.type = IOMMU_NOTIFIER_UNMAP;
c687bc
@@ -611,9 +627,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
c687bc
         cache->len = PAGE_SIZE;
c687bc
         cache->perm = entry->perm;
c687bc
         g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
c687bc
+        dec_dma_avail(iommu);
c687bc
     }
c687bc
 
c687bc
     memory_region_notify_iommu(&iommu->iommu_mr, 0, event);
c687bc
+
c687bc
+out:
c687bc
+    return iommu->dma_limit ? iommu->dma_limit->avail : 1;
c687bc
 }
c687bc
 
c687bc
 int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
c687bc
@@ -625,6 +645,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
c687bc
     S390PCIIOMMU *iommu;
c687bc
     S390IOTLBEntry entry;
c687bc
     hwaddr start, end;
c687bc
+    uint32_t dma_avail;
c687bc
 
c687bc
     if (env->psw.mask & PSW_MASK_PSTATE) {
c687bc
         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
c687bc
@@ -663,6 +684,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
c687bc
     }
c687bc
 
c687bc
     iommu = pbdev->iommu;
c687bc
+    if (iommu->dma_limit) {
c687bc
+        dma_avail = iommu->dma_limit->avail;
c687bc
+    } else {
c687bc
+        dma_avail = 1;
c687bc
+    }
c687bc
     if (!iommu->g_iota) {
c687bc
         error = ERR_EVENT_INVALAS;
c687bc
         goto err;
c687bc
@@ -680,8 +706,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
c687bc
         }
c687bc
 
c687bc
         start += entry.len;
c687bc
-        while (entry.iova < start && entry.iova < end) {
c687bc
-            s390_pci_update_iotlb(iommu, &entry);
c687bc
+        while (entry.iova < start && entry.iova < end &&
c687bc
+               (dma_avail > 0 || entry.perm == IOMMU_NONE)) {
c687bc
+            dma_avail = s390_pci_update_iotlb(iommu, &entry);
c687bc
             entry.iova += PAGE_SIZE;
c687bc
             entry.translated_addr += PAGE_SIZE;
c687bc
         }
c687bc
@@ -694,7 +721,13 @@ err:
c687bc
         s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
c687bc
     } else {
c687bc
         pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++;
c687bc
-        setcc(cpu, ZPCI_PCI_LS_OK);
c687bc
+        if (dma_avail > 0) {
c687bc
+            setcc(cpu, ZPCI_PCI_LS_OK);
c687bc
+        } else {
c687bc
+            /* vfio DMA mappings are exhausted, trigger a RPCIT */
c687bc
+            setcc(cpu, ZPCI_PCI_LS_ERR);
c687bc
+            s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES);
c687bc
+        }
c687bc
     }
c687bc
     return 0;
c687bc
 }
c687bc
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
c687bc
index 0eb22ffec4c..01c1e8ac89a 100644
c687bc
--- a/hw/s390x/s390-pci-vfio.c
c687bc
+++ b/hw/s390x/s390-pci-vfio.c
c687bc
@@ -12,7 +12,9 @@
c687bc
 #include <sys/ioctl.h>
c687bc
 
c687bc
 #include "qemu/osdep.h"
c687bc
+#include "hw/s390x/s390-pci-bus.h"
c687bc
 #include "hw/s390x/s390-pci-vfio.h"
c687bc
+#include "hw/vfio/pci.h"
c687bc
 #include "hw/vfio/vfio-common.h"
c687bc
 
c687bc
 /*
c687bc
@@ -52,3 +54,43 @@ retry:
c687bc
     return vfio_get_info_dma_avail(info, avail);
c687bc
 }
c687bc
 
c687bc
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
c687bc
+                                          S390PCIBusDevice *pbdev)
c687bc
+{
c687bc
+    S390PCIDMACount *cnt;
c687bc
+    uint32_t avail;
c687bc
+    VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
c687bc
+    int id;
c687bc
+
c687bc
+    assert(vpdev);
c687bc
+
c687bc
+    id = vpdev->vbasedev.group->container->fd;
c687bc
+
c687bc
+    if (!s390_pci_update_dma_avail(id, &avail)) {
c687bc
+        return NULL;
c687bc
+    }
c687bc
+
c687bc
+    QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
c687bc
+        if (cnt->id  == id) {
c687bc
+            cnt->users++;
c687bc
+            return cnt;
c687bc
+        }
c687bc
+    }
c687bc
+
c687bc
+    cnt = g_new0(S390PCIDMACount, 1);
c687bc
+    cnt->id = id;
c687bc
+    cnt->users = 1;
c687bc
+    cnt->avail = avail;
c687bc
+    QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
c687bc
+    return cnt;
c687bc
+}
c687bc
+
c687bc
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
c687bc
+{
c687bc
+    assert(cnt);
c687bc
+
c687bc
+    cnt->users--;
c687bc
+    if (cnt->users == 0) {
c687bc
+        QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
c687bc
+    }
c687bc
+}
c687bc
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
c687bc
index 550f3cc5e92..2f2edbd0bf3 100644
c687bc
--- a/include/hw/s390x/s390-pci-bus.h
c687bc
+++ b/include/hw/s390x/s390-pci-bus.h
c687bc
@@ -266,6 +266,13 @@ typedef struct S390IOTLBEntry {
c687bc
 } S390IOTLBEntry;
c687bc
 
c687bc
 typedef struct S390PCIBusDevice S390PCIBusDevice;
c687bc
+typedef struct S390PCIDMACount {
c687bc
+    int id;
c687bc
+    int users;
c687bc
+    uint32_t avail;
c687bc
+    QTAILQ_ENTRY(S390PCIDMACount) link;
c687bc
+} S390PCIDMACount;
c687bc
+
c687bc
 typedef struct S390PCIIOMMU {
c687bc
     Object parent_obj;
c687bc
     S390PCIBusDevice *pbdev;
c687bc
@@ -277,6 +284,7 @@ typedef struct S390PCIIOMMU {
c687bc
     uint64_t pba;
c687bc
     uint64_t pal;
c687bc
     GHashTable *iotlb;
c687bc
+    S390PCIDMACount *dma_limit;
c687bc
 } S390PCIIOMMU;
c687bc
 
c687bc
 typedef struct S390PCIIOMMUTable {
c687bc
@@ -352,6 +360,7 @@ typedef struct S390pciState {
c687bc
     GHashTable *zpci_table;
c687bc
     QTAILQ_HEAD(, SeiContainer) pending_sei;
c687bc
     QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
c687bc
+    QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit;
c687bc
 } S390pciState;
c687bc
 
c687bc
 S390pciState *s390_get_phb(void);
c687bc
diff --git a/include/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h
c687bc
index fa3bf8b5aad..8ee3a3c2375 100644
c687bc
--- a/include/hw/s390x/s390-pci-inst.h
c687bc
+++ b/include/hw/s390x/s390-pci-inst.h
c687bc
@@ -254,6 +254,9 @@ typedef struct ClpReqRspQueryPciGrp {
c687bc
 #define ZPCI_STPCIFC_ST_INVAL_DMAAS   28
c687bc
 #define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
c687bc
 
c687bc
+/* Refresh PCI Translations status codes */
c687bc
+#define ZPCI_RPCIT_ST_INSUFF_RES      16
c687bc
+
c687bc
 /* FIB function controls */
c687bc
 #define ZPCI_FIB_FC_ENABLED     0x80
c687bc
 #define ZPCI_FIB_FC_ERROR       0x40
c687bc
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
c687bc
index 1727292e9b5..539bcf04eb5 100644
c687bc
--- a/include/hw/s390x/s390-pci-vfio.h
c687bc
+++ b/include/hw/s390x/s390-pci-vfio.h
c687bc
@@ -12,13 +12,25 @@
c687bc
 #ifndef HW_S390_PCI_VFIO_H
c687bc
 #define HW_S390_PCI_VFIO_H
c687bc
 
c687bc
+#include "hw/s390x/s390-pci-bus.h"
c687bc
+
c687bc
 #ifdef CONFIG_LINUX
c687bc
 bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
c687bc
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
c687bc
+                                          S390PCIBusDevice *pbdev);
c687bc
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
c687bc
 #else
c687bc
 static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
c687bc
 {
c687bc
     return false;
c687bc
 }
c687bc
+static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
c687bc
+                                                        S390PCIBusDevice *pbdev)
c687bc
+{
c687bc
+    return NULL;
c687bc
+}
c687bc
+static inline void s390_pci_end_dma_count(S390pciState *s,
c687bc
+                                          S390PCIDMACount *cnt) { }
c687bc
 #endif
c687bc
 
c687bc
 #endif
c687bc
-- 
c687bc
2.27.0
c687bc