8fced6
From 7ef9b9c593da98ad32ad20c28d17bb2700a35c29 Mon Sep 17 00:00:00 2001
8fced6
From: Cornelia Huck <cohuck@redhat.com>
8fced6
Date: Tue, 19 Jan 2021 12:50:45 -0500
8fced6
Subject: [PATCH 6/7] s390x/pci: Honor DMA limits set by vfio
8fced6
8fced6
RH-Author: Cornelia Huck <cohuck@redhat.com>
8fced6
Message-id: <20210119125046.472811-7-cohuck@redhat.com>
8fced6
Patchwork-id: 100680
8fced6
O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 6/7] s390x/pci: Honor DMA limits set by vfio
8fced6
Bugzilla: 1905391
8fced6
RH-Acked-by: David Hildenbrand <david@redhat.com>
8fced6
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
8fced6
RH-Acked-by: Thomas Huth <thuth@redhat.com>
8fced6
8fced6
From: Matthew Rosato <mjrosato@linux.ibm.com>
8fced6
8fced6
When an s390 guest is using lazy unmapping, it can result in a very
8fced6
large number of outstanding DMA requests, far beyond the default
8fced6
limit configured for vfio.  Let's track DMA usage similar to vfio
8fced6
in the host, and trigger the guest to flush their DMA mappings
8fced6
before vfio runs out.
8fced6
8fced6
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
8fced6
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
8fced6
[aw: non-Linux build fixes]
8fced6
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
8fced6
(cherry picked from commit 37fa32de707340f3a93959ad5a1ebc41ba1520ee)
8fced6
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
8fced6
8fced6
 Conflicts:
8fced6
	hw/s390x/s390-pci-bus.c
8fced6
        --> adapt to missing 981c3dcd9489 ("qdev: Convert to
8fced6
            qdev_unrealize() with Coccinelle")
8fced6
	hw/s390x/s390-pci-inst.c
8fced6
        --> adapt to out of order inclusion of 5039caf3c449 ("memory:
8fced6
            Add IOMMUTLBEvent")
8fced6
	include/hw/s390x/s390-pci-bus.h
8fced6
        --> adapt to missing db1015e92e04 ("Move QOM typedefs and
8fced6
            add missing includes")
8fced6
8fced6
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
8fced6
---
8fced6
 hw/s390x/s390-pci-bus.c          | 16 ++++++++----
8fced6
 hw/s390x/s390-pci-inst.c         | 45 +++++++++++++++++++++++++++-----
8fced6
 hw/s390x/s390-pci-vfio.c         | 42 +++++++++++++++++++++++++++++
8fced6
 include/hw/s390x/s390-pci-bus.h  |  9 +++++++
8fced6
 include/hw/s390x/s390-pci-inst.h |  3 +++
8fced6
 include/hw/s390x/s390-pci-vfio.h | 12 +++++++++
8fced6
 6 files changed, 116 insertions(+), 11 deletions(-)
8fced6
8fced6
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
8fced6
index 6daef2b6d57..a9f6f550472 100644
8fced6
--- a/hw/s390x/s390-pci-bus.c
8fced6
+++ b/hw/s390x/s390-pci-bus.c
8fced6
@@ -17,6 +17,7 @@
8fced6
 #include "cpu.h"
8fced6
 #include "hw/s390x/s390-pci-bus.h"
8fced6
 #include "hw/s390x/s390-pci-inst.h"
8fced6
+#include "hw/s390x/s390-pci-vfio.h"
8fced6
 #include "hw/pci/pci_bus.h"
8fced6
 #include "hw/qdev-properties.h"
8fced6
 #include "hw/pci/pci_bridge.h"
8fced6
@@ -771,6 +772,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
8fced6
     s->bus_no = 0;
8fced6
     QTAILQ_INIT(&s->pending_sei);
8fced6
     QTAILQ_INIT(&s->zpci_devs);
8fced6
+    QTAILQ_INIT(&s->zpci_dma_limit);
8fced6
 
8fced6
     css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
8fced6
                              S390_ADAPTER_SUPPRESSIBLE, &local_err);
8fced6
@@ -951,17 +953,18 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
8fced6
             }
8fced6
         }
8fced6
 
8fced6
+        pbdev->pdev = pdev;
8fced6
+        pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
8fced6
+        pbdev->iommu->pbdev = pbdev;
8fced6
+        pbdev->state = ZPCI_FS_DISABLED;
8fced6
+
8fced6
         if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
8fced6
             pbdev->fh |= FH_SHM_VFIO;
8fced6
+            pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
8fced6
         } else {
8fced6
             pbdev->fh |= FH_SHM_EMUL;
8fced6
         }
8fced6
 
8fced6
-        pbdev->pdev = pdev;
8fced6
-        pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
8fced6
-        pbdev->iommu->pbdev = pbdev;
8fced6
-        pbdev->state = ZPCI_FS_DISABLED;
8fced6
-
8fced6
         if (s390_pci_msix_init(pbdev)) {
8fced6
             error_setg(errp, "MSI-X support is mandatory "
8fced6
                        "in the S390 architecture");
8fced6
@@ -1014,6 +1017,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
8fced6
         pbdev->fid = 0;
8fced6
         QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
8fced6
         g_hash_table_remove(s->zpci_table, &pbdev->idx);
8fced6
+        if (pbdev->iommu->dma_limit) {
8fced6
+            s390_pci_end_dma_count(s, pbdev->iommu->dma_limit);
8fced6
+        }
8fced6
         object_property_set_bool(OBJECT(dev), false, "realized", NULL);
8fced6
     }
8fced6
 }
8fced6
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
8fced6
index b1885344f18..edbdf727984 100644
8fced6
--- a/hw/s390x/s390-pci-inst.c
8fced6
+++ b/hw/s390x/s390-pci-inst.c
8fced6
@@ -32,6 +32,20 @@
8fced6
         }                                                          \
8fced6
     } while (0)
8fced6
 
8fced6
+static inline void inc_dma_avail(S390PCIIOMMU *iommu)
8fced6
+{
8fced6
+    if (iommu->dma_limit) {
8fced6
+        iommu->dma_limit->avail++;
8fced6
+    }
8fced6
+}
8fced6
+
8fced6
+static inline void dec_dma_avail(S390PCIIOMMU *iommu)
8fced6
+{
8fced6
+    if (iommu->dma_limit) {
8fced6
+        iommu->dma_limit->avail--;
8fced6
+    }
8fced6
+}
8fced6
+
8fced6
 static void s390_set_status_code(CPUS390XState *env,
8fced6
                                  uint8_t r, uint64_t status_code)
8fced6
 {
8fced6
@@ -572,7 +586,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
8fced6
     return 0;
8fced6
 }
8fced6
 
8fced6
-static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
8fced6
+static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
8fced6
+                                      S390IOTLBEntry *entry)
8fced6
 {
8fced6
     S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova);
8fced6
     IOMMUTLBEvent event = {
8fced6
@@ -588,14 +603,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
8fced6
 
8fced6
     if (event.type == IOMMU_NOTIFIER_UNMAP) {
8fced6
         if (!cache) {
8fced6
-            return;
8fced6
+            goto out;
8fced6
         }
8fced6
         g_hash_table_remove(iommu->iotlb, &entry->iova);
8fced6
+        inc_dma_avail(iommu);
8fced6
     } else {
8fced6
         if (cache) {
8fced6
             if (cache->perm == entry->perm &&
8fced6
                 cache->translated_addr == entry->translated_addr) {
8fced6
-                return;
8fced6
+                goto out;
8fced6
             }
8fced6
 
8fced6
             event.type = IOMMU_NOTIFIER_UNMAP;
8fced6
@@ -611,9 +627,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
8fced6
         cache->len = PAGE_SIZE;
8fced6
         cache->perm = entry->perm;
8fced6
         g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
8fced6
+        dec_dma_avail(iommu);
8fced6
     }
8fced6
 
8fced6
     memory_region_notify_iommu(&iommu->iommu_mr, 0, event);
8fced6
+
8fced6
+out:
8fced6
+    return iommu->dma_limit ? iommu->dma_limit->avail : 1;
8fced6
 }
8fced6
 
8fced6
 int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
8fced6
@@ -625,6 +645,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
8fced6
     S390PCIIOMMU *iommu;
8fced6
     S390IOTLBEntry entry;
8fced6
     hwaddr start, end;
8fced6
+    uint32_t dma_avail;
8fced6
 
8fced6
     if (env->psw.mask & PSW_MASK_PSTATE) {
8fced6
         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
8fced6
@@ -663,6 +684,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
8fced6
     }
8fced6
 
8fced6
     iommu = pbdev->iommu;
8fced6
+    if (iommu->dma_limit) {
8fced6
+        dma_avail = iommu->dma_limit->avail;
8fced6
+    } else {
8fced6
+        dma_avail = 1;
8fced6
+    }
8fced6
     if (!iommu->g_iota) {
8fced6
         error = ERR_EVENT_INVALAS;
8fced6
         goto err;
8fced6
@@ -680,8 +706,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
8fced6
         }
8fced6
 
8fced6
         start += entry.len;
8fced6
-        while (entry.iova < start && entry.iova < end) {
8fced6
-            s390_pci_update_iotlb(iommu, &entry);
8fced6
+        while (entry.iova < start && entry.iova < end &&
8fced6
+               (dma_avail > 0 || entry.perm == IOMMU_NONE)) {
8fced6
+            dma_avail = s390_pci_update_iotlb(iommu, &entry);
8fced6
             entry.iova += PAGE_SIZE;
8fced6
             entry.translated_addr += PAGE_SIZE;
8fced6
         }
8fced6
@@ -694,7 +721,13 @@ err:
8fced6
         s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
8fced6
     } else {
8fced6
         pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++;
8fced6
-        setcc(cpu, ZPCI_PCI_LS_OK);
8fced6
+        if (dma_avail > 0) {
8fced6
+            setcc(cpu, ZPCI_PCI_LS_OK);
8fced6
+        } else {
8fced6
+            /* vfio DMA mappings are exhausted, trigger a RPCIT */
8fced6
+            setcc(cpu, ZPCI_PCI_LS_ERR);
8fced6
+            s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES);
8fced6
+        }
8fced6
     }
8fced6
     return 0;
8fced6
 }
8fced6
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
8fced6
index 0eb22ffec4c..01c1e8ac89a 100644
8fced6
--- a/hw/s390x/s390-pci-vfio.c
8fced6
+++ b/hw/s390x/s390-pci-vfio.c
8fced6
@@ -12,7 +12,9 @@
8fced6
 #include <sys/ioctl.h>
8fced6
 
8fced6
 #include "qemu/osdep.h"
8fced6
+#include "hw/s390x/s390-pci-bus.h"
8fced6
 #include "hw/s390x/s390-pci-vfio.h"
8fced6
+#include "hw/vfio/pci.h"
8fced6
 #include "hw/vfio/vfio-common.h"
8fced6
 
8fced6
 /*
8fced6
@@ -52,3 +54,43 @@ retry:
8fced6
     return vfio_get_info_dma_avail(info, avail);
8fced6
 }
8fced6
 
8fced6
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
8fced6
+                                          S390PCIBusDevice *pbdev)
8fced6
+{
8fced6
+    S390PCIDMACount *cnt;
8fced6
+    uint32_t avail;
8fced6
+    VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
8fced6
+    int id;
8fced6
+
8fced6
+    assert(vpdev);
8fced6
+
8fced6
+    id = vpdev->vbasedev.group->container->fd;
8fced6
+
8fced6
+    if (!s390_pci_update_dma_avail(id, &avail)) {
8fced6
+        return NULL;
8fced6
+    }
8fced6
+
8fced6
+    QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
8fced6
+        if (cnt->id  == id) {
8fced6
+            cnt->users++;
8fced6
+            return cnt;
8fced6
+        }
8fced6
+    }
8fced6
+
8fced6
+    cnt = g_new0(S390PCIDMACount, 1);
8fced6
+    cnt->id = id;
8fced6
+    cnt->users = 1;
8fced6
+    cnt->avail = avail;
8fced6
+    QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
8fced6
+    return cnt;
8fced6
+}
8fced6
+
8fced6
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
8fced6
+{
8fced6
+    assert(cnt);
8fced6
+
8fced6
+    cnt->users--;
8fced6
+    if (cnt->users == 0) {
8fced6
+        QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
8fced6
+    }
8fced6
+}
8fced6
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
8fced6
index 550f3cc5e92..2f2edbd0bf3 100644
8fced6
--- a/include/hw/s390x/s390-pci-bus.h
8fced6
+++ b/include/hw/s390x/s390-pci-bus.h
8fced6
@@ -266,6 +266,13 @@ typedef struct S390IOTLBEntry {
8fced6
 } S390IOTLBEntry;
8fced6
 
8fced6
 typedef struct S390PCIBusDevice S390PCIBusDevice;
8fced6
+typedef struct S390PCIDMACount {
8fced6
+    int id;
8fced6
+    int users;
8fced6
+    uint32_t avail;
8fced6
+    QTAILQ_ENTRY(S390PCIDMACount) link;
8fced6
+} S390PCIDMACount;
8fced6
+
8fced6
 typedef struct S390PCIIOMMU {
8fced6
     Object parent_obj;
8fced6
     S390PCIBusDevice *pbdev;
8fced6
@@ -277,6 +284,7 @@ typedef struct S390PCIIOMMU {
8fced6
     uint64_t pba;
8fced6
     uint64_t pal;
8fced6
     GHashTable *iotlb;
8fced6
+    S390PCIDMACount *dma_limit;
8fced6
 } S390PCIIOMMU;
8fced6
 
8fced6
 typedef struct S390PCIIOMMUTable {
8fced6
@@ -352,6 +360,7 @@ typedef struct S390pciState {
8fced6
     GHashTable *zpci_table;
8fced6
     QTAILQ_HEAD(, SeiContainer) pending_sei;
8fced6
     QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
8fced6
+    QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit;
8fced6
 } S390pciState;
8fced6
 
8fced6
 S390pciState *s390_get_phb(void);
8fced6
diff --git a/include/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h
8fced6
index fa3bf8b5aad..8ee3a3c2375 100644
8fced6
--- a/include/hw/s390x/s390-pci-inst.h
8fced6
+++ b/include/hw/s390x/s390-pci-inst.h
8fced6
@@ -254,6 +254,9 @@ typedef struct ClpReqRspQueryPciGrp {
8fced6
 #define ZPCI_STPCIFC_ST_INVAL_DMAAS   28
8fced6
 #define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
8fced6
 
8fced6
+/* Refresh PCI Translations status codes */
8fced6
+#define ZPCI_RPCIT_ST_INSUFF_RES      16
8fced6
+
8fced6
 /* FIB function controls */
8fced6
 #define ZPCI_FIB_FC_ENABLED     0x80
8fced6
 #define ZPCI_FIB_FC_ERROR       0x40
8fced6
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
8fced6
index 1727292e9b5..539bcf04eb5 100644
8fced6
--- a/include/hw/s390x/s390-pci-vfio.h
8fced6
+++ b/include/hw/s390x/s390-pci-vfio.h
8fced6
@@ -12,13 +12,25 @@
8fced6
 #ifndef HW_S390_PCI_VFIO_H
8fced6
 #define HW_S390_PCI_VFIO_H
8fced6
 
8fced6
+#include "hw/s390x/s390-pci-bus.h"
8fced6
+
8fced6
 #ifdef CONFIG_LINUX
8fced6
 bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
8fced6
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
8fced6
+                                          S390PCIBusDevice *pbdev);
8fced6
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
8fced6
 #else
8fced6
 static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
8fced6
 {
8fced6
     return false;
8fced6
 }
8fced6
+static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
8fced6
+                                                        S390PCIBusDevice *pbdev)
8fced6
+{
8fced6
+    return NULL;
8fced6
+}
8fced6
+static inline void s390_pci_end_dma_count(S390pciState *s,
8fced6
+                                          S390PCIDMACount *cnt) { }
8fced6
 #endif
8fced6
 
8fced6
 #endif
8fced6
-- 
8fced6
2.27.0
8fced6