97168e
From 62fbb66d18f598d0896164383aab465e093fb0c1 Mon Sep 17 00:00:00 2001
97168e
From: Matthew Rosato <mjrosato@linux.ibm.com>
97168e
Date: Fri, 2 Sep 2022 13:27:32 -0400
97168e
Subject: [PATCH 07/42] s390x/pci: enable for load/store interpretation
97168e
MIME-Version: 1.0
97168e
Content-Type: text/plain; charset=UTF-8
97168e
Content-Transfer-Encoding: 8bit
97168e
97168e
RH-Author: Cédric Le Goater <clg@redhat.com>
97168e
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
97168e
RH-Bugzilla: 1664378 2043909
97168e
RH-Acked-by: Thomas Huth <thuth@redhat.com>
97168e
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
97168e
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
97168e
RH-Commit: [7/41] 3a96e901e295bb9e0c530638c45b5da5d60c00bd
97168e
97168e
If the ZPCI_OP ioctl reports that it is available and usable, then the
97168e
underlying KVM host will enable load/store interpretation for any guest
97168e
device without a SHM bit in the guest function handle.  For a device that
97168e
will be using interpretation support, ensure the guest function handle
97168e
matches the host function handle; this value is re-checked every time the
97168e
guest issues a SET PCI FN to enable the guest device as it is the only
97168e
opportunity to reflect function handle changes.
97168e
97168e
By default, unless interpret=off is specified, interpretation support will
97168e
always be assumed and exploited if the necessary ioctl and features are
97168e
available on the host kernel.  When these are unavailable, we will silently
97168e
revert to the interception model; this allows existing guest configurations
97168e
to work unmodified on hosts with and without zPCI interpretation support,
97168e
allowing QEMU to choose the best support model available.
97168e
97168e
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
97168e
Acked-by: Thomas Huth <thuth@redhat.com>
97168e
Message-Id: <20220902172737.170349-4-mjrosato@linux.ibm.com>
97168e
Signed-off-by: Thomas Huth <thuth@redhat.com>
97168e
(cherry picked from commit dd1d5fd9684beeb0c14c39f497ef2aa9ac683aa7)
97168e
Signed-off-by: Cédric Le Goater <clg@redhat.com>
97168e
---
97168e
 hw/s390x/meson.build            |  1 +
97168e
 hw/s390x/s390-pci-bus.c         | 66 ++++++++++++++++++++++++++++++++-
97168e
 hw/s390x/s390-pci-inst.c        | 16 ++++++++
97168e
 hw/s390x/s390-pci-kvm.c         | 22 +++++++++++
97168e
 include/hw/s390x/s390-pci-bus.h |  1 +
97168e
 include/hw/s390x/s390-pci-kvm.h | 24 ++++++++++++
97168e
 target/s390x/kvm/kvm.c          |  7 ++++
97168e
 target/s390x/kvm/kvm_s390x.h    |  1 +
97168e
 8 files changed, 137 insertions(+), 1 deletion(-)
97168e
 create mode 100644 hw/s390x/s390-pci-kvm.c
97168e
 create mode 100644 include/hw/s390x/s390-pci-kvm.h
97168e
97168e
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
97168e
index 28484256ec..6e6e47fcda 100644
97168e
--- a/hw/s390x/meson.build
97168e
+++ b/hw/s390x/meson.build
97168e
@@ -23,6 +23,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
97168e
   's390-skeys-kvm.c',
97168e
   's390-stattrib-kvm.c',
97168e
   'pv.c',
97168e
+  's390-pci-kvm.c',
97168e
 ))
97168e
 s390x_ss.add(when: 'CONFIG_TCG', if_true: files(
97168e
   'tod-tcg.c',
97168e
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
97168e
index 01b58ebc70..18bfae0465 100644
97168e
--- a/hw/s390x/s390-pci-bus.c
97168e
+++ b/hw/s390x/s390-pci-bus.c
97168e
@@ -16,6 +16,7 @@
97168e
 #include "qapi/visitor.h"
97168e
 #include "hw/s390x/s390-pci-bus.h"
97168e
 #include "hw/s390x/s390-pci-inst.h"
97168e
+#include "hw/s390x/s390-pci-kvm.h"
97168e
 #include "hw/s390x/s390-pci-vfio.h"
97168e
 #include "hw/pci/pci_bus.h"
97168e
 #include "hw/qdev-properties.h"
97168e
@@ -971,12 +972,51 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
97168e
     }
97168e
 }
97168e
 
97168e
+static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)
97168e
+{
97168e
+    uint32_t idx, fh;
97168e
+
97168e
+    if (!s390_pci_get_host_fh(pbdev, &fh)) {
97168e
+        return -EPERM;
97168e
+    }
97168e
+
97168e
+    /*
97168e
+     * The host device is already in an enabled state, but we always present
97168e
+     * the initial device state to the guest as disabled (ZPCI_FS_DISABLED).
97168e
+     * Therefore, mask off the enable bit from the passthrough handle until
97168e
+     * the guest issues a CLP SET PCI FN later to enable the device.
97168e
+     */
97168e
+    pbdev->fh = fh & ~FH_MASK_ENABLE;
97168e
+
97168e
+    /* Next, see if the idx is already in-use */
97168e
+    idx = pbdev->fh & FH_MASK_INDEX;
97168e
+    if (pbdev->idx != idx) {
97168e
+        if (s390_pci_find_dev_by_idx(s, idx)) {
97168e
+            return -EINVAL;
97168e
+        }
97168e
+        /*
97168e
+         * Update the idx entry with the passed through idx
97168e
+         * If the relinquished idx is lower than next_idx, use it
97168e
+         * to replace next_idx
97168e
+         */
97168e
+        g_hash_table_remove(s->zpci_table, &pbdev->idx);
97168e
+        if (idx < s->next_idx) {
97168e
+            s->next_idx = idx;
97168e
+        }
97168e
+        pbdev->idx = idx;
97168e
+        g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev);
97168e
+    }
97168e
+
97168e
+    return 0;
97168e
+}
97168e
+
97168e
 static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
97168e
                               Error **errp)
97168e
 {
97168e
     S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
97168e
     PCIDevice *pdev = NULL;
97168e
     S390PCIBusDevice *pbdev = NULL;
97168e
+    int rc;
97168e
 
97168e
     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
97168e
         PCIBridge *pb = PCI_BRIDGE(dev);
97168e
@@ -1022,12 +1062,35 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
97168e
         set_pbdev_info(pbdev);
97168e
 
97168e
         if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
97168e
-            pbdev->fh |= FH_SHM_VFIO;
97168e
+            /*
97168e
+             * By default, interpretation is always requested; if the available
97168e
+             * facilities indicate it is not available, fallback to the
97168e
+             * interception model.
97168e
+             */
97168e
+            if (pbdev->interp) {
97168e
+                if (s390_pci_kvm_interp_allowed()) {
97168e
+                    rc = s390_pci_interp_plug(s, pbdev);
97168e
+                    if (rc) {
97168e
+                        error_setg(errp, "Plug failed for zPCI device in "
97168e
+                                   "interpretation mode: %d", rc);
97168e
+                        return;
97168e
+                    }
97168e
+                } else {
97168e
+                    DPRINTF("zPCI interpretation facilities missing.\n");
97168e
+                    pbdev->interp = false;
97168e
+                }
97168e
+            }
97168e
             pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
97168e
             /* Fill in CLP information passed via the vfio region */
97168e
             s390_pci_get_clp_info(pbdev);
97168e
+            if (!pbdev->interp) {
97168e
+                /* Do vfio passthrough but intercept for I/O */
97168e
+                pbdev->fh |= FH_SHM_VFIO;
97168e
+            }
97168e
         } else {
97168e
             pbdev->fh |= FH_SHM_EMUL;
97168e
+            /* Always intercept emulated devices */
97168e
+            pbdev->interp = false;
97168e
         }
97168e
 
97168e
         if (s390_pci_msix_init(pbdev)) {
97168e
@@ -1360,6 +1423,7 @@ static Property s390_pci_device_properties[] = {
97168e
     DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
97168e
     DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
97168e
     DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
97168e
+    DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true),
97168e
     DEFINE_PROP_END_OF_LIST(),
97168e
 };
97168e
 
97168e
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
97168e
index 6d400d4147..651ec38635 100644
97168e
--- a/hw/s390x/s390-pci-inst.c
97168e
+++ b/hw/s390x/s390-pci-inst.c
97168e
@@ -18,6 +18,8 @@
97168e
 #include "sysemu/hw_accel.h"
97168e
 #include "hw/s390x/s390-pci-inst.h"
97168e
 #include "hw/s390x/s390-pci-bus.h"
97168e
+#include "hw/s390x/s390-pci-kvm.h"
97168e
+#include "hw/s390x/s390-pci-vfio.h"
97168e
 #include "hw/s390x/tod.h"
97168e
 
97168e
 #ifndef DEBUG_S390PCI_INST
97168e
@@ -246,6 +248,20 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
97168e
                 goto out;
97168e
             }
97168e
 
97168e
+            /*
97168e
+             * Take this opportunity to make sure we still have an accurate
97168e
+             * host fh.  It's possible part of the handle changed while the
97168e
+             * device was disabled to the guest (e.g. vfio hot reset for
97168e
+             * ISM during plug)
97168e
+             */
97168e
+            if (pbdev->interp) {
97168e
+                /* Take this opportunity to make sure we are sync'd with host */
97168e
+                if (!s390_pci_get_host_fh(pbdev, &pbdev->fh) ||
97168e
+                    !(pbdev->fh & FH_MASK_ENABLE)) {
97168e
+                    stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FH);
97168e
+                    goto out;
97168e
+                }
97168e
+            }
97168e
             pbdev->fh |= FH_MASK_ENABLE;
97168e
             pbdev->state = ZPCI_FS_ENABLED;
97168e
             stl_p(&ressetpci->fh, pbdev->fh);
97168e
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
97168e
new file mode 100644
97168e
index 0000000000..0f16104a74
97168e
--- /dev/null
97168e
+++ b/hw/s390x/s390-pci-kvm.c
97168e
@@ -0,0 +1,22 @@
97168e
+/*
97168e
+ * s390 zPCI KVM interfaces
97168e
+ *
97168e
+ * Copyright 2022 IBM Corp.
97168e
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
97168e
+ *
97168e
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
97168e
+ * your option) any later version. See the COPYING file in the top-level
97168e
+ * directory.
97168e
+ */
97168e
+
97168e
+#include "qemu/osdep.h"
97168e
+
97168e
+#include "kvm/kvm_s390x.h"
97168e
+#include "hw/s390x/pv.h"
97168e
+#include "hw/s390x/s390-pci-kvm.h"
97168e
+#include "cpu_models.h"
97168e
+
97168e
+bool s390_pci_kvm_interp_allowed(void)
97168e
+{
97168e
+    return kvm_s390_get_zpci_op() && !s390_is_pv();
97168e
+}
97168e
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
97168e
index da3cde2bb4..a9843dfe97 100644
97168e
--- a/include/hw/s390x/s390-pci-bus.h
97168e
+++ b/include/hw/s390x/s390-pci-bus.h
97168e
@@ -350,6 +350,7 @@ struct S390PCIBusDevice {
97168e
     IndAddr *indicator;
97168e
     bool pci_unplug_request_processed;
97168e
     bool unplug_requested;
97168e
+    bool interp;
97168e
     QTAILQ_ENTRY(S390PCIBusDevice) link;
97168e
 };
97168e
 
97168e
diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h
97168e
new file mode 100644
97168e
index 0000000000..80a2e7d0ca
97168e
--- /dev/null
97168e
+++ b/include/hw/s390x/s390-pci-kvm.h
97168e
@@ -0,0 +1,24 @@
97168e
+/*
97168e
+ * s390 PCI KVM interfaces
97168e
+ *
97168e
+ * Copyright 2022 IBM Corp.
97168e
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
97168e
+ *
97168e
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
97168e
+ * your option) any later version. See the COPYING file in the top-level
97168e
+ * directory.
97168e
+ */
97168e
+
97168e
+#ifndef HW_S390_PCI_KVM_H
97168e
+#define HW_S390_PCI_KVM_H
97168e
+
97168e
+#ifdef CONFIG_KVM
97168e
+bool s390_pci_kvm_interp_allowed(void);
97168e
+#else
97168e
+static inline bool s390_pci_kvm_interp_allowed(void)
97168e
+{
97168e
+    return false;
97168e
+}
97168e
+#endif
97168e
+
97168e
+#endif
97168e
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
97168e
index ba04997da1..30712487d4 100644
97168e
--- a/target/s390x/kvm/kvm.c
97168e
+++ b/target/s390x/kvm/kvm.c
97168e
@@ -158,6 +158,7 @@ static int cap_ri;
97168e
 static int cap_hpage_1m;
97168e
 static int cap_vcpu_resets;
97168e
 static int cap_protected;
97168e
+static int cap_zpci_op;
97168e
 
97168e
 static bool mem_op_storage_key_support;
97168e
 
97168e
@@ -363,6 +364,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
97168e
     cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ);
97168e
     cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS);
97168e
     cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED);
97168e
+    cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP);
97168e
 
97168e
     kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0);
97168e
     kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0);
97168e
@@ -2579,3 +2581,8 @@ bool kvm_arch_cpu_check_are_resettable(void)
97168e
 {
97168e
     return true;
97168e
 }
97168e
+
97168e
+int kvm_s390_get_zpci_op(void)
97168e
+{
97168e
+    return cap_zpci_op;
97168e
+}
97168e
diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h
97168e
index 05a5e1e6f4..aaae8570de 100644
97168e
--- a/target/s390x/kvm/kvm_s390x.h
97168e
+++ b/target/s390x/kvm/kvm_s390x.h
97168e
@@ -27,6 +27,7 @@ void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu);
97168e
 int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu);
97168e
 int kvm_s390_get_hpage_1m(void);
97168e
 int kvm_s390_get_ri(void);
97168e
+int kvm_s390_get_zpci_op(void);
97168e
 int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock);
97168e
 int kvm_s390_get_clock_ext(uint8_t *tod_high, uint64_t *tod_clock);
97168e
 int kvm_s390_set_clock(uint8_t tod_high, uint64_t tod_clock);
97168e
-- 
97168e
2.37.3
97168e