thebeanogamer / rpms / qemu-kvm

Forked from rpms/qemu-kvm 5 months ago
Clone

Blame SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch

ed5979
From c64027b1ff9856031c01009f4b5c3560d92cc998 Mon Sep 17 00:00:00 2001
ed5979
From: Stefan Hajnoczi <stefanha@redhat.com>
ed5979
Date: Tue, 21 Feb 2023 16:22:18 -0500
ed5979
Subject: [PATCH 03/12] virtio-scsi: reset SCSI devices from main loop thread
ed5979
ed5979
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
ed5979
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
ed5979
RH-Bugzilla: 2155748
ed5979
RH-Acked-by: Eric Blake <eblake@redhat.com>
ed5979
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
ed5979
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
ed5979
RH-Commit: [3/3] 2a29cb9600709a799daadb4addb58a747ed2e3a3 (stefanha/centos-stream-qemu-kvm)
ed5979
ed5979
When an IOThread is configured, the ctrl virtqueue is processed in the
ed5979
IOThread. TMFs that reset SCSI devices are currently called directly
ed5979
from the IOThread and trigger an assertion failure in blk_drain() from
ed5979
the following call stack:
ed5979
ed5979
virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset
ed5979
-> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain
ed5979
ed5979
  ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed.
ed5979
ed5979
The blk_drain() function is not designed to be called from an IOThread
ed5979
because it needs the Big QEMU Lock (BQL).
ed5979
ed5979
This patch defers TMFs that reset SCSI devices to a Bottom Half (BH)
ed5979
that runs in the main loop thread under the BQL. This way it's safe to
ed5979
call blk_drain() and the assertion failure is avoided.
ed5979
ed5979
Introduce s->tmf_bh_list for tracking TMF requests that have been
ed5979
deferred to the BH. When the BH runs it will grab the entire list and
ed5979
process all requests. Care must be taken to clear the list when the
ed5979
virtio-scsi device is reset or unrealized. Otherwise deferred TMF
ed5979
requests could execute later and lead to use-after-free or other
ed5979
undefined behavior.
ed5979
ed5979
The s->resetting counter that's used by TMFs that reset SCSI devices is
ed5979
accessed from multiple threads. This patch makes that explicit by using
ed5979
atomic accessor functions. With this patch applied the counter is only
ed5979
modified by the main loop thread under the BQL but can be read by any
ed5979
thread.
ed5979
ed5979
Reported-by: Qing Wang <qinwang@redhat.com>
ed5979
Cc: Paolo Bonzini <pbonzini@redhat.com>
ed5979
Reviewed-by: Eric Blake <eblake@redhat.com>
ed5979
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
ed5979
Message-Id: <20230221212218.1378734-4-stefanha@redhat.com>
ed5979
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ed5979
(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5)
ed5979
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
ed5979
---
ed5979
 hw/scsi/virtio-scsi.c           | 169 +++++++++++++++++++++++++-------
ed5979
 include/hw/virtio/virtio-scsi.h |  11 ++-
ed5979
 2 files changed, 143 insertions(+), 37 deletions(-)
ed5979
ed5979
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
ed5979
index 6f6e2e32ba..7d27e4c2a1 100644
ed5979
--- a/hw/scsi/virtio-scsi.c
ed5979
+++ b/hw/scsi/virtio-scsi.c
ed5979
@@ -42,13 +42,11 @@ typedef struct VirtIOSCSIReq {
ed5979
     QEMUSGList qsgl;
ed5979
     QEMUIOVector resp_iov;
ed5979
 
ed5979
-    union {
ed5979
-        /* Used for two-stage request submission */
ed5979
-        QTAILQ_ENTRY(VirtIOSCSIReq) next;
ed5979
+    /* Used for two-stage request submission and TMFs deferred to BH */
ed5979
+    QTAILQ_ENTRY(VirtIOSCSIReq) next;
ed5979
 
ed5979
-        /* Used for cancellation of request during TMFs */
ed5979
-        int remaining;
ed5979
-    };
ed5979
+    /* Used for cancellation of request during TMFs */
ed5979
+    int remaining;
ed5979
 
ed5979
     SCSIRequest *sreq;
ed5979
     size_t resp_size;
ed5979
@@ -293,6 +291,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
ed5979
     }
ed5979
 }
ed5979
 
ed5979
+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
ed5979
+{
ed5979
+    VirtIOSCSI *s = req->dev;
ed5979
+    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
ed5979
+    BusChild *kid;
ed5979
+    int target;
ed5979
+
ed5979
+    switch (req->req.tmf.subtype) {
ed5979
+    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
ed5979
+        if (!d) {
ed5979
+            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
ed5979
+            goto out;
ed5979
+        }
ed5979
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
ed5979
+            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
ed5979
+            goto out;
ed5979
+        }
ed5979
+        qatomic_inc(&s->resetting);
ed5979
+        device_cold_reset(&d->qdev);
ed5979
+        qatomic_dec(&s->resetting);
ed5979
+        break;
ed5979
+
ed5979
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
ed5979
+        target = req->req.tmf.lun[1];
ed5979
+        qatomic_inc(&s->resetting);
ed5979
+
ed5979
+        rcu_read_lock();
ed5979
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
ed5979
+            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
ed5979
+            if (d1->channel == 0 && d1->id == target) {
ed5979
+                device_cold_reset(&d1->qdev);
ed5979
+            }
ed5979
+        }
ed5979
+        rcu_read_unlock();
ed5979
+
ed5979
+        qatomic_dec(&s->resetting);
ed5979
+        break;
ed5979
+
ed5979
+    default:
ed5979
+        g_assert_not_reached();
ed5979
+        break;
ed5979
+    }
ed5979
+
ed5979
+out:
ed5979
+    object_unref(OBJECT(d));
ed5979
+
ed5979
+    virtio_scsi_acquire(s);
ed5979
+    virtio_scsi_complete_req(req);
ed5979
+    virtio_scsi_release(s);
ed5979
+}
ed5979
+
ed5979
+/* Some TMFs must be processed from the main loop thread */
ed5979
+static void virtio_scsi_do_tmf_bh(void *opaque)
ed5979
+{
ed5979
+    VirtIOSCSI *s = opaque;
ed5979
+    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
ed5979
+    VirtIOSCSIReq *req;
ed5979
+    VirtIOSCSIReq *tmp;
ed5979
+
ed5979
+    GLOBAL_STATE_CODE();
ed5979
+
ed5979
+    virtio_scsi_acquire(s);
ed5979
+
ed5979
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
ed5979
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
ed5979
+        QTAILQ_INSERT_TAIL(&reqs, req, next);
ed5979
+    }
ed5979
+
ed5979
+    qemu_bh_delete(s->tmf_bh);
ed5979
+    s->tmf_bh = NULL;
ed5979
+
ed5979
+    virtio_scsi_release(s);
ed5979
+
ed5979
+    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
ed5979
+        QTAILQ_REMOVE(&reqs, req, next);
ed5979
+        virtio_scsi_do_one_tmf_bh(req);
ed5979
+    }
ed5979
+}
ed5979
+
ed5979
+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
ed5979
+{
ed5979
+    VirtIOSCSIReq *req;
ed5979
+    VirtIOSCSIReq *tmp;
ed5979
+
ed5979
+    GLOBAL_STATE_CODE();
ed5979
+
ed5979
+    virtio_scsi_acquire(s);
ed5979
+
ed5979
+    if (s->tmf_bh) {
ed5979
+        qemu_bh_delete(s->tmf_bh);
ed5979
+        s->tmf_bh = NULL;
ed5979
+    }
ed5979
+
ed5979
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
ed5979
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
ed5979
+
ed5979
+        /* SAM-6 6.3.2 Hard reset */
ed5979
+        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
ed5979
+        virtio_scsi_complete_req(req);
ed5979
+    }
ed5979
+
ed5979
+    virtio_scsi_release(s);
ed5979
+}
ed5979
+
ed5979
+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
ed5979
+{
ed5979
+    VirtIOSCSI *s = req->dev;
ed5979
+
ed5979
+    QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
ed5979
+
ed5979
+    if (!s->tmf_bh) {
ed5979
+        s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
ed5979
+        qemu_bh_schedule(s->tmf_bh);
ed5979
+    }
ed5979
+}
ed5979
+
ed5979
 /* Return 0 if the request is ready to be completed and return to guest;
ed5979
  * -EINPROGRESS if the request is submitted and will be completed later, in the
ed5979
  *  case of async cancellation. */
ed5979
@@ -300,8 +414,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
ed5979
 {
ed5979
     SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
ed5979
     SCSIRequest *r, *next;
ed5979
-    BusChild *kid;
ed5979
-    int target;
ed5979
     int ret = 0;
ed5979
 
ed5979
     virtio_scsi_ctx_check(s, d);
ed5979
@@ -358,15 +470,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
ed5979
         break;
ed5979
 
ed5979
     case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
ed5979
-        if (!d) {
ed5979
-            goto fail;
ed5979
-        }
ed5979
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
ed5979
-            goto incorrect_lun;
ed5979
-        }
ed5979
-        s->resetting++;
ed5979
-        device_cold_reset(&d->qdev);
ed5979
-        s->resetting--;
ed5979
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
ed5979
+        virtio_scsi_defer_tmf_to_bh(req);
ed5979
+        ret = -EINPROGRESS;
ed5979
         break;
ed5979
 
ed5979
     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
ed5979
@@ -409,22 +515,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
ed5979
         }
ed5979
         break;
ed5979
 
ed5979
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
ed5979
-        target = req->req.tmf.lun[1];
ed5979
-        s->resetting++;
ed5979
-
ed5979
-        rcu_read_lock();
ed5979
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
ed5979
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
ed5979
-            if (d1->channel == 0 && d1->id == target) {
ed5979
-                device_cold_reset(&d1->qdev);
ed5979
-            }
ed5979
-        }
ed5979
-        rcu_read_unlock();
ed5979
-
ed5979
-        s->resetting--;
ed5979
-        break;
ed5979
-
ed5979
     case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
ed5979
     default:
ed5979
         req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
ed5979
@@ -654,7 +744,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r)
ed5979
     if (!req) {
ed5979
         return;
ed5979
     }
ed5979
-    if (req->dev->resetting) {
ed5979
+    if (qatomic_read(&req->dev->resetting)) {
ed5979
         req->resp.cmd.response = VIRTIO_SCSI_S_RESET;
ed5979
     } else {
ed5979
         req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED;
ed5979
@@ -830,9 +920,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
ed5979
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
ed5979
 
ed5979
     assert(!s->dataplane_started);
ed5979
-    s->resetting++;
ed5979
+
ed5979
+    virtio_scsi_reset_tmf_bh(s);
ed5979
+
ed5979
+    qatomic_inc(&s->resetting);
ed5979
     bus_cold_reset(BUS(&s->bus));
ed5979
-    s->resetting--;
ed5979
+    qatomic_dec(&s->resetting);
ed5979
 
ed5979
     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
ed5979
     vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
ed5979
@@ -1052,6 +1145,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
ed5979
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
ed5979
     Error *err = NULL;
ed5979
 
ed5979
+    QTAILQ_INIT(&s->tmf_bh_list);
ed5979
+
ed5979
     virtio_scsi_common_realize(dev,
ed5979
                                virtio_scsi_handle_ctrl,
ed5979
                                virtio_scsi_handle_event,
ed5979
@@ -1089,6 +1184,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
ed5979
 {
ed5979
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
ed5979
 
ed5979
+    virtio_scsi_reset_tmf_bh(s);
ed5979
+
ed5979
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
ed5979
     virtio_scsi_common_unrealize(dev);
ed5979
 }
ed5979
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
ed5979
index a36aad9c86..1c1cd77d6e 100644
ed5979
--- a/include/hw/virtio/virtio-scsi.h
ed5979
+++ b/include/hw/virtio/virtio-scsi.h
ed5979
@@ -75,13 +75,22 @@ struct VirtIOSCSICommon {
ed5979
     VirtQueue **cmd_vqs;
ed5979
 };
ed5979
 
ed5979
+struct VirtIOSCSIReq;
ed5979
+
ed5979
 struct VirtIOSCSI {
ed5979
     VirtIOSCSICommon parent_obj;
ed5979
 
ed5979
     SCSIBus bus;
ed5979
-    int resetting;
ed5979
+    int resetting; /* written from main loop thread, read from any thread */
ed5979
     bool events_dropped;
ed5979
 
ed5979
+    /*
ed5979
+     * TMFs deferred to main loop BH. These fields are protected by
ed5979
+     * virtio_scsi_acquire().
ed5979
+     */
ed5979
+    QEMUBH *tmf_bh;
ed5979
+    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
ed5979
+
ed5979
     /* Fields for dataplane below */
ed5979
     AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
ed5979
 
ed5979
-- 
ed5979
2.39.1
ed5979