Blame SOURCES/0097-scsi-scsi-qla2xxx-Fix-race-conditions-in-the-code-fo.patch

3c6e85
From 92d94d4d1779723fb5605f1d1fa235f86047da00 Mon Sep 17 00:00:00 2001
3c6e85
From: Himanshu Madhani <hmadhani@redhat.com>
3c6e85
Date: Thu, 1 Aug 2019 15:55:57 -0400
3c6e85
Subject: [PATCH 097/124] [scsi] scsi: qla2xxx: Fix race conditions in the code
3c6e85
 for aborting SCSI commands
3c6e85
3c6e85
Message-id: <20190801155618.12650-98-hmadhani@redhat.com>
3c6e85
Patchwork-id: 267896
3c6e85
O-Subject: [RHEL 7.8 e-stor PATCH 097/118] scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands
3c6e85
Bugzilla: 1729270
3c6e85
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
3c6e85
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
3c6e85
3c6e85
From: Bart Van Assche <bvanassche@acm.org>
3c6e85
3c6e85
Bugzilla 1729270
3c6e85
3c6e85
In the *_done() functions, instead of returning early if sp->ref_count >=
3c6e85
2, only decrement sp->ref_count. In qla2xxx_eh_abort(), instead of deciding
3c6e85
what to do based on the value of sp->ref_count, decide which action to take
3c6e85
depending on the completion status of the firmware abort. Remove srb.cwaitq
3c6e85
and use srb.comp instead. In qla2x00_abort_srb(), call
3c6e85
isp_ops->abort_command() directly instead of calling qla2xxx_eh_abort().
3c6e85
3c6e85
Cc: Himanshu Madhani <hmadhani@marvell.com>
3c6e85
Cc: Giridhar Malavali <gmalavali@marvell.com>
3c6e85
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
3c6e85
Acked-by: Himanshu Madhani <hmadhani@marvell.com>
3c6e85
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
3c6e85
(cherry picked from commit 219d27d7147e07fe899a781bd72f9180b78c3852)
3c6e85
Signed-off-by: Himanshu Madhani <hmadhani@redhat.com>
3c6e85
3c6e85
Conflicts:
3c6e85
	drivers/scsi/qla2xxx/qla_os.c
3c6e85
3c6e85
[HM: RHEL78 kernel source does not have commit 25ab0bc334b4 ]
3c6e85
[ ("scsi: sched/wait: Add wait_event_lock_irq_timeout for ]
3c6e85
[ TASK_UNINTERRUPTIBLE usage"). Since this macro was missing ]
3c6e85
[ commit 711a08d79f71 ("scsi: qla2xxx: Change abort wait_loop ]
3c6e85
[ from msleep to wait_event_timeout") was not backported. ]
3c6e85
[ This patch now removes code that was added by commit 711a08d79f71 ]
3c6e85
[ Due to skipped commit code shows deviation from upstream          ]
3c6e85
[ in qla_os.c, qla2xxx_qpair_sp_compl() and qla2x00_sp_free_dma()   ]
3c6e85
[ brings in source from commit 711a08d79f71, to set the cmd->result ]
3c6e85
[ and CMD_SP(cmd) also removes double qla2x00_rel_sp() ]
3c6e85
3c6e85
Signed-off-by: Himanshu Madhani <hmadhani@redhat.com>
3c6e85
Signed-off-by: Jan Stancek <jstancek@redhat.com>
3c6e85
---
3c6e85
 drivers/scsi/qla2xxx/qla_nvme.c |  34 +--------
3c6e85
 drivers/scsi/qla2xxx/qla_nvme.h |   1 -
3c6e85
 drivers/scsi/qla2xxx/qla_os.c   | 150 ++++++++++++++++------------------------
3c6e85
 3 files changed, 62 insertions(+), 123 deletions(-)
3c6e85
3c6e85
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
3c6e85
index 73d6b7833830..8ddd44bb6c7f 100644
3c6e85
--- a/drivers/scsi/qla2xxx/qla_nvme.c
3c6e85
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
3c6e85
@@ -137,8 +137,7 @@ static void qla_nvme_sp_ls_done(void *ptr, int res)
3c6e85
 		return;
3c6e85
 	}
3c6e85
 
3c6e85
-	if (!atomic_dec_and_test(&sp->ref_count))
3c6e85
-		return;
3c6e85
+	atomic_dec(&sp->ref_count);
3c6e85
 
3c6e85
 	if (res)
3c6e85
 		res = -EINVAL;
3c6e85
@@ -161,8 +160,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
3c6e85
 	nvme = &sp->u.iocb_cmd;
3c6e85
 	fd = nvme->u.nvme.desc;
3c6e85
 
3c6e85
-	if (!atomic_dec_and_test(&sp->ref_count))
3c6e85
-		return;
3c6e85
+	atomic_dec(&sp->ref_count);
3c6e85
 
3c6e85
 	if (res == QLA_SUCCESS) {
3c6e85
 		fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
3c6e85
@@ -611,34 +609,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
3c6e85
 	.fcprqst_priv_sz = sizeof(struct nvme_private),
3c6e85
 };
3c6e85
 
3c6e85
-#define NVME_ABORT_POLLING_PERIOD    2
3c6e85
-static int qla_nvme_wait_on_command(srb_t *sp)
3c6e85
-{
3c6e85
-	int ret = QLA_SUCCESS;
3c6e85
-
3c6e85
-	wait_event_timeout(sp->nvme_ls_waitq, (atomic_read(&sp->ref_count) > 1),
3c6e85
-	    NVME_ABORT_POLLING_PERIOD*HZ);
3c6e85
-
3c6e85
-	if (atomic_read(&sp->ref_count) > 1)
3c6e85
-		ret = QLA_FUNCTION_FAILED;
3c6e85
-
3c6e85
-	return ret;
3c6e85
-}
3c6e85
-
3c6e85
-void qla_nvme_abort(struct qla_hw_data *ha, struct srb *sp, int res)
3c6e85
-{
3c6e85
-	int rval;
3c6e85
-
3c6e85
-	if (ha->flags.fw_started) {
3c6e85
-		rval = ha->isp_ops->abort_command(sp);
3c6e85
-		if (!rval && !qla_nvme_wait_on_command(sp))
3c6e85
-			ql_log(ql_log_warn, NULL, 0x2112,
3c6e85
-			    "timed out waiting on sp=%p\n", sp);
3c6e85
-	} else {
3c6e85
-		sp->done(sp, res);
3c6e85
-	}
3c6e85
-}
3c6e85
-
3c6e85
 static void qla_nvme_unregister_remote_port(struct work_struct *work)
3c6e85
 {
3c6e85
 	struct fc_port *fcport = container_of(work, struct fc_port,
3c6e85
diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h
3c6e85
index da8dad5ad693..0db04f0a4d5d 100644
3c6e85
--- a/drivers/scsi/qla2xxx/qla_nvme.h
3c6e85
+++ b/drivers/scsi/qla2xxx/qla_nvme.h
3c6e85
@@ -145,7 +145,6 @@ struct pt_ls4_rx_unsol {
3c6e85
 int qla_nvme_register_hba(struct scsi_qla_host *);
3c6e85
 int  qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *);
3c6e85
 void qla_nvme_delete(struct scsi_qla_host *);
3c6e85
-void qla_nvme_abort(struct qla_hw_data *, struct srb *sp, int res);
3c6e85
 void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *,
3c6e85
     struct req_que *);
3c6e85
 void qla24xx_async_gffid_sp_done(void *, int);
3c6e85
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
3c6e85
index a13798c4d178..a32074dd4727 100644
3c6e85
--- a/drivers/scsi/qla2xxx/qla_os.c
3c6e85
+++ b/drivers/scsi/qla2xxx/qla_os.c
3c6e85
@@ -728,7 +728,7 @@ qla2x00_sp_free_dma(void *ptr)
3c6e85
 	}
3c6e85
 
3c6e85
 	if (!ctx)
3c6e85
-		goto end;
3c6e85
+		return;
3c6e85
 
3c6e85
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
3c6e85
 		/* List assured to be having elements */
3c6e85
@@ -753,12 +753,6 @@ qla2x00_sp_free_dma(void *ptr)
3c6e85
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
3c6e85
 		mempool_free(ctx1, ha->ctx_mempool);
3c6e85
 	}
3c6e85
-
3c6e85
-end:
3c6e85
-	if (sp->type != SRB_NVME_CMD && sp->type != SRB_NVME_LS) {
3c6e85
-		CMD_SP(cmd) = NULL;
3c6e85
-		qla2x00_rel_sp(sp);
3c6e85
-	}
3c6e85
 }
3c6e85
 
3c6e85
 void
3c6e85
@@ -766,6 +760,7 @@ qla2x00_sp_compl(void *ptr, int res)
3c6e85
 {
3c6e85
 	srb_t *sp = ptr;
3c6e85
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
3c6e85
+	struct completion *comp = sp->comp;
3c6e85
 
3c6e85
 	if (atomic_read(&sp->ref_count) == 0) {
3c6e85
 		ql_dbg(ql_dbg_io, sp->vha, 0x3015,
3c6e85
@@ -775,12 +770,15 @@ qla2x00_sp_compl(void *ptr, int res)
3c6e85
 			WARN_ON(atomic_read(&sp->ref_count) == 0);
3c6e85
 		return;
3c6e85
 	}
3c6e85
-	if (!atomic_dec_and_test(&sp->ref_count))
3c6e85
-		return;
3c6e85
+
3c6e85
+	atomic_dec(&sp->ref_count);
3c6e85
 
3c6e85
 	sp->free(sp);
3c6e85
 	cmd->result = res;
3c6e85
 	cmd->scsi_done(cmd);
3c6e85
+	if (comp)
3c6e85
+		complete(comp);
3c6e85
+	qla2x00_rel_sp(sp);
3c6e85
 }
3c6e85
 
3c6e85
 void
3c6e85
@@ -803,7 +801,7 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
3c6e85
 	}
3c6e85
 
3c6e85
 	if (!ctx)
3c6e85
-		goto end;
3c6e85
+		return;
3c6e85
 
3c6e85
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
3c6e85
 		/* List assured to be having elements */
3c6e85
@@ -865,10 +863,6 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
3c6e85
 		dma_pool_free(ha->dl_dma_pool, ctx, ctx0->crc_ctx_dma);
3c6e85
 		sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
3c6e85
 	}
3c6e85
-
3c6e85
-end:
3c6e85
-	CMD_SP(cmd) = NULL;
3c6e85
-	qla2xxx_rel_qpair_sp(sp->qpair, sp);
3c6e85
 }
3c6e85
 
3c6e85
 void
3c6e85
@@ -876,8 +870,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
3c6e85
 {
3c6e85
 	srb_t *sp = ptr;
3c6e85
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
3c6e85
-
3c6e85
-	cmd->result = res;
3c6e85
+	struct completion *comp = sp->comp;
3c6e85
 
3c6e85
 	if (atomic_read(&sp->ref_count) == 0) {
3c6e85
 		ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
3c6e85
@@ -887,11 +880,16 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
3c6e85
 			WARN_ON(atomic_read(&sp->ref_count) == 0);
3c6e85
 		return;
3c6e85
 	}
3c6e85
-	if (!atomic_dec_and_test(&sp->ref_count))
3c6e85
-		return;
3c6e85
+
3c6e85
+	atomic_dec(&sp->ref_count);
3c6e85
 
3c6e85
 	sp->free(sp);
3c6e85
+	cmd->result = res;
3c6e85
+	CMD_SP(cmd) = NULL;
3c6e85
 	cmd->scsi_done(cmd);
3c6e85
+	if (comp)
3c6e85
+		complete(comp);
3c6e85
+	qla2xxx_rel_qpair_sp(sp->qpair, sp);
3c6e85
 }
3c6e85
 
3c6e85
 static int
3c6e85
@@ -1336,7 +1334,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3c6e85
 	int ret;
3c6e85
 	unsigned int id, lun;
3c6e85
 	unsigned long flags;
3c6e85
-	int rval, wait = 0;
3c6e85
+	int rval;
3c6e85
 	struct qla_hw_data *ha = vha->hw;
3c6e85
 	struct qla_qpair *qpair;
3c6e85
 
3c6e85
@@ -1349,7 +1347,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3c6e85
 	ret = fc_block_scsi_eh(cmd);
3c6e85
 	if (ret != 0)
3c6e85
 		return ret;
3c6e85
-	ret = SUCCESS;
3c6e85
 
3c6e85
 	sp = (srb_t *) CMD_SP(cmd);
3c6e85
 	if (!sp)
3c6e85
@@ -1360,7 +1357,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3c6e85
 		return SUCCESS;
3c6e85
 
3c6e85
 	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
3c6e85
-	if (!CMD_SP(cmd)) {
3c6e85
+	if (sp->type != SRB_SCSI_CMD || GET_CMD_SP(sp) != cmd) {
3c6e85
 		/* there's a chance an interrupt could clear
3c6e85
 		   the ptr as part of done & free */
3c6e85
 		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3c6e85
@@ -1381,58 +1378,31 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3c6e85
 	    "Aborting from RISC nexus=%ld:%d:%u sp=%p cmd=%p handle=%x\n",
3c6e85
 	    vha->host_no, id, lun, sp, cmd, sp->handle);
3c6e85
 
3c6e85
-	/* Get a reference to the sp and drop the lock.*/
3c6e85
-
3c6e85
 	rval = ha->isp_ops->abort_command(sp);
3c6e85
-	if (rval) {
3c6e85
-		if (rval == QLA_FUNCTION_PARAMETER_ERROR)
3c6e85
-			ret = SUCCESS;
3c6e85
-		else
3c6e85
-			ret = FAILED;
3c6e85
-
3c6e85
-		ql_dbg(ql_dbg_taskm, vha, 0x8003,
3c6e85
-		    "Abort command mbx failed cmd=%p, rval=%x.\n", cmd, rval);
3c6e85
-	} else {
3c6e85
-		ql_dbg(ql_dbg_taskm, vha, 0x8004,
3c6e85
-		    "Abort command mbx success cmd=%p.\n", cmd);
3c6e85
-		wait = 1;
3c6e85
-	}
3c6e85
-
3c6e85
-	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
3c6e85
-	/*
3c6e85
-	 * Clear the slot in the oustanding_cmds array if we can't find the
3c6e85
-	 * command to reclaim the resources.
3c6e85
-	 */
3c6e85
-	if (rval == QLA_FUNCTION_PARAMETER_ERROR)
3c6e85
-		vha->req->outstanding_cmds[sp->handle] = NULL;
3c6e85
-
3c6e85
-	/*
3c6e85
-	 * sp->done will do ref_count--
3c6e85
-	 * sp_get() took an extra count above
3c6e85
-	 */
3c6e85
-	sp->done(sp, DID_RESET << 16);
3c6e85
+	ql_dbg(ql_dbg_taskm, vha, 0x8003,
3c6e85
+	       "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
3c6e85
 
3c6e85
-	/* Did the command return during mailbox execution? */
3c6e85
-	if (ret == FAILED && !CMD_SP(cmd))
3c6e85
+	switch (rval) {
3c6e85
+	case QLA_SUCCESS:
3c6e85
+		/*
3c6e85
+		 * The command has been aborted. That means that the firmware
3c6e85
+		 * won't report a completion.
3c6e85
+		 */
3c6e85
+		sp->done(sp, DID_ABORT << 16);
3c6e85
 		ret = SUCCESS;
3c6e85
-
3c6e85
-	if (!CMD_SP(cmd))
3c6e85
-		wait = 0;
3c6e85
-
3c6e85
-	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3c6e85
-
3c6e85
-	/* Wait for the command to be returned. */
3c6e85
-	if (wait) {
3c6e85
-		if (qla2x00_eh_wait_on_command(cmd) != QLA_SUCCESS) {
3c6e85
-			ql_log(ql_log_warn, vha, 0x8006,
3c6e85
-			    "Abort handler timed out cmd=%p.\n", cmd);
3c6e85
-			ret = FAILED;
3c6e85
-		}
3c6e85
+		break;
3c6e85
+	default:
3c6e85
+		/*
3c6e85
+		 * Either abort failed or abort and completion raced. Let
3c6e85
+		 * the SCSI core retry the abort in the former case.
3c6e85
+		 */
3c6e85
+		ret = FAILED;
3c6e85
+		break;
3c6e85
 	}
3c6e85
 
3c6e85
 	ql_log(ql_log_info, vha, 0x801c,
3c6e85
-	    "Abort command issued nexus=%ld:%d:%d --  %d %x.\n",
3c6e85
-	    vha->host_no, id, lun, wait, ret);
3c6e85
+	    "Abort command issued nexus=%ld:%d:%d -- %x.\n",
3c6e85
+	    vha->host_no, id, lun, ret);
3c6e85
 
3c6e85
 	return ret;
3c6e85
 }
3c6e85
@@ -1806,34 +1776,34 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
3c6e85
 	__releases(qp->qp_lock_ptr)
3c6e85
 	__acquires(qp->qp_lock_ptr)
3c6e85
 {
3c6e85
+	DECLARE_COMPLETION_ONSTACK(comp);
3c6e85
 	scsi_qla_host_t *vha = qp->vha;
3c6e85
 	struct qla_hw_data *ha = vha->hw;
3c6e85
+	int rval;
3c6e85
 
3c6e85
-	if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS) {
3c6e85
-		if (!sp_get(sp)) {
3c6e85
-			/* got sp */
3c6e85
-			spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
3c6e85
-			qla_nvme_abort(ha, sp, res);
3c6e85
-			spin_lock_irqsave(qp->qp_lock_ptr, *flags);
3c6e85
-		}
3c6e85
-	} else if (GET_CMD_SP(sp) && !ha->flags.eeh_busy &&
3c6e85
-		   !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
3c6e85
-		   !qla2x00_isp_reg_stat(ha) && sp->type == SRB_SCSI_CMD) {
3c6e85
-		/*
3c6e85
-		 * Don't abort commands in adapter during EEH recovery as it's
3c6e85
-		 * not accessible/responding.
3c6e85
-		 *
3c6e85
-		 * Get a reference to the sp and drop the lock. The reference
3c6e85
-		 * ensures this sp->done() call and not the call in
3c6e85
-		 * qla2xxx_eh_abort() ends the SCSI cmd (with result 'res').
3c6e85
-		 */
3c6e85
-		if (!sp_get(sp)) {
3c6e85
-			spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
3c6e85
-			qla2xxx_eh_abort(GET_CMD_SP(sp));
3c6e85
-			spin_lock_irqsave(qp->qp_lock_ptr, *flags);
3c6e85
+	if (sp_get(sp))
3c6e85
+		return;
3c6e85
+
3c6e85
+	if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
3c6e85
+	    (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
3c6e85
+	     !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
3c6e85
+	     !qla2x00_isp_reg_stat(ha))) {
3c6e85
+		sp->comp = &comp;
3c6e85
+		rval = ha->isp_ops->abort_command(sp);
3c6e85
+		spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
3c6e85
+
3c6e85
+		switch (rval) {
3c6e85
+		case QLA_SUCCESS:
3c6e85
+			sp->done(sp, res);
3c6e85
+			break;
3c6e85
+		case QLA_FUNCTION_PARAMETER_ERROR:
3c6e85
+			wait_for_completion(&comp);
3c6e85
+			break;
3c6e85
 		}
3c6e85
+
3c6e85
+		spin_lock_irqsave(qp->qp_lock_ptr, *flags);
3c6e85
+		sp->comp = NULL;
3c6e85
 	}
3c6e85
-	sp->done(sp, res);
3c6e85
 }
3c6e85
 
3c6e85
 static void
3c6e85
-- 
3c6e85
2.13.6
3c6e85