Blame SOURCES/0147-scsi-scsi-qla2xxx-Fix-double-scsi_done-for-abort-pat.patch

3d7c23
From e3771a20877e59aa665131a10beb2618d917091a Mon Sep 17 00:00:00 2001
3d7c23
From: Himanshu Madhani <hmadhani@redhat.com>
3d7c23
Date: Thu, 21 Nov 2019 16:36:57 -0500
3d7c23
Subject: [PATCH 147/155] [scsi] scsi: qla2xxx: Fix double scsi_done for abort
3d7c23
 path
3d7c23
3d7c23
Message-id: <20191121163701.43688-23-hmadhani@redhat.com>
3d7c23
Patchwork-id: 287852
3d7c23
O-Subject: [RHLE 7.8 e-stor PATCH v3 22/26] scsi: qla2xxx: Fix double scsi_done for abort path
3d7c23
Bugzilla: 1731581
3d7c23
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
3d7c23
RH-Acked-by: Ewan Milne <emilne@redhat.com>
3d7c23
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
3d7c23
3d7c23
From: Quinn Tran <qutran@marvell.com>
3d7c23
3d7c23
Bugzilla 1731581
3d7c23
3d7c23
Current code assumes abort will remove the original command from the active
3d7c23
list where scsi_done will not be called. Instead, the eh_abort thread will
3d7c23
do the scsi_done. That is not the case.  Instead, we have a double
3d7c23
scsi_done calls triggering use after free.
3d7c23
3d7c23
Abort will tell FW to release the command from FW possesion. The original
3d7c23
command will return to ULP with error in its normal fashion via scsi_done.
3d7c23
eh_abort path would wait for the original command completion before
3d7c23
returning.  eh_abort path will not perform the scsi_done call.
3d7c23
3d7c23
Fixes: 219d27d7147e0 ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands")
3d7c23
Cc: stable@vger.kernel.org # 5.2
3d7c23
Link: https://lore.kernel.org/r/20191105150657.8092-6-hmadhani@marvell.com
3d7c23
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
3d7c23
Signed-off-by: Quinn Tran <qutran@marvell.com>
3d7c23
Signed-off-by: Arun Easi <aeasi@marvell.com>
3d7c23
Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
3d7c23
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
3d7c23
(cherry picked from commit f45bca8c5052e8c59bab64ee90c44441678b9a52)
3d7c23
Signed-off-by: Himanshu Madhani <hmadhani@redhat.com>
3d7c23
Signed-off-by: Jan Stancek <jstancek@redhat.com>
3d7c23
---
3d7c23
 drivers/scsi/qla2xxx/qla_def.h  |   5 +-
3d7c23
 drivers/scsi/qla2xxx/qla_isr.c  |   5 ++
3d7c23
 drivers/scsi/qla2xxx/qla_nvme.c |   4 +-
3d7c23
 drivers/scsi/qla2xxx/qla_os.c   | 117 +++++++++++++++++++++-------------------
3d7c23
 4 files changed, 72 insertions(+), 59 deletions(-)
3d7c23
3d7c23
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
3d7c23
index 24a4a2bdf6a7..f0eeda17e1d9 100644
3d7c23
--- a/drivers/scsi/qla2xxx/qla_def.h
3d7c23
+++ b/drivers/scsi/qla2xxx/qla_def.h
3d7c23
@@ -540,13 +540,16 @@ typedef struct srb {
3d7c23
 	 */
3d7c23
 	uint8_t cmd_type;
3d7c23
 	uint8_t pad[3];
3d7c23
-	atomic_t ref_count;
3d7c23
 	struct kref cmd_kref;	/* need to migrate ref_count over to this */
3d7c23
 	void *priv;
3d7c23
 	wait_queue_head_t nvme_ls_waitq;
3d7c23
 	struct fc_port *fcport;
3d7c23
 	struct scsi_qla_host *vha;
3d7c23
 	unsigned int start_timer:1;
3d7c23
+	unsigned int abort:1;
3d7c23
+	unsigned int aborted:1;
3d7c23
+	unsigned int completed:1;
3d7c23
+
3d7c23
 	uint32_t handle;
3d7c23
 	uint16_t flags;
3d7c23
 	uint16_t type;
3d7c23
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
3d7c23
index 873fcffb5dd7..1cc0d58a4ae8 100644
3d7c23
--- a/drivers/scsi/qla2xxx/qla_isr.c
3d7c23
+++ b/drivers/scsi/qla2xxx/qla_isr.c
3d7c23
@@ -2469,6 +2469,11 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
3d7c23
 		return;
3d7c23
 	}
3d7c23
 
3d7c23
+	if (sp->abort)
3d7c23
+		sp->aborted = 1;
3d7c23
+	else
3d7c23
+		sp->completed = 1;
3d7c23
+
3d7c23
 	if (sp->cmd_type != TYPE_SRB) {
3d7c23
 		req->outstanding_cmds[handle] = NULL;
3d7c23
 		ql_dbg(ql_dbg_io, vha, 0x3015,
3d7c23
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
3d7c23
index ad406511f2ec..62d7c7032b27 100644
3d7c23
--- a/drivers/scsi/qla2xxx/qla_nvme.c
3d7c23
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
3d7c23
@@ -227,8 +227,8 @@ static void qla_nvme_abort_work(struct work_struct *work)
3d7c23
 
3d7c23
 	if (ha->flags.host_shutting_down) {
3d7c23
 		ql_log(ql_log_info, sp->fcport->vha, 0xffff,
3d7c23
-		    "%s Calling done on sp: %p, type: 0x%x, sp->ref_count: 0x%x\n",
3d7c23
-		    __func__, sp, sp->type, atomic_read(&sp->ref_count));
3d7c23
+		    "%s Calling done on sp: %p, type: 0x%x\n",
3d7c23
+		    __func__, sp, sp->type);
3d7c23
 		sp->done(sp, 0);
3d7c23
 		goto out;
3d7c23
 	}
3d7c23
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
3d7c23
index f44d1617cbcb..b6160b2a5405 100644
3d7c23
--- a/drivers/scsi/qla2xxx/qla_os.c
3d7c23
+++ b/drivers/scsi/qla2xxx/qla_os.c
3d7c23
@@ -715,11 +715,6 @@ qla2x00_sp_compl(void *ptr, int res)
3d7c23
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
3d7c23
 	struct completion *comp = sp->comp;
3d7c23
 
3d7c23
-	if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
3d7c23
-		return;
3d7c23
-
3d7c23
-	atomic_dec(&sp->ref_count);
3d7c23
-
3d7c23
 	sp->free(sp);
3d7c23
 	cmd->result = res;
3d7c23
 	cmd->scsi_done(cmd);
3d7c23
@@ -818,11 +813,6 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
3d7c23
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
3d7c23
 	struct completion *comp = sp->comp;
3d7c23
 
3d7c23
-	if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
3d7c23
-		return;
3d7c23
-
3d7c23
-	atomic_dec(&sp->ref_count);
3d7c23
-
3d7c23
 	sp->free(sp);
3d7c23
 	cmd->result = res;
3d7c23
 	CMD_SP(cmd) = NULL;
3d7c23
@@ -930,7 +920,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
3d7c23
 
3d7c23
 	sp->u.scmd.cmd = cmd;
3d7c23
 	sp->type = SRB_SCSI_CMD;
3d7c23
-	atomic_set(&sp->ref_count, 1);
3d7c23
+
3d7c23
 	CMD_SP(cmd) = (void *)sp;
3d7c23
 	sp->free = qla2x00_sp_free_dma;
3d7c23
 	sp->done = qla2x00_sp_compl;
3d7c23
@@ -1012,11 +1002,9 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
3d7c23
 
3d7c23
 	sp->u.scmd.cmd = cmd;
3d7c23
 	sp->type = SRB_SCSI_CMD;
3d7c23
-	atomic_set(&sp->ref_count, 1);
3d7c23
 	CMD_SP(cmd) = (void *)sp;
3d7c23
 	sp->free = qla2xxx_qpair_sp_free_dma;
3d7c23
 	sp->done = qla2xxx_qpair_sp_compl;
3d7c23
-	sp->qpair = qpair;
3d7c23
 
3d7c23
 	rval = ha->isp_ops->start_scsi_mq(sp);
3d7c23
 	if (rval != QLA_SUCCESS) {
3d7c23
@@ -1208,16 +1196,6 @@ qla2x00_wait_for_chip_reset(scsi_qla_host_t *vha)
3d7c23
 	return return_status;
3d7c23
 }
3d7c23
 
3d7c23
-static int
3d7c23
-sp_get(struct srb *sp)
3d7c23
-{
3d7c23
-	if (!refcount_inc_not_zero((refcount_t *)&sp->ref_count))
3d7c23
-		/* kref get fail */
3d7c23
-		return ENXIO;
3d7c23
-	else
3d7c23
-		return 0;
3d7c23
-}
3d7c23
-
3d7c23
 #define ISP_REG_DISCONNECT 0xffffffffU
3d7c23
 /**************************************************************************
3d7c23
 * qla2x00_isp_reg_stat
3d7c23
@@ -1272,6 +1250,9 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3d7c23
 	unsigned int id, lun;
3d7c23
 	int rval;
3d7c23
 	struct qla_hw_data *ha = vha->hw;
3d7c23
+	uint32_t ratov_j;
3d7c23
+	struct qla_qpair *qpair;
3d7c23
+	unsigned long flags;
3d7c23
 
3d7c23
 	if (qla2x00_isp_reg_stat(ha)) {
3d7c23
 		ql_log(ql_log_info, vha, 0x8042,
3d7c23
@@ -1284,13 +1265,26 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3d7c23
 		return ret;
3d7c23
 
3d7c23
 	sp = scsi_cmd_priv(cmd);
3d7c23
+	qpair = sp->qpair;
3d7c23
 
3d7c23
-	if (sp->fcport && sp->fcport->deleted)
3d7c23
+	if ((sp->fcport && sp->fcport->deleted) || !qpair)
3d7c23
 		return SUCCESS;
3d7c23
 
3d7c23
-	/* Return if the command has already finished. */
3d7c23
-	if (sp_get(sp))
3d7c23
+	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
3d7c23
+	if (sp->completed) {
3d7c23
+		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3d7c23
 		return SUCCESS;
3d7c23
+	}
3d7c23
+
3d7c23
+	if (sp->abort || sp->aborted) {
3d7c23
+		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3d7c23
+		return FAILED;
3d7c23
+	}
3d7c23
+
3d7c23
+	sp->abort = 1;
3d7c23
+	sp->comp = ∁
3d7c23
+	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3d7c23
+
3d7c23
 
3d7c23
 	id = cmd->device->id;
3d7c23
 	lun = cmd->device->lun;
3d7c23
@@ -1299,47 +1293,37 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3d7c23
 	    "Aborting from RISC nexus=%ld:%d:%u sp=%p cmd=%p handle=%x\n",
3d7c23
 	    vha->host_no, id, lun, sp, cmd, sp->handle);
3d7c23
 
3d7c23
+	/*
3d7c23
+	 * Abort will release the original Command/sp from FW. Let the
3d7c23
+	 * original command call scsi_done. In return, he will wakeup
3d7c23
+	 * this sleeping thread.
3d7c23
+	 */
3d7c23
 	rval = ha->isp_ops->abort_command(sp);
3d7c23
+
3d7c23
 	ql_dbg(ql_dbg_taskm, vha, 0x8003,
3d7c23
 	       "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
3d7c23
 
3d7c23
+	/* Wait for the command completion. */
3d7c23
+	ratov_j = ha->r_a_tov/10 * 4 * 1000;
3d7c23
+	ratov_j = msecs_to_jiffies(ratov_j);
3d7c23
 	switch (rval) {
3d7c23
 	case QLA_SUCCESS:
3d7c23
-		/*
3d7c23
-		 * The command has been aborted. That means that the firmware
3d7c23
-		 * won't report a completion.
3d7c23
-		 */
3d7c23
-		sp->done(sp, DID_ABORT << 16);
3d7c23
-		ret = SUCCESS;
3d7c23
-		break;
3d7c23
-	case QLA_FUNCTION_PARAMETER_ERROR: {
3d7c23
-		/* Wait for the command completion. */
3d7c23
-		uint32_t ratov = ha->r_a_tov/10;
3d7c23
-		uint32_t ratov_j = msecs_to_jiffies(4 * ratov * 1000);
3d7c23
-
3d7c23
-		WARN_ON_ONCE(sp->comp);
3d7c23
-		sp->comp = ∁
3d7c23
 		if (!wait_for_completion_timeout(&comp, ratov_j)) {
3d7c23
 			ql_dbg(ql_dbg_taskm, vha, 0xffff,
3d7c23
 			    "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
3d7c23
-			    __func__, ha->r_a_tov);
3d7c23
+			    __func__, ha->r_a_tov/10);
3d7c23
 			ret = FAILED;
3d7c23
 		} else {
3d7c23
 			ret = SUCCESS;
3d7c23
 		}
3d7c23
 		break;
3d7c23
-	}
3d7c23
 	default:
3d7c23
-		/*
3d7c23
-		 * Either abort failed or abort and completion raced. Let
3d7c23
-		 * the SCSI core retry the abort in the former case.
3d7c23
-		 */
3d7c23
 		ret = FAILED;
3d7c23
 		break;
3d7c23
 	}
3d7c23
 
3d7c23
 	sp->comp = NULL;
3d7c23
-	atomic_dec(&sp->ref_count);
3d7c23
+
3d7c23
 	ql_log(ql_log_info, vha, 0x801c,
3d7c23
 	    "Abort command issued nexus=%ld:%d:%d -- %x.\n",
3d7c23
 	    vha->host_no, id, lun, ret);
3d7c23
@@ -1726,32 +1710,53 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
3d7c23
 	scsi_qla_host_t *vha = qp->vha;
3d7c23
 	struct qla_hw_data *ha = vha->hw;
3d7c23
 	int rval;
3d7c23
+	bool ret_cmd;
3d7c23
+	uint32_t ratov_j;
3d7c23
 
3d7c23
-	if (sp_get(sp))
3d7c23
+	if (qla2x00_chip_is_down(vha)) {
3d7c23
+		sp->done(sp, res);
3d7c23
 		return;
3d7c23
+	}
3d7c23
 
3d7c23
 	if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
3d7c23
 	    (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
3d7c23
 	     !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
3d7c23
 	     !qla2x00_isp_reg_stat(ha))) {
3d7c23
+		if (sp->comp) {
3d7c23
+			sp->done(sp, res);
3d7c23
+			return;
3d7c23
+		}
3d7c23
+
3d7c23
 		sp->comp = ∁
3d7c23
+		sp->abort =  1;
3d7c23
 		spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
3d7c23
-		rval = ha->isp_ops->abort_command(sp);
3d7c23
 
3d7c23
+		rval = ha->isp_ops->abort_command(sp);
3d7c23
+		/* Wait for command completion. */
3d7c23
+		ret_cmd = false;
3d7c23
+		ratov_j = ha->r_a_tov/10 * 4 * 1000;
3d7c23
+		ratov_j = msecs_to_jiffies(ratov_j);
3d7c23
 		switch (rval) {
3d7c23
 		case QLA_SUCCESS:
3d7c23
-			sp->done(sp, res);
3d7c23
+			if (wait_for_completion_timeout(&comp, ratov_j)) {
3d7c23
+				ql_dbg(ql_dbg_taskm, vha, 0xffff,
3d7c23
+				    "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
3d7c23
+				    __func__, ha->r_a_tov/10);
3d7c23
+				ret_cmd = true;
3d7c23
+			}
3d7c23
+			/* else FW return SP to driver */
3d7c23
 			break;
3d7c23
-		case QLA_FUNCTION_PARAMETER_ERROR:
3d7c23
-			wait_for_completion(&comp);
3d7c23
+		default:
3d7c23
+			ret_cmd = true;
3d7c23
 			break;
3d7c23
 		}
3d7c23
 
3d7c23
 		spin_lock_irqsave(qp->qp_lock_ptr, *flags);
3d7c23
-		sp->comp = NULL;
3d7c23
+		if (ret_cmd && (!sp->completed || !sp->aborted))
3d7c23
+			sp->done(sp, res);
3d7c23
+	} else {
3d7c23
+		sp->done(sp, res);
3d7c23
 	}
3d7c23
-
3d7c23
-	atomic_dec(&sp->ref_count);
3d7c23
 }
3d7c23
 
3d7c23
 static void
3d7c23
@@ -1773,7 +1778,6 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
3d7c23
 	for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
3d7c23
 		sp = req->outstanding_cmds[cnt];
3d7c23
 		if (sp) {
3d7c23
-			req->outstanding_cmds[cnt] = NULL;
3d7c23
 			switch (sp->cmd_type) {
3d7c23
 			case TYPE_SRB:
3d7c23
 				qla2x00_abort_srb(qp, sp, res, &flags);
3d7c23
@@ -1800,6 +1804,7 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
3d7c23
 			default:
3d7c23
 				break;
3d7c23
 			}
3d7c23
+			req->outstanding_cmds[cnt] = NULL;
3d7c23
 		}
3d7c23
 	}
3d7c23
 	spin_unlock_irqrestore(qp->qp_lock_ptr, flags);
3d7c23
-- 
3d7c23
2.13.6
3d7c23