Blame SOURCES/0147-scsi-scsi-qla2xxx-Fix-double-scsi_done-for-abort-pat.patch

3c6e85
From e3771a20877e59aa665131a10beb2618d917091a Mon Sep 17 00:00:00 2001
3c6e85
From: Himanshu Madhani <hmadhani@redhat.com>
3c6e85
Date: Thu, 21 Nov 2019 16:36:57 -0500
3c6e85
Subject: [PATCH 147/155] [scsi] scsi: qla2xxx: Fix double scsi_done for abort
3c6e85
 path
3c6e85
3c6e85
Message-id: <20191121163701.43688-23-hmadhani@redhat.com>
3c6e85
Patchwork-id: 287852
3c6e85
O-Subject: [RHLE 7.8 e-stor PATCH v3 22/26] scsi: qla2xxx: Fix double scsi_done for abort path
3c6e85
Bugzilla: 1731581
3c6e85
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
3c6e85
RH-Acked-by: Ewan Milne <emilne@redhat.com>
3c6e85
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
3c6e85
3c6e85
From: Quinn Tran <qutran@marvell.com>
3c6e85
3c6e85
Bugzilla 1731581
3c6e85
3c6e85
Current code assumes abort will remove the original command from the active
3c6e85
list where scsi_done will not be called. Instead, the eh_abort thread will
3c6e85
do the scsi_done. That is not the case.  Instead, we have a double
3c6e85
scsi_done calls triggering use after free.
3c6e85
3c6e85
Abort will tell FW to release the command from FW possesion. The original
3c6e85
command will return to ULP with error in its normal fashion via scsi_done.
3c6e85
eh_abort path would wait for the original command completion before
3c6e85
returning.  eh_abort path will not perform the scsi_done call.
3c6e85
3c6e85
Fixes: 219d27d7147e0 ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands")
3c6e85
Cc: stable@vger.kernel.org # 5.2
3c6e85
Link: https://lore.kernel.org/r/20191105150657.8092-6-hmadhani@marvell.com
3c6e85
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
3c6e85
Signed-off-by: Quinn Tran <qutran@marvell.com>
3c6e85
Signed-off-by: Arun Easi <aeasi@marvell.com>
3c6e85
Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
3c6e85
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
3c6e85
(cherry picked from commit f45bca8c5052e8c59bab64ee90c44441678b9a52)
3c6e85
Signed-off-by: Himanshu Madhani <hmadhani@redhat.com>
3c6e85
Signed-off-by: Jan Stancek <jstancek@redhat.com>
3c6e85
---
3c6e85
 drivers/scsi/qla2xxx/qla_def.h  |   5 +-
3c6e85
 drivers/scsi/qla2xxx/qla_isr.c  |   5 ++
3c6e85
 drivers/scsi/qla2xxx/qla_nvme.c |   4 +-
3c6e85
 drivers/scsi/qla2xxx/qla_os.c   | 117 +++++++++++++++++++++-------------------
3c6e85
 4 files changed, 72 insertions(+), 59 deletions(-)
3c6e85
3c6e85
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
3c6e85
index 24a4a2bdf6a7..f0eeda17e1d9 100644
3c6e85
--- a/drivers/scsi/qla2xxx/qla_def.h
3c6e85
+++ b/drivers/scsi/qla2xxx/qla_def.h
3c6e85
@@ -540,13 +540,16 @@ typedef struct srb {
3c6e85
 	 */
3c6e85
 	uint8_t cmd_type;
3c6e85
 	uint8_t pad[3];
3c6e85
-	atomic_t ref_count;
3c6e85
 	struct kref cmd_kref;	/* need to migrate ref_count over to this */
3c6e85
 	void *priv;
3c6e85
 	wait_queue_head_t nvme_ls_waitq;
3c6e85
 	struct fc_port *fcport;
3c6e85
 	struct scsi_qla_host *vha;
3c6e85
 	unsigned int start_timer:1;
3c6e85
+	unsigned int abort:1;
3c6e85
+	unsigned int aborted:1;
3c6e85
+	unsigned int completed:1;
3c6e85
+
3c6e85
 	uint32_t handle;
3c6e85
 	uint16_t flags;
3c6e85
 	uint16_t type;
3c6e85
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
3c6e85
index 873fcffb5dd7..1cc0d58a4ae8 100644
3c6e85
--- a/drivers/scsi/qla2xxx/qla_isr.c
3c6e85
+++ b/drivers/scsi/qla2xxx/qla_isr.c
3c6e85
@@ -2469,6 +2469,11 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
3c6e85
 		return;
3c6e85
 	}
3c6e85
 
3c6e85
+	if (sp->abort)
3c6e85
+		sp->aborted = 1;
3c6e85
+	else
3c6e85
+		sp->completed = 1;
3c6e85
+
3c6e85
 	if (sp->cmd_type != TYPE_SRB) {
3c6e85
 		req->outstanding_cmds[handle] = NULL;
3c6e85
 		ql_dbg(ql_dbg_io, vha, 0x3015,
3c6e85
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
3c6e85
index ad406511f2ec..62d7c7032b27 100644
3c6e85
--- a/drivers/scsi/qla2xxx/qla_nvme.c
3c6e85
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
3c6e85
@@ -227,8 +227,8 @@ static void qla_nvme_abort_work(struct work_struct *work)
3c6e85
 
3c6e85
 	if (ha->flags.host_shutting_down) {
3c6e85
 		ql_log(ql_log_info, sp->fcport->vha, 0xffff,
3c6e85
-		    "%s Calling done on sp: %p, type: 0x%x, sp->ref_count: 0x%x\n",
3c6e85
-		    __func__, sp, sp->type, atomic_read(&sp->ref_count));
3c6e85
+		    "%s Calling done on sp: %p, type: 0x%x\n",
3c6e85
+		    __func__, sp, sp->type);
3c6e85
 		sp->done(sp, 0);
3c6e85
 		goto out;
3c6e85
 	}
3c6e85
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
3c6e85
index f44d1617cbcb..b6160b2a5405 100644
3c6e85
--- a/drivers/scsi/qla2xxx/qla_os.c
3c6e85
+++ b/drivers/scsi/qla2xxx/qla_os.c
3c6e85
@@ -715,11 +715,6 @@ qla2x00_sp_compl(void *ptr, int res)
3c6e85
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
3c6e85
 	struct completion *comp = sp->comp;
3c6e85
 
3c6e85
-	if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
3c6e85
-		return;
3c6e85
-
3c6e85
-	atomic_dec(&sp->ref_count);
3c6e85
-
3c6e85
 	sp->free(sp);
3c6e85
 	cmd->result = res;
3c6e85
 	cmd->scsi_done(cmd);
3c6e85
@@ -818,11 +813,6 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
3c6e85
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
3c6e85
 	struct completion *comp = sp->comp;
3c6e85
 
3c6e85
-	if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
3c6e85
-		return;
3c6e85
-
3c6e85
-	atomic_dec(&sp->ref_count);
3c6e85
-
3c6e85
 	sp->free(sp);
3c6e85
 	cmd->result = res;
3c6e85
 	CMD_SP(cmd) = NULL;
3c6e85
@@ -930,7 +920,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
3c6e85
 
3c6e85
 	sp->u.scmd.cmd = cmd;
3c6e85
 	sp->type = SRB_SCSI_CMD;
3c6e85
-	atomic_set(&sp->ref_count, 1);
3c6e85
+
3c6e85
 	CMD_SP(cmd) = (void *)sp;
3c6e85
 	sp->free = qla2x00_sp_free_dma;
3c6e85
 	sp->done = qla2x00_sp_compl;
3c6e85
@@ -1012,11 +1002,9 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
3c6e85
 
3c6e85
 	sp->u.scmd.cmd = cmd;
3c6e85
 	sp->type = SRB_SCSI_CMD;
3c6e85
-	atomic_set(&sp->ref_count, 1);
3c6e85
 	CMD_SP(cmd) = (void *)sp;
3c6e85
 	sp->free = qla2xxx_qpair_sp_free_dma;
3c6e85
 	sp->done = qla2xxx_qpair_sp_compl;
3c6e85
-	sp->qpair = qpair;
3c6e85
 
3c6e85
 	rval = ha->isp_ops->start_scsi_mq(sp);
3c6e85
 	if (rval != QLA_SUCCESS) {
3c6e85
@@ -1208,16 +1196,6 @@ qla2x00_wait_for_chip_reset(scsi_qla_host_t *vha)
3c6e85
 	return return_status;
3c6e85
 }
3c6e85
 
3c6e85
-static int
3c6e85
-sp_get(struct srb *sp)
3c6e85
-{
3c6e85
-	if (!refcount_inc_not_zero((refcount_t *)&sp->ref_count))
3c6e85
-		/* kref get fail */
3c6e85
-		return ENXIO;
3c6e85
-	else
3c6e85
-		return 0;
3c6e85
-}
3c6e85
-
3c6e85
 #define ISP_REG_DISCONNECT 0xffffffffU
3c6e85
 /**************************************************************************
3c6e85
 * qla2x00_isp_reg_stat
3c6e85
@@ -1272,6 +1250,9 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3c6e85
 	unsigned int id, lun;
3c6e85
 	int rval;
3c6e85
 	struct qla_hw_data *ha = vha->hw;
3c6e85
+	uint32_t ratov_j;
3c6e85
+	struct qla_qpair *qpair;
3c6e85
+	unsigned long flags;
3c6e85
 
3c6e85
 	if (qla2x00_isp_reg_stat(ha)) {
3c6e85
 		ql_log(ql_log_info, vha, 0x8042,
3c6e85
@@ -1284,13 +1265,26 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3c6e85
 		return ret;
3c6e85
 
3c6e85
 	sp = scsi_cmd_priv(cmd);
3c6e85
+	qpair = sp->qpair;
3c6e85
 
3c6e85
-	if (sp->fcport && sp->fcport->deleted)
3c6e85
+	if ((sp->fcport && sp->fcport->deleted) || !qpair)
3c6e85
 		return SUCCESS;
3c6e85
 
3c6e85
-	/* Return if the command has already finished. */
3c6e85
-	if (sp_get(sp))
3c6e85
+	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
3c6e85
+	if (sp->completed) {
3c6e85
+		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3c6e85
 		return SUCCESS;
3c6e85
+	}
3c6e85
+
3c6e85
+	if (sp->abort || sp->aborted) {
3c6e85
+		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3c6e85
+		return FAILED;
3c6e85
+	}
3c6e85
+
3c6e85
+	sp->abort = 1;
3c6e85
+	sp->comp = ∁
3c6e85
+	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3c6e85
+
3c6e85
 
3c6e85
 	id = cmd->device->id;
3c6e85
 	lun = cmd->device->lun;
3c6e85
@@ -1299,47 +1293,37 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
3c6e85
 	    "Aborting from RISC nexus=%ld:%d:%u sp=%p cmd=%p handle=%x\n",
3c6e85
 	    vha->host_no, id, lun, sp, cmd, sp->handle);
3c6e85
 
3c6e85
+	/*
3c6e85
+	 * Abort will release the original Command/sp from FW. Let the
3c6e85
+	 * original command call scsi_done. In return, he will wakeup
3c6e85
+	 * this sleeping thread.
3c6e85
+	 */
3c6e85
 	rval = ha->isp_ops->abort_command(sp);
3c6e85
+
3c6e85
 	ql_dbg(ql_dbg_taskm, vha, 0x8003,
3c6e85
 	       "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
3c6e85
 
3c6e85
+	/* Wait for the command completion. */
3c6e85
+	ratov_j = ha->r_a_tov/10 * 4 * 1000;
3c6e85
+	ratov_j = msecs_to_jiffies(ratov_j);
3c6e85
 	switch (rval) {
3c6e85
 	case QLA_SUCCESS:
3c6e85
-		/*
3c6e85
-		 * The command has been aborted. That means that the firmware
3c6e85
-		 * won't report a completion.
3c6e85
-		 */
3c6e85
-		sp->done(sp, DID_ABORT << 16);
3c6e85
-		ret = SUCCESS;
3c6e85
-		break;
3c6e85
-	case QLA_FUNCTION_PARAMETER_ERROR: {
3c6e85
-		/* Wait for the command completion. */
3c6e85
-		uint32_t ratov = ha->r_a_tov/10;
3c6e85
-		uint32_t ratov_j = msecs_to_jiffies(4 * ratov * 1000);
3c6e85
-
3c6e85
-		WARN_ON_ONCE(sp->comp);
3c6e85
-		sp->comp = ∁
3c6e85
 		if (!wait_for_completion_timeout(&comp, ratov_j)) {
3c6e85
 			ql_dbg(ql_dbg_taskm, vha, 0xffff,
3c6e85
 			    "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
3c6e85
-			    __func__, ha->r_a_tov);
3c6e85
+			    __func__, ha->r_a_tov/10);
3c6e85
 			ret = FAILED;
3c6e85
 		} else {
3c6e85
 			ret = SUCCESS;
3c6e85
 		}
3c6e85
 		break;
3c6e85
-	}
3c6e85
 	default:
3c6e85
-		/*
3c6e85
-		 * Either abort failed or abort and completion raced. Let
3c6e85
-		 * the SCSI core retry the abort in the former case.
3c6e85
-		 */
3c6e85
 		ret = FAILED;
3c6e85
 		break;
3c6e85
 	}
3c6e85
 
3c6e85
 	sp->comp = NULL;
3c6e85
-	atomic_dec(&sp->ref_count);
3c6e85
+
3c6e85
 	ql_log(ql_log_info, vha, 0x801c,
3c6e85
 	    "Abort command issued nexus=%ld:%d:%d -- %x.\n",
3c6e85
 	    vha->host_no, id, lun, ret);
3c6e85
@@ -1726,32 +1710,53 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
3c6e85
 	scsi_qla_host_t *vha = qp->vha;
3c6e85
 	struct qla_hw_data *ha = vha->hw;
3c6e85
 	int rval;
3c6e85
+	bool ret_cmd;
3c6e85
+	uint32_t ratov_j;
3c6e85
 
3c6e85
-	if (sp_get(sp))
3c6e85
+	if (qla2x00_chip_is_down(vha)) {
3c6e85
+		sp->done(sp, res);
3c6e85
 		return;
3c6e85
+	}
3c6e85
 
3c6e85
 	if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
3c6e85
 	    (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
3c6e85
 	     !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
3c6e85
 	     !qla2x00_isp_reg_stat(ha))) {
3c6e85
+		if (sp->comp) {
3c6e85
+			sp->done(sp, res);
3c6e85
+			return;
3c6e85
+		}
3c6e85
+
3c6e85
 		sp->comp = ∁
3c6e85
+		sp->abort =  1;
3c6e85
 		spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
3c6e85
-		rval = ha->isp_ops->abort_command(sp);
3c6e85
 
3c6e85
+		rval = ha->isp_ops->abort_command(sp);
3c6e85
+		/* Wait for command completion. */
3c6e85
+		ret_cmd = false;
3c6e85
+		ratov_j = ha->r_a_tov/10 * 4 * 1000;
3c6e85
+		ratov_j = msecs_to_jiffies(ratov_j);
3c6e85
 		switch (rval) {
3c6e85
 		case QLA_SUCCESS:
3c6e85
-			sp->done(sp, res);
3c6e85
+			if (wait_for_completion_timeout(&comp, ratov_j)) {
3c6e85
+				ql_dbg(ql_dbg_taskm, vha, 0xffff,
3c6e85
+				    "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
3c6e85
+				    __func__, ha->r_a_tov/10);
3c6e85
+				ret_cmd = true;
3c6e85
+			}
3c6e85
+			/* else FW return SP to driver */
3c6e85
 			break;
3c6e85
-		case QLA_FUNCTION_PARAMETER_ERROR:
3c6e85
-			wait_for_completion(&comp);
3c6e85
+		default:
3c6e85
+			ret_cmd = true;
3c6e85
 			break;
3c6e85
 		}
3c6e85
 
3c6e85
 		spin_lock_irqsave(qp->qp_lock_ptr, *flags);
3c6e85
-		sp->comp = NULL;
3c6e85
+		if (ret_cmd && (!sp->completed || !sp->aborted))
3c6e85
+			sp->done(sp, res);
3c6e85
+	} else {
3c6e85
+		sp->done(sp, res);
3c6e85
 	}
3c6e85
-
3c6e85
-	atomic_dec(&sp->ref_count);
3c6e85
 }
3c6e85
 
3c6e85
 static void
3c6e85
@@ -1773,7 +1778,6 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
3c6e85
 	for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
3c6e85
 		sp = req->outstanding_cmds[cnt];
3c6e85
 		if (sp) {
3c6e85
-			req->outstanding_cmds[cnt] = NULL;
3c6e85
 			switch (sp->cmd_type) {
3c6e85
 			case TYPE_SRB:
3c6e85
 				qla2x00_abort_srb(qp, sp, res, &flags);
3c6e85
@@ -1800,6 +1804,7 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
3c6e85
 			default:
3c6e85
 				break;
3c6e85
 			}
3c6e85
+			req->outstanding_cmds[cnt] = NULL;
3c6e85
 		}
3c6e85
 	}
3c6e85
 	spin_unlock_irqrestore(qp->qp_lock_ptr, flags);
3c6e85
-- 
3c6e85
2.13.6
3c6e85