Blame SOURCES/0073-netdrv-bnxt_en-Handle-firmware-reset.patch

f95c89
From e0a7aa5afea68660d3bee9a46dcc04f776da7f1f Mon Sep 17 00:00:00 2001
f95c89
From: Jonathan Toppins <jtoppins@redhat.com>
f95c89
Date: Wed, 2 Oct 2019 18:23:28 -0400
f95c89
Subject: [PATCH 73/96] [netdrv] bnxt_en: Handle firmware reset
f95c89
f95c89
Message-id: <a41729f956772546658a415dc95fed2ba2df6e71.1570027456.git.jtoppins@redhat.com>
f95c89
Patchwork-id: 276504
f95c89
O-Subject: [RHEL-8.2 PATCH 66/78] bnxt_en: Handle firmware reset.
f95c89
Bugzilla: 1724766
f95c89
RH-Acked-by: John Linville <linville@redhat.com>
f95c89
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
f95c89
f95c89
Add the bnxt_fw_reset() main function to handle firmware reset.  This
f95c89
is triggered by firmware to initiate an orderly reset, for example
f95c89
when a non-fatal exception condition has been detected.  bnxt_fw_reset()
f95c89
will first wait for all VFs to shut down and then start the
f95c89
bnxt_fw_reset_task() delayed work to go through the sequence of reset,
f95c89
re-probe, and re-initialization.
f95c89
f95c89
The next patch will add the devlink reporter to start the sequence and
f95c89
call bnxt_fw_reset().
f95c89
f95c89
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
f95c89
Signed-off-by: David S. Miller <davem@davemloft.net>
f95c89
(cherry picked from commit 230d1f0de754b483ec6eefc1ca5aaeff2b6b9a4c)
f95c89
Bugzilla: 1724766
f95c89
Build Info: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23809532
f95c89
Tested: build, boot, basic ping
f95c89
Signed-off-by: Jonathan Toppins <jtoppins@redhat.com>
f95c89
Signed-off-by: Bruno Meneguele <bmeneg@redhat.com>
f95c89
---
f95c89
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 150 ++++++++++++++++++++++++++
f95c89
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  11 ++
f95c89
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c |   3 +
f95c89
 3 files changed, 164 insertions(+)
f95c89
f95c89
Index: src/drivers/net/ethernet/broadcom/bnxt/bnxt.c
f95c89
===================================================================
f95c89
--- src.orig/drivers/net/ethernet/broadcom/bnxt/bnxt.c	2020-02-06 16:23:20.029473507 +0100
f95c89
+++ src/drivers/net/ethernet/broadcom/bnxt/bnxt.c	2020-02-06 16:23:20.162472286 +0100
f95c89
@@ -1142,6 +1142,14 @@
f95c89
 	return 0;
f95c89
 }
f95c89
 
f95c89
+static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
f95c89
+{
f95c89
+	if (BNXT_PF(bp))
f95c89
+		queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
f95c89
+	else
f95c89
+		schedule_delayed_work(&bp->fw_reset_task, delay);
f95c89
+}
f95c89
+
f95c89
 static void bnxt_queue_sp_work(struct bnxt *bp)
f95c89
 {
f95c89
 	if (BNXT_PF(bp))
f95c89
@@ -6360,6 +6368,8 @@
f95c89
 		struct bnxt_vf_info *vf = &bp->vf;
f95c89
 
f95c89
 		vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
f95c89
+	} else {
f95c89
+		bp->pf.registered_vfs = le16_to_cpu(resp->registered_vfs);
f95c89
 	}
f95c89
 #endif
f95c89
 	flags = le16_to_cpu(resp->flags);
f95c89
@@ -9985,6 +9995,53 @@
f95c89
 	bnxt_rtnl_unlock_sp(bp);
f95c89
 }
f95c89
 
f95c89
+static void bnxt_fw_reset_close(struct bnxt *bp)
f95c89
+{
f95c89
+	__bnxt_close_nic(bp, true, false);
f95c89
+	bnxt_ulp_irq_stop(bp);
f95c89
+	bnxt_clear_int_mode(bp);
f95c89
+	bnxt_hwrm_func_drv_unrgtr(bp);
f95c89
+	bnxt_free_ctx_mem(bp);
f95c89
+	kfree(bp->ctx);
f95c89
+	bp->ctx = NULL;
f95c89
+}
f95c89
+
f95c89
+void bnxt_fw_reset(struct bnxt *bp)
f95c89
+{
f95c89
+	int rc;
f95c89
+
f95c89
+	bnxt_rtnl_lock_sp(bp);
f95c89
+	if (test_bit(BNXT_STATE_OPEN, &bp->state) &&
f95c89
+	    !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
f95c89
+		set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
f95c89
+		if (BNXT_PF(bp) && bp->pf.active_vfs) {
f95c89
+			rc = bnxt_hwrm_func_qcfg(bp);
f95c89
+			if (rc) {
f95c89
+				netdev_err(bp->dev, "Firmware reset aborted, first func_qcfg cmd failed, rc = %d\n",
f95c89
+					   rc);
f95c89
+				clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
f95c89
+				dev_close(bp->dev);
f95c89
+				goto fw_reset_exit;
f95c89
+			}
f95c89
+			if (bp->pf.registered_vfs || bp->sriov_cfg) {
f95c89
+				u16 vf_tmo_dsecs = bp->pf.registered_vfs * 10;
f95c89
+
f95c89
+				if (bp->fw_reset_max_dsecs < vf_tmo_dsecs)
f95c89
+					bp->fw_reset_max_dsecs = vf_tmo_dsecs;
f95c89
+				bp->fw_reset_state =
f95c89
+					BNXT_FW_RESET_STATE_POLL_VF;
f95c89
+				bnxt_queue_fw_reset_work(bp, HZ / 10);
f95c89
+				goto fw_reset_exit;
f95c89
+			}
f95c89
+		}
f95c89
+		bnxt_fw_reset_close(bp);
f95c89
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
f95c89
+		bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
f95c89
+	}
f95c89
+fw_reset_exit:
f95c89
+	bnxt_rtnl_unlock_sp(bp);
f95c89
+}
f95c89
+
f95c89
 static void bnxt_chk_missed_irq(struct bnxt *bp)
f95c89
 {
f95c89
 	int i;
f95c89
@@ -10344,6 +10401,98 @@
f95c89
 	return 0;
f95c89
 }
f95c89
 
f95c89
+static void bnxt_fw_reset_task(struct work_struct *work)
f95c89
+{
f95c89
+	struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work);
f95c89
+	int rc;
f95c89
+
f95c89
+	if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
f95c89
+		netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n");
f95c89
+		return;
f95c89
+	}
f95c89
+
f95c89
+	switch (bp->fw_reset_state) {
f95c89
+	case BNXT_FW_RESET_STATE_POLL_VF:
f95c89
+		rc = bnxt_hwrm_func_qcfg(bp);
f95c89
+		if (rc) {
f95c89
+			netdev_err(bp->dev, "Firmware reset aborted, subsequent func_qcfg cmd failed, rc = %d, %d msecs since reset timestamp\n",
f95c89
+				   rc, jiffies_to_msecs(jiffies -
f95c89
+				   bp->fw_reset_timestamp));
f95c89
+			goto fw_reset_abort;
f95c89
+		}
f95c89
+		if (bp->pf.registered_vfs || bp->sriov_cfg) {
f95c89
+			if (time_after(jiffies, bp->fw_reset_timestamp +
f95c89
+				       (bp->fw_reset_max_dsecs * HZ / 10))) {
f95c89
+				clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
f95c89
+				bp->fw_reset_state = 0;
f95c89
+				netdev_err(bp->dev, "Firmware reset aborted, %d VFs still registered, sriov_cfg %d\n",
f95c89
+					   bp->pf.registered_vfs,
f95c89
+					   bp->sriov_cfg);
f95c89
+				return;
f95c89
+			}
f95c89
+			bnxt_queue_fw_reset_work(bp, HZ / 10);
f95c89
+			return;
f95c89
+		}
f95c89
+		bp->fw_reset_timestamp = jiffies;
f95c89
+		rtnl_lock();
f95c89
+		bnxt_fw_reset_close(bp);
f95c89
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
f95c89
+		rtnl_unlock();
f95c89
+		bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
f95c89
+		return;
f95c89
+	case BNXT_FW_RESET_STATE_ENABLE_DEV:
f95c89
+		if (pci_enable_device(bp->pdev)) {
f95c89
+			netdev_err(bp->dev, "Cannot re-enable PCI device\n");
f95c89
+			goto fw_reset_abort;
f95c89
+		}
f95c89
+		pci_set_master(bp->pdev);
f95c89
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW;
f95c89
+		/* fall through */
f95c89
+	case BNXT_FW_RESET_STATE_POLL_FW:
f95c89
+		bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT;
f95c89
+		rc = __bnxt_hwrm_ver_get(bp, true);
f95c89
+		if (rc) {
f95c89
+			if (time_after(jiffies, bp->fw_reset_timestamp +
f95c89
+				       (bp->fw_reset_max_dsecs * HZ / 10))) {
f95c89
+				netdev_err(bp->dev, "Firmware reset aborted\n");
f95c89
+				goto fw_reset_abort;
f95c89
+			}
f95c89
+			bnxt_queue_fw_reset_work(bp, HZ / 5);
f95c89
+			return;
f95c89
+		}
f95c89
+		bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT;
f95c89
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
f95c89
+		/* fall through */
f95c89
+	case BNXT_FW_RESET_STATE_OPENING:
f95c89
+		while (!rtnl_trylock()) {
f95c89
+			bnxt_queue_fw_reset_work(bp, HZ / 10);
f95c89
+			return;
f95c89
+		}
f95c89
+		rc = bnxt_open(bp->dev);
f95c89
+		if (rc) {
f95c89
+			netdev_err(bp->dev, "bnxt_open_nic() failed\n");
f95c89
+			clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
f95c89
+			dev_close(bp->dev);
f95c89
+		}
f95c89
+		bnxt_ulp_irq_restart(bp, rc);
f95c89
+		rtnl_unlock();
f95c89
+
f95c89
+		bp->fw_reset_state = 0;
f95c89
+		/* Make sure fw_reset_state is 0 before clearing the flag */
f95c89
+		smp_mb__before_atomic();
f95c89
+		clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
f95c89
+		break;
f95c89
+	}
f95c89
+	return;
f95c89
+
f95c89
+fw_reset_abort:
f95c89
+	clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
f95c89
+	bp->fw_reset_state = 0;
f95c89
+	rtnl_lock();
f95c89
+	dev_close(bp->dev);
f95c89
+	rtnl_unlock();
f95c89
+}
f95c89
+
f95c89
 static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
f95c89
 {
f95c89
 	int rc;
f95c89
@@ -10406,6 +10555,7 @@
f95c89
 	pci_enable_pcie_error_reporting(pdev);
f95c89
 
f95c89
 	INIT_WORK(&bp->sp_task, bnxt_sp_task);
f95c89
+	INIT_DELAYED_WORK(&bp->fw_reset_task, bnxt_fw_reset_task);
f95c89
 
f95c89
 	spin_lock_init(&bp->ntp_fltr_lock);
f95c89
 #if BITS_PER_LONG == 32
f95c89
Index: src/drivers/net/ethernet/broadcom/bnxt/bnxt.h
f95c89
===================================================================
f95c89
--- src.orig/drivers/net/ethernet/broadcom/bnxt/bnxt.h	2020-02-06 16:23:20.029473507 +0100
f95c89
+++ src/drivers/net/ethernet/broadcom/bnxt/bnxt.h	2020-02-06 16:23:20.163472277 +0100
f95c89
@@ -640,6 +640,7 @@
f95c89
 #define BNXT_HWRM_MAX_REQ_LEN		(bp->hwrm_max_req_len)
f95c89
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
f95c89
 #define DFLT_HWRM_CMD_TIMEOUT		500
f95c89
+#define SHORT_HWRM_CMD_TIMEOUT		20
f95c89
 #define HWRM_CMD_TIMEOUT		(bp->hwrm_cmd_timeout)
f95c89
 #define HWRM_RESET_TIMEOUT		((HWRM_CMD_TIMEOUT) * 4)
f95c89
 #define HWRM_RESP_ERR_CODE_MASK		0xffff
f95c89
@@ -1066,6 +1067,7 @@
f95c89
 	u8	mac_addr[ETH_ALEN];
f95c89
 	u32	first_vf_id;
f95c89
 	u16	active_vfs;
f95c89
+	u16	registered_vfs;
f95c89
 	u16	max_vfs;
f95c89
 	u32	max_encap_records;
f95c89
 	u32	max_decap_records;
f95c89
@@ -1721,6 +1723,14 @@
f95c89
 #define BNXT_RING_COAL_NOW_SP_EVENT	17
f95c89
 #define BNXT_FW_RESET_NOTIFY_SP_EVENT	18
f95c89
 
f95c89
+	struct delayed_work	fw_reset_task;
f95c89
+	int			fw_reset_state;
f95c89
+#define BNXT_FW_RESET_STATE_POLL_VF	1
f95c89
+#define BNXT_FW_RESET_STATE_RESET_FW	2
f95c89
+#define BNXT_FW_RESET_STATE_ENABLE_DEV	3
f95c89
+#define BNXT_FW_RESET_STATE_POLL_FW	4
f95c89
+#define BNXT_FW_RESET_STATE_OPENING	5
f95c89
+
f95c89
 	u16			fw_reset_min_dsecs;
f95c89
 #define BNXT_DFLT_FW_RST_MIN_DSECS	20
f95c89
 	u16			fw_reset_max_dsecs;
f95c89
@@ -1966,6 +1976,7 @@
f95c89
 int bnxt_half_open_nic(struct bnxt *bp);
f95c89
 void bnxt_half_close_nic(struct bnxt *bp);
f95c89
 int bnxt_close_nic(struct bnxt *, bool, bool);
f95c89
+void bnxt_fw_reset(struct bnxt *bp);
f95c89
 int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
f95c89
 		     int tx_xdp);
f95c89
 int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
f95c89
Index: src/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
f95c89
===================================================================
f95c89
--- src.orig/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c	2020-02-06 16:22:54.098711531 +0100
f95c89
+++ src/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c	2020-02-06 16:23:20.163472277 +0100
f95c89
@@ -226,6 +226,9 @@
f95c89
 	struct input *req;
f95c89
 	int rc;
f95c89
 
f95c89
+	if (ulp_id != BNXT_ROCE_ULP && bp->fw_reset_state)
f95c89
+		return -EBUSY;
f95c89
+
f95c89
 	mutex_lock(&bp->hwrm_cmd_lock);
f95c89
 	req = fw_msg->msg;
f95c89
 	req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);