Blame SOURCES/0031-netdrv-net-mlx5e-Report-and-recover-from-rx-timeout.patch

d8f823
From ac9174fc02907c3b322b1cba4fe37b73ae29e71b Mon Sep 17 00:00:00 2001
d8f823
From: Alaa Hleihel <ahleihel@redhat.com>
d8f823
Date: Sun, 10 May 2020 14:51:55 -0400
d8f823
Subject: [PATCH 031/312] [netdrv] net/mlx5e: Report and recover from rx
d8f823
 timeout
d8f823
d8f823
Message-id: <20200510145245.10054-33-ahleihel@redhat.com>
d8f823
Patchwork-id: 306573
d8f823
Patchwork-instance: patchwork
d8f823
O-Subject: [RHEL8.3 BZ 1789378 v2 32/82] net/mlx5e: Report and recover from rx timeout
d8f823
Bugzilla: 1790198 1789378
d8f823
RH-Acked-by: Kamal Heib <kheib@redhat.com>
d8f823
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
d8f823
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
d8f823
RH-Acked-by: Jonathan Toppins <jtoppins@redhat.com>
d8f823
d8f823
Bugzilla: http://bugzilla.redhat.com/1789378
d8f823
Bugzilla: http://bugzilla.redhat.com/1790198
d8f823
Upstream: v5.4-rc1
d8f823
d8f823
commit 32c57fb26863b48982e33aa95f3b5b23f24b1feb
d8f823
Author: Aya Levin <ayal@mellanox.com>
d8f823
Date:   Tue Jun 25 21:42:27 2019 +0300
d8f823
d8f823
    net/mlx5e: Report and recover from rx timeout
d8f823
d8f823
    Add support for reporting and recovering from rx timeout. On driver open we
d8f823
    post NOP work request on the rx channels to trigger napi in order to
d8f823
    fill up the rx rings. In case napi wasn't scheduled due to a lost
d8f823
    interrupt, perform EQ recovery.
d8f823
d8f823
    Signed-off-by: Aya Levin <ayal@mellanox.com>
d8f823
    Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
d8f823
    Acked-by: Jiri Pirko <jiri@mellanox.com>
d8f823
    Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
d8f823
d8f823
Signed-off-by: Alaa Hleihel <ahleihel@redhat.com>
d8f823
Signed-off-by: Frantisek Hrbata <fhrbata@redhat.com>
d8f823
---
d8f823
 .../net/ethernet/mellanox/mlx5/core/en/health.h    |  1 +
d8f823
 .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 32 ++++++++++++++++++++++
d8f823
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  1 +
d8f823
 3 files changed, 34 insertions(+)
d8f823
d8f823
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
d8f823
index 8acd9dc520cf..b4a2d9be17d6 100644
d8f823
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
d8f823
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
d8f823
@@ -19,6 +19,7 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
d8f823
 int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
d8f823
 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
d8f823
 void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
d8f823
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
d8f823
 
d8f823
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
d8f823
 
d8f823
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
d8f823
index 661de567ca6c..4e933db759b2 100644
d8f823
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
d8f823
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
d8f823
@@ -115,6 +115,38 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
d8f823
 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
d8f823
 }
d8f823
 
d8f823
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
d8f823
+{
d8f823
+	struct mlx5e_icosq *icosq;
d8f823
+	struct mlx5_eq_comp *eq;
d8f823
+	struct mlx5e_rq *rq;
d8f823
+	int err;
d8f823
+
d8f823
+	rq = ctx;
d8f823
+	icosq = &rq->channel->icosq;
d8f823
+	eq = rq->cq.mcq.eq;
d8f823
+	err = mlx5e_health_channel_eq_recover(eq, rq->channel);
d8f823
+	if (err)
d8f823
+		clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
d8f823
+
d8f823
+	return err;
d8f823
+}
d8f823
+
d8f823
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
d8f823
+{
d8f823
+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
d8f823
+	struct mlx5e_priv *priv = rq->channel->priv;
d8f823
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
d8f823
+	struct mlx5e_err_ctx err_ctx = {};
d8f823
+
d8f823
+	err_ctx.ctx = rq;
d8f823
+	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
d8f823
+	sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
d8f823
+		icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
d8f823
+
d8f823
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
d8f823
+}
d8f823
+
d8f823
 static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
d8f823
 {
d8f823
 	return err_ctx->recover(err_ctx->ctx);
d8f823
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
d8f823
index 430fb04ea96f..c3eba55e8a21 100644
d8f823
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
d8f823
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
d8f823
@@ -799,6 +799,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
d8f823
 	netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
d8f823
 		    c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
d8f823
 
d8f823
+	mlx5e_reporter_rx_timeout(rq);
d8f823
 	return -ETIMEDOUT;
d8f823
 }
d8f823
 
d8f823
-- 
d8f823
2.13.6
d8f823