Blame SOURCES/0278-netdrv-net-mlx5-Add-command-entry-handling-completio.patch

d8f823
From c751e5890724beeb0b41017f54ac5509f50697e8 Mon Sep 17 00:00:00 2001
d8f823
From: Alaa Hleihel <ahleihel@redhat.com>
d8f823
Date: Mon, 1 Jun 2020 15:40:31 -0400
d8f823
Subject: [PATCH 278/312] [netdrv] net/mlx5: Add command entry handling
d8f823
 completion
d8f823
d8f823
Message-id: <20200601154102.25980-9-ahleihel@redhat.com>
d8f823
Patchwork-id: 315713
d8f823
Patchwork-instance: patchwork
d8f823
O-Subject: [RHEL8.3 BZ 1842258 08/39] net/mlx5: Add command entry handling completion
d8f823
Bugzilla: 1842258
d8f823
RH-Acked-by: Honggang Li <honli@redhat.com>
d8f823
RH-Acked-by: Kamal Heib <kheib@redhat.com>
d8f823
RH-Acked-by: Marcelo Leitner <mleitner@redhat.com>
d8f823
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
d8f823
d8f823
Bugzilla: http://bugzilla.redhat.com/1842258
d8f823
Upstream: v5.7-rc7
d8f823
d8f823
commit 17d00e839d3b592da9659c1977d45f85b77f986a
d8f823
Author: Moshe Shemesh <moshe@mellanox.com>
d8f823
Date:   Fri Dec 27 07:01:53 2019 +0200
d8f823
d8f823
    net/mlx5: Add command entry handling completion
d8f823
d8f823
    When FW response to commands is very slow and all command entries in
d8f823
    use are waiting for completion we can have a race where commands can get
d8f823
    timeout before they get out of the queue and are handled. Timeout
d8f823
    completion on uninitialized command will cause releasing command's
d8f823
    buffers before accessing it for initialization and then we will get NULL
d8f823
    pointer exception while trying to access it. It may also cause releasing
d8f823
    buffers of another command since we may have timeout completion before
d8f823
    even allocating entry index for this command.
d8f823
    Add entry handling completion to avoid this race.
d8f823
d8f823
    Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
d8f823
    Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
d8f823
    Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
d8f823
    Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
d8f823
d8f823
Signed-off-by: Alaa Hleihel <ahleihel@redhat.com>
d8f823
Signed-off-by: Frantisek Hrbata <fhrbata@redhat.com>
d8f823
---
d8f823
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 14 ++++++++++++++
d8f823
 include/linux/mlx5/driver.h                   |  3 +++
d8f823
 2 files changed, 17 insertions(+)
d8f823
d8f823
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
d8f823
index 23acec5a31d4..50783828d2e8 100644
d8f823
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
d8f823
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
d8f823
@@ -861,6 +861,7 @@ static void cmd_work_handler(struct work_struct *work)
d8f823
 	int alloc_ret;
d8f823
 	int cmd_mode;
d8f823
 
d8f823
+	complete(&ent->handling);
d8f823
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
d8f823
 	down(sem);
d8f823
 	if (!ent->page_queue) {
d8f823
@@ -978,6 +979,11 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
d8f823
 	struct mlx5_cmd *cmd = &dev->cmd;
d8f823
 	int err;
d8f823
 
d8f823
+	if (!wait_for_completion_timeout(&ent->handling, timeout) &&
d8f823
+	    cancel_work_sync(&ent->work)) {
d8f823
+		ent->ret = -ECANCELED;
d8f823
+		goto out_err;
d8f823
+	}
d8f823
 	if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
d8f823
 		wait_for_completion(&ent->done);
d8f823
 	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
d8f823
@@ -985,12 +991,17 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
d8f823
 		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
d8f823
 	}
d8f823
 
d8f823
+out_err:
d8f823
 	err = ent->ret;
d8f823
 
d8f823
 	if (err == -ETIMEDOUT) {
d8f823
 		mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
d8f823
 			       mlx5_command_str(msg_to_opcode(ent->in)),
d8f823
 			       msg_to_opcode(ent->in));
d8f823
+	} else if (err == -ECANCELED) {
d8f823
+		mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
d8f823
+			       mlx5_command_str(msg_to_opcode(ent->in)),
d8f823
+			       msg_to_opcode(ent->in));
d8f823
 	}
d8f823
 	mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
d8f823
 		      err, deliv_status_to_str(ent->status), ent->status);
d8f823
@@ -1026,6 +1037,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
d8f823
 	ent->token = token;
d8f823
 	ent->polling = force_polling;
d8f823
 
d8f823
+	init_completion(&ent->handling);
d8f823
 	if (!callback)
d8f823
 		init_completion(&ent->done);
d8f823
 
d8f823
@@ -1045,6 +1057,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
d8f823
 	err = wait_func(dev, ent);
d8f823
 	if (err == -ETIMEDOUT)
d8f823
 		goto out;
d8f823
+	if (err == -ECANCELED)
d8f823
+		goto out_free;
d8f823
 
d8f823
 	ds = ent->ts2 - ent->ts1;
d8f823
 	op = MLX5_GET(mbox_in, in->first.data, opcode);
d8f823
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
d8f823
index 0d728007078c..df47476d6fca 100644
d8f823
--- a/include/linux/mlx5/driver.h
d8f823
+++ b/include/linux/mlx5/driver.h
d8f823
@@ -761,6 +761,9 @@ struct mlx5_cmd_work_ent {
d8f823
 	struct delayed_work	cb_timeout_work;
d8f823
 	void		       *context;
d8f823
 	int			idx;
d8f823
+#ifndef __GENKSYMS__
d8f823
+	struct completion	handling;
d8f823
+#endif
d8f823
 	struct completion	done;
d8f823
 	struct mlx5_cmd        *cmd;
d8f823
 	struct work_struct	work;
d8f823
-- 
d8f823
2.13.6
d8f823