Blob Blame History Raw
From 71d5ed76fabd2bd8135d8c34484842f6acad5a65 Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <ahleihel@redhat.com>
Date: Tue, 19 May 2020 07:49:34 -0400
Subject: [PATCH 271/312] [netdrv] net/mlx5: E-switch, Protect eswitch mode
 changes

Message-id: <20200519074934.6303-64-ahleihel@redhat.com>
Patchwork-id: 310566
Patchwork-instance: patchwork
O-Subject: [RHEL8.3 BZ 1663246 63/63] net/mlx5: E-switch, Protect eswitch mode changes
Bugzilla: 1835595 1663246
RH-Acked-by: Marcelo Leitner <mleitner@redhat.com>
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
RH-Acked-by: John Linville <linville@redhat.com>
RH-Acked-by: Ivan Vecera <ivecera@redhat.com>
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
RH-Acked-by: Kamal Heib <kheib@redhat.com>

Bugzilla: http://bugzilla.redhat.com/1835595
Bugzilla: http://bugzilla.redhat.com/1663246
Upstream: v5.7-rc1
Conflicts:
 - drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
   Diff due to missing commit:
   9ffbe8ac05db ("locking/lockdep: Rename lockdep_assert_held_exclusive() -> lockdep_assert_held_write()")
   ---> Use lockdep_assert_held_exclusive instead of lockdep_assert_held_write.

commit 8e0aa4bc959c98c14ed0aaee522d77ca52690189
Author: Parav Pandit <parav@mellanox.com>
Date:   Wed Dec 18 02:51:19 2019 -0600

    net/mlx5: E-switch, Protect eswitch mode changes

    Currently eswitch mode change is occurring from 2 different execution
    contexts as below.
    1. sriov sysfs enable/disable
    2. devlink eswitch set commands

    Both of them need to access eswitch related data structures in
    synchronized manner.
    Without any synchronization below race condition exist.

    SR-IOV enable/disable with devlink eswitch mode change:

              cpu-0                         cpu-1
              -----                         -----
    mlx5_device_disable_sriov()        mlx5_devlink_eswitch_mode_set()
      mlx5_eswitch_disable()             esw_offloads_stop()
        esw_offloads_disable()             mlx5_eswitch_disable()
                                             esw_offloads_disable()

    Hence, they are synchronized using a new mode_lock.
    eswitch's state_lock is not used as it can lead to a deadlock scenario
    below and state_lock is only for vport and fdb exclusive access.

    ip link set vf <param>
      netlink rcv_msg() - Lock A
        rtnl_lock
        vfinfo()
          esw->state_lock() - Lock B
    devlink eswitch_set
       devlink_mutex
         esw->state_lock() - Lock B
           attach_netdev()
             register_netdev()
               rtnl_lock - Lock A

    Alternatives considered:
    1. Acquiring rtnl lock before taking esw->state_lock to follow similar
    locking sequence as ip link flow during eswitch mode set.
    rtnl lock is not good idea for two reasons.
    (a) Holding rtnl lock for several hundred device commands is not good
        idea.
    (b) It leads to below and more similar deadlocks.

    devlink eswitch_set
       devlink_mutex
         rtnl_lock - Lock A
           esw->state_lock() - Lock B
             eswitch_disable()
               reload()
                 ib_register_device()
                   ib_cache_setup_one()
                     rtnl_lock()

    2. Exporting devlink lock may lead to undesired use of it in vendor
    driver(s) in future.

    3. Unloading representors outside of the mode_lock requires
    serialization with other process trying to enable the eswitch.

    4. Differing the representors life cycle to a different workqueue
    requires synchronization with func_change_handler workqueue.

    Reviewed-by: Roi Dayan <roid@mellanox.com>
    Reviewed-by: Bodong Wang <bodong@mellanox.com>
    Reviewed-by: Mark Bloch <markb@mellanox.com>
    Signed-off-by: Parav Pandit <parav@mellanox.com>
    Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>

Signed-off-by: Alaa Hleihel <ahleihel@redhat.com>
Signed-off-by: Frantisek Hrbata <fhrbata@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  54 +++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  13 ++-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 105 +++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c    |   3 +-
 4 files changed, 125 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 19b887ad9392..4126f9be46d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2051,7 +2051,7 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs)
 }
 
 /**
- * mlx5_eswitch_enable - Enable eswitch
+ * mlx5_eswitch_enable_locked - Enable eswitch
  * @esw:	Pointer to eswitch
  * @mode:	Eswitch mode to enable
  * @num_vfs:	Enable eswitch for given number of VFs. This is optional.
@@ -2063,16 +2063,17 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs)
  *		eswitch. Caller should pass < 0 when num_vfs should be
  *		completely ignored. This is typically the case when eswitch
  *		is enabled without sriov regardless of PF/ECPF system.
- * mlx5_eswitch_enable() Enables eswitch in either legacy or offloads mode.
- * If num_vfs >=0 is provided, it setup VF related eswitch vports. It returns
- * 0 on success or error code on failure.
+ * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads
+ * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports.
+ * It returns 0 on success or error code on failure.
  */
-int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs)
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
 {
 	int err;
 
-	if (!ESW_ALLOWED(esw) ||
-	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+	lockdep_assert_held(&esw->mode_lock);
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
 		esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
 		return -EOPNOTSUPP;
 	}
@@ -2123,11 +2124,34 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs)
 	return err;
 }
 
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
+/**
+ * mlx5_eswitch_enable - Enable eswitch
+ * @esw:	Pointer to eswitch
+ * @num_vfs:	Enable eswitch swich for given number of VFs.
+ *		Caller must pass num_vfs > 0 when enabling eswitch for
+ *		vf vports.
+ * mlx5_eswitch_enable() returns 0 on success or error code on failure.
+ */
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
+{
+	int ret;
+
+	if (!ESW_ALLOWED(esw))
+		return 0;
+
+	mutex_lock(&esw->mode_lock);
+	ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
+	mutex_unlock(&esw->mode_lock);
+	return ret;
+}
+
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
 {
 	int old_mode;
 
-	if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE)
+	lockdep_assert_held_exclusive(&esw->mode_lock);
+
+	if (esw->mode == MLX5_ESWITCH_NONE)
 		return;
 
 	esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
@@ -2156,6 +2180,16 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 		mlx5_eswitch_clear_vf_vports_info(esw);
 }
 
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
+{
+	if (!ESW_ALLOWED(esw))
+		return;
+
+	mutex_lock(&esw->mode_lock);
+	mlx5_eswitch_disable_locked(esw, clear_vf);
+	mutex_unlock(&esw->mode_lock);
+}
+
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eswitch *esw;
@@ -2207,6 +2241,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	hash_init(esw->offloads.mod_hdr.hlist);
 	atomic64_set(&esw->offloads.num_flows, 0);
 	mutex_init(&esw->state_lock);
+	mutex_init(&esw->mode_lock);
 
 	mlx5_esw_for_all_vports(esw, i, vport) {
 		vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
@@ -2241,6 +2276,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
 	esw_offloads_cleanup_reps(esw);
+	mutex_destroy(&esw->mode_lock);
 	mutex_destroy(&esw->state_lock);
 	mutex_destroy(&esw->offloads.mod_hdr.lock);
 	mutex_destroy(&esw->offloads.encap_tbl_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5ec76f48e9b7..05600fda987f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -295,6 +295,13 @@ struct mlx5_eswitch {
 	 */
 	struct mutex            state_lock;
 
+#ifndef __GENKSYMS__
+	/* Protects eswitch mode change that occurs via one or more
+	 * user commands, i.e. sriov state change, devlink commands.
+	 */
+	struct mutex mode_lock;
+#endif
+
 	struct {
 		bool            enabled;
 		u32             root_tsar_id;
@@ -337,7 +344,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
 
 #define MLX5_ESWITCH_IGNORE_NUM_VFS (-1)
-int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs);
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs);
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf);
 void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 			       u16 vport, u8 mac[ETH_ALEN]);
@@ -701,7 +710,7 @@ esw_get_max_restore_tag(struct mlx5_eswitch *esw);
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) { return 0; }
+static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
 static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
 static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
 static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 29280218518a..f34a5160d104 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1549,14 +1549,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
 		return -EINVAL;
 	}
 
-	mlx5_eswitch_disable(esw, false);
-	err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS,
-				  esw->dev->priv.sriov.num_vfs);
+	mlx5_eswitch_disable_locked(esw, false);
+	err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS,
+					 esw->dev->priv.sriov.num_vfs);
 	if (err) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Failed setting eswitch to offloads");
-		err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY,
-					   MLX5_ESWITCH_IGNORE_NUM_VFS);
+		err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY,
+						  MLX5_ESWITCH_IGNORE_NUM_VFS);
 		if (err1) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Failed setting eswitch back to legacy");
@@ -2473,13 +2473,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
 {
 	int err, err1;
 
-	mlx5_eswitch_disable(esw, false);
-	err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY,
-				  MLX5_ESWITCH_IGNORE_NUM_VFS);
+	mlx5_eswitch_disable_locked(esw, false);
+	err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY,
+					 MLX5_ESWITCH_IGNORE_NUM_VFS);
 	if (err) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
-		err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS,
-					   MLX5_ESWITCH_IGNORE_NUM_VFS);
+		err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS,
+						  MLX5_ESWITCH_IGNORE_NUM_VFS);
 		if (err1) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Failed setting eswitch back to offloads");
@@ -2601,6 +2601,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 				  struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	u16 cur_mlx5_mode, mlx5_mode = 0;
 	int err;
 
@@ -2608,40 +2609,50 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 	if (err)
 		return err;
 
-	err = eswitch_devlink_esw_mode_check(dev->priv.eswitch);
-	if (err)
-		return err;
-
-	cur_mlx5_mode = dev->priv.eswitch->mode;
-
 	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
+	mutex_lock(&esw->mode_lock);
+	err = eswitch_devlink_esw_mode_check(esw);
+	if (err)
+		goto unlock;
+
+	cur_mlx5_mode = esw->mode;
+
 	if (cur_mlx5_mode == mlx5_mode)
-		return 0;
+		goto unlock;
 
 	if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
-		return esw_offloads_start(dev->priv.eswitch, extack);
+		err = esw_offloads_start(esw, extack);
 	else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
-		return esw_offloads_stop(dev->priv.eswitch, extack);
+		err = esw_offloads_stop(esw, extack);
 	else
-		return -EINVAL;
+		err = -EINVAL;
+
+unlock:
+	mutex_unlock(&esw->mode_lock);
+	return err;
 }
 
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	int err;
 
 	err = mlx5_eswitch_check(dev);
 	if (err)
 		return err;
 
+	mutex_lock(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(dev->priv.eswitch);
 	if (err)
-		return err;
+		goto unlock;
 
-	return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
+	err = esw_mode_to_devlink(esw->mode, mode);
+unlock:
+	mutex_unlock(&esw->mode_lock);
+	return err;
 }
 
 int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
@@ -2656,18 +2667,20 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	if (err)
 		return err;
 
+	mutex_lock(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
-		return err;
+		goto out;
 
 	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
 	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
 		if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
-			return 0;
+			goto out;
 		/* fall through */
 	case MLX5_CAP_INLINE_MODE_L2:
 		NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto out;
 	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
 		break;
 	}
@@ -2675,7 +2688,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	if (atomic64_read(&esw->offloads.num_flows) > 0) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Can't set inline mode when flows are configured");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto out;
 	}
 
 	err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
@@ -2692,6 +2706,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	}
 
 	esw->offloads.inline_mode = mlx5_mode;
+	mutex_unlock(&esw->mode_lock);
 	return 0;
 
 revert_inline_mode:
@@ -2701,6 +2716,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 						 vport,
 						 esw->offloads.inline_mode);
 out:
+	mutex_unlock(&esw->mode_lock);
 	return err;
 }
 
@@ -2714,11 +2730,15 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 	if (err)
 		return err;
 
+	mutex_lock(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
-		return err;
+		goto unlock;
 
-	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+	err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+unlock:
+	mutex_unlock(&esw->mode_lock);
+	return err;
 }
 
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
@@ -2770,30 +2790,36 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
 	if (err)
 		return err;
 
+	mutex_lock(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
-		return err;
+		goto unlock;
 
 	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
 	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
-	     !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
-		return -EOPNOTSUPP;
+	     !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 
-	if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
-		return -EOPNOTSUPP;
+	if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 
 	if (esw->mode == MLX5_ESWITCH_LEGACY) {
 		esw->offloads.encap = encap;
-		return 0;
+		goto unlock;
 	}
 
 	if (esw->offloads.encap == encap)
-		return 0;
+		goto unlock;
 
 	if (atomic64_read(&esw->offloads.num_flows) > 0) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Can't set encapsulation when flows are configured");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto unlock;
 	}
 
 	esw_destroy_offloads_fdb_tables(esw);
@@ -2809,6 +2835,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
 		(void)esw_create_offloads_fdb_tables(esw, esw->nvports);
 	}
 
+unlock:
+	mutex_unlock(&esw->mode_lock);
 	return err;
 }
 
@@ -2823,11 +2851,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
 	if (err)
 		return err;
 
+	mutex_lock(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
-		return err;
+		goto unlock;
 
 	*encap = esw->offloads.encap;
+unlock:
+	mutex_unlock(&esw->mode_lock);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 10a64b91d04c..3094d20297a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -77,8 +77,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 	if (!MLX5_ESWITCH_MANAGER(dev))
 		goto enable_vfs_hca;
 
-	err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY,
-				  num_vfs);
+	err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs);
 	if (err) {
 		mlx5_core_warn(dev,
 			       "failed to enable eswitch SRIOV (%d)\n", err);
-- 
2.13.6