Blame SOURCES/0215-netdrv-net-mlx5-E-Switch-Increase-number-of-chains-a.patch

d8f823
From e4bdcb77308e795a58ba030a7c27d11a62e98515 Mon Sep 17 00:00:00 2001
d8f823
From: Alaa Hleihel <ahleihel@redhat.com>
d8f823
Date: Tue, 19 May 2020 07:48:36 -0400
d8f823
Subject: [PATCH 215/312] [netdrv] net/mlx5: E-Switch, Increase number of
d8f823
 chains and priorities
d8f823
d8f823
Message-id: <20200519074934.6303-6-ahleihel@redhat.com>
d8f823
Patchwork-id: 310507
d8f823
Patchwork-instance: patchwork
d8f823
O-Subject: [RHEL8.3 BZ 1663246 05/63] net/mlx5: E-Switch, Increase number of chains and priorities
d8f823
Bugzilla: 1663246
d8f823
RH-Acked-by: Marcelo Leitner <mleitner@redhat.com>
d8f823
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
d8f823
RH-Acked-by: John Linville <linville@redhat.com>
d8f823
RH-Acked-by: Ivan Vecera <ivecera@redhat.com>
d8f823
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
d8f823
RH-Acked-by: Kamal Heib <kheib@redhat.com>
d8f823
d8f823
Bugzilla: http://bugzilla.redhat.com/1663246
d8f823
Upstream: v5.6-rc1
d8f823
d8f823
commit 278d51f24330718aefd7fe86996a6da66fd345e7
d8f823
Author: Paul Blakey <paulb@mellanox.com>
d8f823
Date:   Wed Nov 20 15:06:19 2019 +0200
d8f823
d8f823
    net/mlx5: E-Switch, Increase number of chains and priorities
d8f823
d8f823
    Increase the number of chains and priorities to support
d8f823
    the whole range available in tc.
d8f823
d8f823
    We use unmanaged tables and ignore flow level to create more
d8f823
    tables than what we declared to fs_core steering, and we manage
d8f823
    the connections between the tables themselves.
d8f823
d8f823
    To support that we need FW with ignore_flow_level capability.
d8f823
    Otherwise the old behaviour will be used, where we are limited
d8f823
    by the number of levels we declared (4 chains, 16 prios).
d8f823
d8f823
    Signed-off-by: Paul Blakey <paulb@mellanox.com>
d8f823
    Reviewed-by: Roi Dayan <roid@mellanox.com>
d8f823
    Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
d8f823
    Reviewed-by: Mark Bloch <markb@mellanox.com>
d8f823
    Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
d8f823
d8f823
Signed-off-by: Alaa Hleihel <ahleihel@redhat.com>
d8f823
Signed-off-by: Frantisek Hrbata <fhrbata@redhat.com>
d8f823
---
d8f823
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |   3 +-
d8f823
 .../mellanox/mlx5/core/eswitch_offloads_chains.c   | 238 ++++++++++++++++++++-
d8f823
 .../mellanox/mlx5/core/eswitch_offloads_chains.h   |   3 +
d8f823
 3 files changed, 232 insertions(+), 12 deletions(-)
d8f823
d8f823
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
d8f823
index b8db12635730..7c33ce7ec074 100644
d8f823
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
d8f823
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
d8f823
@@ -151,7 +151,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
d8f823
 		if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
d8f823
 			flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
d8f823
 			dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
d8f823
-			dest[i].ft = esw->fdb_table.offloads.slow_fdb;
d8f823
+			dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw);
d8f823
 			i++;
d8f823
 		} else if (attr->dest_chain) {
d8f823
 			flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
d8f823
@@ -275,6 +275,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
d8f823
 	if (attr->outer_match_level != MLX5_MATCH_NONE)
d8f823
 		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
d8f823
 
d8f823
+	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
d8f823
 	rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
d8f823
 
d8f823
 	if (IS_ERR(rule))
d8f823
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
d8f823
index 589b94df252a..d569969afd9d 100644
d8f823
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
d8f823
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
d8f823
@@ -16,6 +16,10 @@
d8f823
 #define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht)
d8f823
 #define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht)
d8f823
 #define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left)
d8f823
+#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb)
d8f823
+#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb)
d8f823
+#define fdb_ignore_flow_level_supported(esw) \
d8f823
+	(MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level))
d8f823
 
d8f823
 #define ESW_OFFLOADS_NUM_GROUPS  4
d8f823
 
d8f823
@@ -39,6 +43,8 @@ struct mlx5_esw_chains_priv {
d8f823
 	/* Protects above chains_ht and prios_ht */
d8f823
 	struct mutex lock;
d8f823
 
d8f823
+	struct mlx5_flow_table *tc_end_fdb;
d8f823
+
d8f823
 	int fdb_left[ARRAY_SIZE(ESW_POOLS)];
d8f823
 };
d8f823
 
d8f823
@@ -50,6 +56,7 @@ struct fdb_chain {
d8f823
 	int ref;
d8f823
 
d8f823
 	struct mlx5_eswitch *esw;
d8f823
+	struct list_head prios_list;
d8f823
 };
d8f823
 
d8f823
 struct fdb_prio_key {
d8f823
@@ -60,6 +67,7 @@ struct fdb_prio_key {
d8f823
 
d8f823
 struct fdb_prio {
d8f823
 	struct rhash_head node;
d8f823
+	struct list_head list;
d8f823
 
d8f823
 	struct fdb_prio_key key;
d8f823
 
d8f823
@@ -67,6 +75,9 @@ struct fdb_prio {
d8f823
 
d8f823
 	struct fdb_chain *fdb_chain;
d8f823
 	struct mlx5_flow_table *fdb;
d8f823
+	struct mlx5_flow_table *next_fdb;
d8f823
+	struct mlx5_flow_group *miss_group;
d8f823
+	struct mlx5_flow_handle *miss_rule;
d8f823
 };
d8f823
 
d8f823
 static const struct rhashtable_params chain_params = {
d8f823
@@ -93,6 +104,9 @@ u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw)
d8f823
 	if (!mlx5_esw_chains_prios_supported(esw))
d8f823
 		return 1;
d8f823
 
d8f823
+	if (fdb_ignore_flow_level_supported(esw))
d8f823
+		return UINT_MAX - 1;
d8f823
+
d8f823
 	return FDB_TC_MAX_CHAIN;
d8f823
 }
d8f823
 
d8f823
@@ -106,11 +120,17 @@ u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw)
d8f823
 	if (!mlx5_esw_chains_prios_supported(esw))
d8f823
 		return 1;
d8f823
 
d8f823
+	if (fdb_ignore_flow_level_supported(esw))
d8f823
+		return UINT_MAX;
d8f823
+
d8f823
 	return FDB_TC_MAX_PRIO;
d8f823
 }
d8f823
 
d8f823
 static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
d8f823
 {
d8f823
+	if (fdb_ignore_flow_level_supported(esw))
d8f823
+		return UINT_MAX;
d8f823
+
d8f823
 	return FDB_TC_LEVELS_PER_PRIO;
d8f823
 }
d8f823
 
d8f823
@@ -181,13 +201,40 @@ mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw,
d8f823
 	sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE);
d8f823
 	if (!sz)
d8f823
 		return ERR_PTR(-ENOSPC);
d8f823
-
d8f823
 	ft_attr.max_fte = sz;
d8f823
-	ft_attr.level = level;
d8f823
-	ft_attr.prio = prio - 1;
d8f823
-	ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS;
d8f823
-	ns = mlx5_get_fdb_sub_ns(esw->dev, chain);
d8f823
 
d8f823
+	/* We use tc_slow_fdb(esw) as the table's next_ft till
d8f823
+	 * ignore_flow_level is allowed on FT creation and not just for FTEs.
d8f823
+	 * Instead caller should add an explicit miss rule if needed.
d8f823
+	 */
d8f823
+	ft_attr.next_ft = tc_slow_fdb(esw);
d8f823
+
d8f823
+	/* The root table(chain 0, prio 1, level 0) is required to be
d8f823
+	 * connected to the previous prio (FDB_BYPASS_PATH if exists).
d8f823
+	 * We always create it, as a managed table, in order to align with
d8f823
+	 * fs_core logic.
d8f823
+	 */
d8f823
+	if (!fdb_ignore_flow_level_supported(esw) ||
d8f823
+	    (chain == 0 && prio == 1 && level == 0)) {
d8f823
+		ft_attr.level = level;
d8f823
+		ft_attr.prio = prio - 1;
d8f823
+		ns = mlx5_get_fdb_sub_ns(esw->dev, chain);
d8f823
+	} else {
d8f823
+		ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
d8f823
+		ft_attr.prio = FDB_TC_OFFLOAD;
d8f823
+		/* Firmware doesn't allow us to create another level 0 table,
d8f823
+		 * so we create all unmanaged tables as level 1.
d8f823
+		 *
d8f823
+		 * To connect them, we use explicit miss rules with
d8f823
+		 * ignore_flow_level. Caller is responsible to create
d8f823
+		 * these rules (if needed).
d8f823
+		 */
d8f823
+		ft_attr.level = 1;
d8f823
+		ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
d8f823
+	}
d8f823
+
d8f823
+	ft_attr.autogroup.num_reserved_entries = 2;
d8f823
+	ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS;
d8f823
 	fdb = mlx5_create_auto_grouped_flow_table_attr_(ns, &ft_attr);
d8f823
 	if (IS_ERR(fdb)) {
d8f823
 		esw_warn(esw->dev,
d8f823
@@ -220,6 +267,7 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
d8f823
 
d8f823
 	fdb_chain->esw = esw;
d8f823
 	fdb_chain->chain = chain;
d8f823
+	INIT_LIST_HEAD(&fdb_chain->prios_list);
d8f823
 
d8f823
 	err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node,
d8f823
 				     chain_params);
d8f823
@@ -261,6 +309,79 @@ mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
d8f823
 	return fdb_chain;
d8f823
 }
d8f823
 
d8f823
+static struct mlx5_flow_handle *
d8f823
+mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
d8f823
+			      struct mlx5_flow_table *next_fdb)
d8f823
+{
d8f823
+	static const struct mlx5_flow_spec spec = {};
d8f823
+	struct mlx5_flow_destination dest = {};
d8f823
+	struct mlx5_flow_act act = {};
d8f823
+
d8f823
+	act.flags  = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
d8f823
+	act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
d8f823
+	dest.type  = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
d8f823
+	dest.ft = next_fdb;
d8f823
+
d8f823
+	return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1);
d8f823
+}
d8f823
+
d8f823
+static int
d8f823
+mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio,
d8f823
+				  struct mlx5_flow_table *next_fdb)
d8f823
+{
d8f823
+	struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
d8f823
+	struct fdb_chain *fdb_chain = fdb_prio->fdb_chain;
d8f823
+	struct fdb_prio *pos;
d8f823
+	int n = 0, err;
d8f823
+
d8f823
+	if (fdb_prio->key.level)
d8f823
+		return 0;
d8f823
+
d8f823
+	/* Iterate in reverse order until reaching the level 0 rule of
d8f823
+	 * the previous priority, adding all the miss rules first, so we can
d8f823
+	 * revert them if any of them fails.
d8f823
+	 */
d8f823
+	pos = fdb_prio;
d8f823
+	list_for_each_entry_continue_reverse(pos,
d8f823
+					     &fdb_chain->prios_list,
d8f823
+					     list) {
d8f823
+		miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb,
d8f823
+							      next_fdb);
d8f823
+		if (IS_ERR(miss_rules[n])) {
d8f823
+			err = PTR_ERR(miss_rules[n]);
d8f823
+			goto err_prev_rule;
d8f823
+		}
d8f823
+
d8f823
+		n++;
d8f823
+		if (!pos->key.level)
d8f823
+			break;
d8f823
+	}
d8f823
+
d8f823
+	/* Success, delete old miss rules, and update the pointers. */
d8f823
+	n = 0;
d8f823
+	pos = fdb_prio;
d8f823
+	list_for_each_entry_continue_reverse(pos,
d8f823
+					     &fdb_chain->prios_list,
d8f823
+					     list) {
d8f823
+		mlx5_del_flow_rules(pos->miss_rule);
d8f823
+
d8f823
+		pos->miss_rule = miss_rules[n];
d8f823
+		pos->next_fdb = next_fdb;
d8f823
+
d8f823
+		n++;
d8f823
+		if (!pos->key.level)
d8f823
+			break;
d8f823
+	}
d8f823
+
d8f823
+	return 0;
d8f823
+
d8f823
+err_prev_rule:
d8f823
+	while (--n >= 0)
d8f823
+		mlx5_del_flow_rules(miss_rules[n]);
d8f823
+
d8f823
+	return err;
d8f823
+}
d8f823
+
d8f823
 static void
d8f823
 mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain)
d8f823
 {
d8f823
@@ -272,9 +393,15 @@ static struct fdb_prio *
d8f823
 mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
d8f823
 				u32 chain, u32 prio, u32 level)
d8f823
 {
d8f823
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
d8f823
+	struct mlx5_flow_handle *miss_rule = NULL;
d8f823
+	struct mlx5_flow_group *miss_group;
d8f823
 	struct fdb_prio *fdb_prio = NULL;
d8f823
+	struct mlx5_flow_table *next_fdb;
d8f823
 	struct fdb_chain *fdb_chain;
d8f823
 	struct mlx5_flow_table *fdb;
d8f823
+	struct list_head *pos;
d8f823
+	u32 *flow_group_in;
d8f823
 	int err;
d8f823
 
d8f823
 	fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain);
d8f823
@@ -282,18 +409,65 @@ mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
d8f823
 		return ERR_CAST(fdb_chain);
d8f823
 
d8f823
 	fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL);
d8f823
-	if (!fdb_prio) {
d8f823
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
d8f823
+	if (!fdb_prio || !flow_group_in) {
d8f823
 		err = -ENOMEM;
d8f823
 		goto err_alloc;
d8f823
 	}
d8f823
 
d8f823
-	fdb = mlx5_esw_chains_create_fdb_table(esw, fdb_chain->chain, prio,
d8f823
-					       level);
d8f823
+	/* Chain's prio list is sorted by prio and level.
d8f823
+	 * And all levels of some prio point to the next prio's level 0.
d8f823
+	 * Example list (prio, level):
d8f823
+	 * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
d8f823
+	 * In hardware, we will have the following pointers:
d8f823
+	 * (3,0) -> (5,0) -> (7,0) -> Slow path
d8f823
+	 * (3,1) -> (5,0)
d8f823
+	 * (5,1) -> (7,0)
d8f823
+	 * (6,1) -> (7,0)
d8f823
+	 */
d8f823
+
d8f823
+	/* Default miss for each chain: */
d8f823
+	next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ?
d8f823
+		    tc_slow_fdb(esw) :
d8f823
+		    tc_end_fdb(esw);
d8f823
+	list_for_each(pos, &fdb_chain->prios_list) {
d8f823
+		struct fdb_prio *p = list_entry(pos, struct fdb_prio, list);
d8f823
+
d8f823
+		/* exit on first pos that is larger */
d8f823
+		if (prio < p->key.prio || (prio == p->key.prio &&
d8f823
+					   level < p->key.level)) {
d8f823
+			/* Get next level 0 table */
d8f823
+			next_fdb = p->key.level == 0 ? p->fdb : p->next_fdb;
d8f823
+			break;
d8f823
+		}
d8f823
+	}
d8f823
+
d8f823
+	fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level);
d8f823
 	if (IS_ERR(fdb)) {
d8f823
 		err = PTR_ERR(fdb);
d8f823
 		goto err_create;
d8f823
 	}
d8f823
 
d8f823
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
d8f823
+		 fdb->max_fte - 2);
d8f823
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
d8f823
+		 fdb->max_fte - 1);
d8f823
+	miss_group = mlx5_create_flow_group(fdb, flow_group_in);
d8f823
+	if (IS_ERR(miss_group)) {
d8f823
+		err = PTR_ERR(miss_group);
d8f823
+		goto err_group;
d8f823
+	}
d8f823
+
d8f823
+	/* Add miss rule to next_fdb */
d8f823
+	miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb);
d8f823
+	if (IS_ERR(miss_rule)) {
d8f823
+		err = PTR_ERR(miss_rule);
d8f823
+		goto err_miss_rule;
d8f823
+	}
d8f823
+
d8f823
+	fdb_prio->miss_group = miss_group;
d8f823
+	fdb_prio->miss_rule = miss_rule;
d8f823
+	fdb_prio->next_fdb = next_fdb;
d8f823
 	fdb_prio->fdb_chain = fdb_chain;
d8f823
 	fdb_prio->key.chain = chain;
d8f823
 	fdb_prio->key.prio = prio;
d8f823
@@ -305,13 +479,30 @@ mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
d8f823
 	if (err)
d8f823
 		goto err_insert;
d8f823
 
d8f823
+	list_add(&fdb_prio->list, pos->prev);
d8f823
+
d8f823
+	/* Table is ready, connect it */
d8f823
+	err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb);
d8f823
+	if (err)
d8f823
+		goto err_update;
d8f823
+
d8f823
+	kvfree(flow_group_in);
d8f823
 	return fdb_prio;
d8f823
 
d8f823
+err_update:
d8f823
+	list_del(&fdb_prio->list);
d8f823
+	rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
d8f823
+			       prio_params);
d8f823
 err_insert:
d8f823
+	mlx5_del_flow_rules(miss_rule);
d8f823
+err_miss_rule:
d8f823
+	mlx5_destroy_flow_group(miss_group);
d8f823
+err_group:
d8f823
 	mlx5_esw_chains_destroy_fdb_table(esw, fdb);
d8f823
 err_create:
d8f823
-	kvfree(fdb_prio);
d8f823
 err_alloc:
d8f823
+	kvfree(fdb_prio);
d8f823
+	kvfree(flow_group_in);
d8f823
 	mlx5_esw_chains_put_fdb_chain(fdb_chain);
d8f823
 	return ERR_PTR(err);
d8f823
 }
d8f823
@@ -322,8 +513,14 @@ mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw,
d8f823
 {
d8f823
 	struct fdb_chain *fdb_chain = fdb_prio->fdb_chain;
d8f823
 
d8f823
+	WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio,
d8f823
+						  fdb_prio->next_fdb));
d8f823
+
d8f823
+	list_del(&fdb_prio->list);
d8f823
 	rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
d8f823
 			       prio_params);
d8f823
+	mlx5_del_flow_rules(fdb_prio->miss_rule);
d8f823
+	mlx5_destroy_flow_group(fdb_prio->miss_group);
d8f823
 	mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb);
d8f823
 	mlx5_esw_chains_put_fdb_chain(fdb_chain);
d8f823
 	kvfree(fdb_prio);
d8f823
@@ -415,6 +612,12 @@ mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
d8f823
 		  chain, prio, level);
d8f823
 }
d8f823
 
d8f823
+struct mlx5_flow_table *
d8f823
+mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw)
d8f823
+{
d8f823
+	return tc_end_fdb(esw);
d8f823
+}
d8f823
+
d8f823
 static int
d8f823
 mlx5_esw_chains_init(struct mlx5_eswitch *esw)
d8f823
 {
d8f823
@@ -484,11 +687,21 @@ mlx5_esw_chains_open(struct mlx5_eswitch *esw)
d8f823
 	struct mlx5_flow_table *ft;
d8f823
 	int err;
d8f823
 
d8f823
-	/* Always open the root for fast path */
d8f823
-	ft = mlx5_esw_chains_get_table(esw, 0, 1, 0);
d8f823
+	/* Create tc_end_fdb(esw), the always-created table of the ft chain */
d8f823
+	ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw),
d8f823
+				       1, 0);
d8f823
 	if (IS_ERR(ft))
d8f823
 		return PTR_ERR(ft);
d8f823
 
d8f823
+	tc_end_fdb(esw) = ft;
d8f823
+
d8f823
+	/* Always open the root for fast path */
d8f823
+	ft = mlx5_esw_chains_get_table(esw, 0, 1, 0);
d8f823
+	if (IS_ERR(ft)) {
d8f823
+		err = PTR_ERR(ft);
d8f823
+		goto level_0_err;
d8f823
+	}
d8f823
+
d8f823
 	/* Open level 1 for split rules now if prios isn't supported  */
d8f823
 	if (!mlx5_esw_chains_prios_supported(esw)) {
d8f823
 		ft = mlx5_esw_chains_get_table(esw, 0, 1, 1);
d8f823
@@ -503,6 +716,8 @@ mlx5_esw_chains_open(struct mlx5_eswitch *esw)
d8f823
 
d8f823
 level_1_err:
d8f823
 	mlx5_esw_chains_put_table(esw, 0, 1, 0);
d8f823
+level_0_err:
d8f823
+	mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
d8f823
 	return err;
d8f823
 }
d8f823
 
d8f823
@@ -512,6 +727,7 @@ mlx5_esw_chains_close(struct mlx5_eswitch *esw)
d8f823
 	if (!mlx5_esw_chains_prios_supported(esw))
d8f823
 		mlx5_esw_chains_put_table(esw, 0, 1, 1);
d8f823
 	mlx5_esw_chains_put_table(esw, 0, 1, 0);
d8f823
+	mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
d8f823
 }
d8f823
 
d8f823
 int
d8f823
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
d8f823
index 52fadacab84d..2e13097fe348 100644
d8f823
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
d8f823
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
d8f823
@@ -20,6 +20,9 @@ void
d8f823
 mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
d8f823
 			  u32 level);
d8f823
 
d8f823
+struct mlx5_flow_table *
d8f823
+mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw);
d8f823
+
d8f823
 int mlx5_esw_chains_create(struct mlx5_eswitch *esw);
d8f823
 void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
d8f823
 
d8f823
-- 
d8f823
2.13.6
d8f823