From dd674b21a94bd385f3ee22ffa180a2029beef026 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Tue, 19 May 2020 07:49:25 -0400 Subject: [PATCH 263/312] [netdrv] net/mlx5e: CT: Fix offload with CT action after CT NAT action Message-id: <20200519074934.6303-55-ahleihel@redhat.com> Patchwork-id: 310555 Patchwork-instance: patchwork O-Subject: [RHEL8.3 BZ 1663246 54/63] net/mlx5e: CT: Fix offload with CT action after CT NAT action Bugzilla: 1663246 RH-Acked-by: Marcelo Leitner RH-Acked-by: Jarod Wilson RH-Acked-by: John Linville RH-Acked-by: Ivan Vecera RH-Acked-by: Tony Camuso RH-Acked-by: Kamal Heib Bugzilla: http://bugzilla.redhat.com/1663246 Upstream: git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git , branch: master commit 9102d836d296fbc94517736d2dd1131ad6b01740 Author: Roi Dayan Date: Sun Apr 12 15:39:15 2020 +0300 net/mlx5e: CT: Fix offload with CT action after CT NAT action It could be a chain of rules will do action CT again after CT NAT Before this fix matching will break as we get into the CT table after NAT changes and not CT NAT. Fix this by adding pre ct and pre ct nat tables to skip ct/ct_nat tables and go straight to post_ct table if ct/nat was already done. Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: Alaa Hleihel Signed-off-by: Frantisek Hrbata --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 308 +++++++++++++++++++-- 1 file changed, 286 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 003079b09b67..8f94a4dde2bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -24,6 +24,7 @@ #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) #define MLX5_CT_STATE_TRK_BIT BIT(2) +#define MLX5_CT_STATE_NAT_BIT BIT(3) #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) @@ -61,6 +62,15 @@ struct mlx5_ct_zone_rule { bool nat; }; +struct mlx5_tc_ct_pre { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *flow_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_handle *miss_rule; + struct mlx5_modify_hdr *modify_hdr; +}; + struct mlx5_ct_ft { struct rhash_head node; u16 zone; @@ -68,6 +78,8 @@ struct mlx5_ct_ft { struct nf_flowtable *nf_ft; struct mlx5_tc_ct_priv *ct_priv; struct rhashtable ct_entries_ht; + struct mlx5_tc_ct_pre pre_ct; + struct mlx5_tc_ct_pre pre_ct_nat; }; struct mlx5_ct_entry { @@ -428,6 +440,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_modify_hdr *mod_hdr; struct flow_action_entry *meta; + u16 ct_state = 0; int err; meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); @@ -446,11 +459,13 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, &mod_acts); if (err) goto err_mapping; + + ct_state |= MLX5_CT_STATE_NAT_BIT; } + ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, - (MLX5_CT_STATE_ESTABLISHED_BIT | - MLX5_CT_STATE_TRK_BIT), + ct_state, meta->ct_metadata.mark, meta->ct_metadata.labels[0], tupleid); @@ -793,6 +808,238 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, return 0; } +static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_flow_table *fdb = pre_ct->fdb; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 ctstate; + u16 zone; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + zone = ct_ft->zone & MLX5_CT_ZONE_MASK; + err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + mod_hdr = mlx5_modify_header_alloc(dev, + MLX5_FLOW_NAMESPACE_FDB, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct->modify_hdr = mod_hdr; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act.modify_hdr = mod_hdr; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + /* add flow rule */ + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + zone, MLX5_CT_ZONE_MASK); + ctstate = MLX5_CT_STATE_TRK_BIT; + if (nat) + ctstate |= MLX5_CT_STATE_NAT_BIT; + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); + + dest.ft = ct_priv->post_ct; + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct flow rule zone %d", zone); + goto err_flow_rule; + } + pre_ct->flow_rule = rule; + + /* add miss rule */ + memset(spec, 0, sizeof(*spec)); + dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct miss rule zone %d", zone); + goto err_miss_rule; + } + pre_ct->miss_rule = rule; + + dealloc_mod_hdr_actions(&pre_mod_acts); + kvfree(spec); + return 0; + +err_miss_rule: + mlx5_del_flow_rules(pre_ct->flow_rule); +err_flow_rule: + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +err_mapping: + dealloc_mod_hdr_actions(&pre_mod_acts); + kvfree(spec); + return err; +} + +static void +tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + + mlx5_del_flow_rules(pre_ct->flow_rule); + mlx5_del_flow_rules(pre_ct->miss_rule); + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +} + +static int +mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 metadata_reg_c_2_mask; + u32 *flow_group_in; + void *misc; + int err; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + err = -EOPNOTSUPP; + ct_dbg("Failed to get FDB flow namespace"); + return err; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = 2; + ft_attr.level = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to create pre ct table"); + goto out_free; + } + pre_ct->fdb = ft; + + /* create flow group */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria.misc_parameters_2); + + metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; + metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); + if (nat) + metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, + metadata_reg_c_2_mask); + + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct group"); + goto err_flow_grp; + } + pre_ct->flow_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct miss group"); + goto err_miss_grp; + } + pre_ct->miss_grp = g; + + err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); + if (err) + goto err_add_rules; + + kvfree(flow_group_in); + return 0; + +err_add_rules: + mlx5_destroy_flow_group(pre_ct->miss_grp); +err_miss_grp: + mlx5_destroy_flow_group(pre_ct->flow_grp); +err_flow_grp: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + tc_ct_pre_ct_del_rules(ct_ft, pre_ct); + mlx5_destroy_flow_group(pre_ct->miss_grp); + mlx5_destroy_flow_group(pre_ct->flow_grp); + mlx5_destroy_flow_table(pre_ct->fdb); +} + +static int +mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + int err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); + if (err) + return err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); + if (err) + goto err_pre_ct_nat; + + return 0; + +err_pre_ct_nat: + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); +} + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@ -815,6 +1062,10 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, ft->ct_priv = ct_priv; refcount_set(&ft->refcount, 1); + err = mlx5_tc_ct_alloc_pre_ct_tables(ft); + if (err) + goto err_alloc_pre_ct; + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); if (err) goto err_init; @@ -836,6 +1087,8 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, err_insert: rhashtable_destroy(&ft->ct_entries_ht); err_init: + mlx5_tc_ct_free_pre_ct_tables(ft); +err_alloc_pre_ct: kfree(ft); return ERR_PTR(err); } @@ -861,21 +1114,40 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); + mlx5_tc_ct_free_pre_ct_tables(ft); kfree(ft); } /* We translate the tc filter with CT action to the following HW model: * - * +-------------------+ +--------------------+ +--------------+ - * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +-----> - * + original match + | + tuple + zone match + | + fte_id match + | - * +-------------------+ | +--------------------+ | +--------------+ | - * v v v - * set chain miss mapping set mark original - * set fte_id set label filter - * set zone set established actions - * set tunnel_id do nat (if needed) - * do decap + * +---------------------+ + * + fdb prio (tc chain) + + * + original match + + * +---------------------+ + * | set chain miss mapping + * | set fte_id + * | set tunnel_id + * | do decap + * v + * +---------------------+ + * + pre_ct/pre_ct_nat + if matches +---------------------+ + * + zone+nat match +---------------->+ post_ct (see below) + + * +---------------------+ set zone +---------------------+ + * | set zone + * v + * +--------------------+ + * + CT (nat or no nat) + + * + tuple + zone match + + * +--------------------+ + * | set mark + * | set label + * | set established + * | do nat (if needed) + * v + * +--------------+ + * + post_ct + original filter actions + * + fte_id match +------------------------> + * +--------------+ */ static int __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, @@ -890,7 +1162,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, struct mlx5_flow_spec *post_ct_spec = NULL; struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_esw_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; + struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; int chain_mapping = 0, err; @@ -953,14 +1225,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, goto err_mapping; } - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG, - attr->ct_attr.zone & - MLX5_CT_ZONE_MASK); - if (err) { - ct_dbg("Failed to set zone register mapping"); - goto err_mapping; - } - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, FTEID_TO_REG, fte_id); if (err) { @@ -1020,7 +1284,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, /* Change original rule point to ct table */ pre_ct_attr->dest_chain = 0; - pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb; ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); -- 2.13.6