From 7a413876c0e669357b19e05c5e4a9e392b927d3f Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Tue, 12 May 2020 10:54:44 -0400 Subject: [PATCH 172/312] [netdrv] net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases Message-id: <20200512105530.4207-79-ahleihel@redhat.com> Patchwork-id: 306950 Patchwork-instance: patchwork O-Subject: [RHEL8.3 BZ 1789382 078/124] net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases Bugzilla: 1789382 RH-Acked-by: Tony Camuso RH-Acked-by: Kamal Heib RH-Acked-by: Jarod Wilson Bugzilla: http://bugzilla.redhat.com/1789382 Upstream: v5.7-rc1 commit 3909a12e79135a66a797041ab337a8c7cb387bdf Author: Maxim Mikityanskiy Date: Tue Sep 3 17:55:45 2019 +0300 net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases Currently, mlx5e notifies the kernel about the number of queues and sets the default XPS cpumasks when channels are activated. This implementation has several corner cases, in which the kernel may not be updated on time, or XPS cpumasks may be reset when not directly touched by the user. This commit fixes these corner cases to match the following expected behavior: 1. The number of queues always corresponds to the number of channels configured. 2. XPS cpumasks are set to driver's defaults on netdev attach. 3. XPS cpumasks set by user are not reset, unless the number of channels changes. If the number of channels changes, they are reset to driver's defaults. (In general case, when the number of channels increases or decreases, it's not possible to guess how to convert the current XPS cpumasks to work with the new number of channels, so we let the user reconfigure it if they change the number of channels.) XPS cpumasks are no longer stored per channel. Only one temporary cpumask is used. The old stored cpumasks didn't reflect the user's changes and were not used after applying them. A scratchpad area is added to struct mlx5e_priv. As cpumask_var_t requires allocation, and the preactivate hook can't fail, we need to preallocate the temporary cpumask in advance. It's stored in the scratchpad. Fixes: 149e566fef81 ("net/mlx5e: Expand XPS cpumask to cover all online cpus") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Alaa Hleihel Signed-off-by: Frantisek Hrbata --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 95 +++++++++++++---------- 2 files changed, 65 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cef4c1cba2b8..b90225f62234 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -737,7 +737,6 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; - cpumask_var_t xps_cpumask; }; struct mlx5e_channels { @@ -804,6 +803,15 @@ struct mlx5e_xsk { bool ever_used; }; +/* Temporary storage for variables that are allocated when struct mlx5e_priv is + * initialized, and used where we can't allocate them because that functions + * must not fail. Use with care and make sure the same variable is not used + * simultaneously by multiple users. + */ +struct mlx5e_scratchpad { + cpumask_var_t cpumask; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -864,6 +872,7 @@ struct mlx5e_priv { struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; struct mlx5e_xsk xsk; + struct mlx5e_scratchpad scratchpad; }; struct mlx5e_profile { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1d72ee543447..d97c865989e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1812,29 +1812,6 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c, - struct mlx5e_params *params) -{ - int num_comp_vectors = mlx5_comp_vectors_count(c->mdev); - int irq; - - if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL)) - return -ENOMEM; - - for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq)); - - cpumask_set_cpu(cpu, c->xps_cpumask); - } - - return 0; -} - -static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) -{ - free_cpumask_var(c->xps_cpumask); -} - static int mlx5e_open_queues(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1985,10 +1962,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->irq_desc = irq_to_desc(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_queues(c, params, cparam); @@ -2011,9 +1984,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_napi_del: netif_napi_del(&c->napi); - mlx5e_free_xps_cpumask(c); -err_free_channel: kvfree(c); return err; @@ -2027,7 +1998,6 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_icosq(&c->icosq); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_activate_xsk(c); @@ -2052,7 +2022,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_xsk(c); mlx5e_close_queues(c); netif_napi_del(&c->napi); - mlx5e_free_xps_cpumask(c); kvfree(c); } @@ -2887,10 +2856,10 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_tc_queue(netdev, tc, nch, 0); } -static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv) +static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv, u16 count) { - int num_txqs = priv->channels.num * priv->channels.params.num_tc; - int num_rxqs = priv->channels.num * priv->profile->rq_groups; + int num_txqs = count * priv->channels.params.num_tc; + int num_rxqs = count * priv->profile->rq_groups; struct net_device *netdev = priv->netdev; mlx5e_netdev_set_tcs(netdev); @@ -2898,10 +2867,34 @@ static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv) netif_set_real_num_rx_queues(netdev, num_rxqs); } +static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_comp_vectors, ix, irq; + + num_comp_vectors = mlx5_comp_vectors_count(mdev); + + for (ix = 0; ix < params->num_channels; ix++) { + cpumask_clear(priv->scratchpad.cpumask); + + for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq)); + + cpumask_set_cpu(cpu, priv->scratchpad.cpumask); + } + + netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); + } +} + int mlx5e_num_channels_changed(struct mlx5e_priv *priv) { u16 count = priv->channels.params.num_channels; + mlx5e_update_netdev_queues(priv, count); + mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); + if (!netif_is_rxfh_configured(priv->netdev)) mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); @@ -2930,8 +2923,6 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { - mlx5e_update_netdev_queues(priv); - mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); mlx5e_xdp_tx_enable(priv); @@ -3468,7 +3459,7 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_num_channels_changed); if (err) goto out; @@ -5252,6 +5243,9 @@ int mlx5e_netdev_init(struct net_device *netdev, priv->max_nch = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); priv->max_opened_tc = 1; + if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) + return -ENOMEM; + mutex_init(&priv->state_lock); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); @@ -5260,7 +5254,7 @@ int mlx5e_netdev_init(struct net_device *netdev, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - return -ENOMEM; + goto err_free_cpumask; /* netdev init */ netif_carrier_off(netdev); @@ -5270,11 +5264,17 @@ int mlx5e_netdev_init(struct net_device *netdev, #endif return 0; + +err_free_cpumask: + free_cpumask_var(priv->scratchpad.cpumask); + + return -ENOMEM; } void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) { destroy_workqueue(priv->wq); + free_cpumask_var(priv->scratchpad.cpumask); } struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, @@ -5309,6 +5309,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, int mlx5e_attach_netdev(struct mlx5e_priv *priv) { + const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; const struct mlx5e_profile *profile; int max_nch; int err; @@ -5320,11 +5321,25 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) max_nch = mlx5e_get_max_num_channels(priv->mdev); if (priv->channels.params.num_channels > max_nch) { mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); - /* Reducing the number of channels - RXFH has to be reset. */ + /* Reducing the number of channels - RXFH has to be reset, and + * mlx5e_num_channels_changed below will build the RQT. + */ priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; priv->channels.params.num_channels = max_nch; - mlx5e_num_channels_changed(priv); } + /* 1. Set the real number of queues in the kernel the first time. + * 2. Set our default XPS cpumask. + * 3. Build the RQT. + * + * rtnl_lock is required by netif_set_real_num_*_queues in case the + * netdev has been registered by this point (if this function was called + * in the reload or resume flow). + */ + if (take_rtnl) + rtnl_lock(); + mlx5e_num_channels_changed(priv); + if (take_rtnl) + rtnl_unlock(); err = profile->init_tx(priv); if (err) -- 2.13.6