You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
317 lines
11 KiB
317 lines
11 KiB
From 7a413876c0e669357b19e05c5e4a9e392b927d3f Mon Sep 17 00:00:00 2001
|
|
From: Alaa Hleihel <ahleihel@redhat.com>
|
|
Date: Tue, 12 May 2020 10:54:44 -0400
|
|
Subject: [PATCH 172/312] [netdrv] net/mlx5e: Fix configuration of XPS cpumasks
|
|
and netdev queues in corner cases
|
|
|
|
Message-id: <20200512105530.4207-79-ahleihel@redhat.com>
|
|
Patchwork-id: 306950
|
|
Patchwork-instance: patchwork
|
|
O-Subject: [RHEL8.3 BZ 1789382 078/124] net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases
|
|
Bugzilla: 1789382
|
|
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
|
|
RH-Acked-by: Kamal Heib <kheib@redhat.com>
|
|
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
|
|
|
|
Bugzilla: http://bugzilla.redhat.com/1789382
|
|
Upstream: v5.7-rc1
|
|
|
|
commit 3909a12e79135a66a797041ab337a8c7cb387bdf
|
|
Author: Maxim Mikityanskiy <maximmi@mellanox.com>
|
|
Date: Tue Sep 3 17:55:45 2019 +0300
|
|
|
|
net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases
|
|
|
|
Currently, mlx5e notifies the kernel about the number of queues and sets
|
|
the default XPS cpumasks when channels are activated. This
|
|
implementation has several corner cases, in which the kernel may not be
|
|
updated on time, or XPS cpumasks may be reset when not directly touched
|
|
by the user.
|
|
|
|
This commit fixes these corner cases to match the following expected
|
|
behavior:
|
|
|
|
1. The number of queues always corresponds to the number of channels
|
|
configured.
|
|
|
|
2. XPS cpumasks are set to driver's defaults on netdev attach.
|
|
|
|
3. XPS cpumasks set by user are not reset, unless the number of channels
|
|
changes. If the number of channels changes, they are reset to driver's
|
|
defaults. (In the general case, when the number of channels increases or
|
|
decreases, it's not possible to guess how to convert the current XPS
|
|
cpumasks to work with the new number of channels, so we let the user
|
|
reconfigure it if they change the number of channels.)
|
|
|
|
XPS cpumasks are no longer stored per channel. Only one temporary
|
|
cpumask is used. The old stored cpumasks didn't reflect the user's
|
|
changes and were not used after applying them.
|
|
|
|
A scratchpad area is added to struct mlx5e_priv. As cpumask_var_t
|
|
requires allocation, and the preactivate hook can't fail, we need to
|
|
preallocate the temporary cpumask in advance. It's stored in the
|
|
scratchpad.
|
|
|
|
Fixes: 149e566fef81 ("net/mlx5e: Expand XPS cpumask to cover all online cpus")
|
|
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
|
|
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
|
|
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
|
|
|
|
Signed-off-by: Alaa Hleihel <ahleihel@redhat.com>
|
|
Signed-off-by: Frantisek Hrbata <fhrbata@redhat.com>
|
|
---
|
|
drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 ++-
|
|
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 95 +++++++++++++----------
|
|
2 files changed, 65 insertions(+), 41 deletions(-)
|
|
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
|
index cef4c1cba2b8..b90225f62234 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
|
@@ -737,7 +737,6 @@ struct mlx5e_channel {
|
|
DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
|
|
int ix;
|
|
int cpu;
|
|
- cpumask_var_t xps_cpumask;
|
|
};
|
|
|
|
struct mlx5e_channels {
|
|
@@ -804,6 +803,15 @@ struct mlx5e_xsk {
|
|
bool ever_used;
|
|
};
|
|
|
|
+/* Temporary storage for variables that are allocated when struct mlx5e_priv is
|
|
+ * initialized, and used where we can't allocate them because those functions
|
|
+ * must not fail. Use with care and make sure the same variable is not used
|
|
+ * simultaneously by multiple users.
|
|
+ */
|
|
+struct mlx5e_scratchpad {
|
|
+ cpumask_var_t cpumask;
|
|
+};
|
|
+
|
|
struct mlx5e_priv {
|
|
/* priv data path fields - start */
|
|
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
|
|
@@ -864,6 +872,7 @@ struct mlx5e_priv {
|
|
struct devlink_health_reporter *tx_reporter;
|
|
struct devlink_health_reporter *rx_reporter;
|
|
struct mlx5e_xsk xsk;
|
|
+ struct mlx5e_scratchpad scratchpad;
|
|
};
|
|
|
|
struct mlx5e_profile {
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
|
index 1d72ee543447..d97c865989e1 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
|
@@ -1812,29 +1812,6 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
|
|
return err;
|
|
}
|
|
|
|
-static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c,
|
|
- struct mlx5e_params *params)
|
|
-{
|
|
- int num_comp_vectors = mlx5_comp_vectors_count(c->mdev);
|
|
- int irq;
|
|
-
|
|
- if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL))
|
|
- return -ENOMEM;
|
|
-
|
|
- for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) {
|
|
- int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq));
|
|
-
|
|
- cpumask_set_cpu(cpu, c->xps_cpumask);
|
|
- }
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c)
|
|
-{
|
|
- free_cpumask_var(c->xps_cpumask);
|
|
-}
|
|
-
|
|
static int mlx5e_open_queues(struct mlx5e_channel *c,
|
|
struct mlx5e_params *params,
|
|
struct mlx5e_channel_param *cparam)
|
|
@@ -1985,10 +1962,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
|
|
c->irq_desc = irq_to_desc(irq);
|
|
c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
|
|
|
|
- err = mlx5e_alloc_xps_cpumask(c, params);
|
|
- if (err)
|
|
- goto err_free_channel;
|
|
-
|
|
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
|
|
|
|
err = mlx5e_open_queues(c, params, cparam);
|
|
@@ -2011,9 +1984,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
|
|
|
|
err_napi_del:
|
|
netif_napi_del(&c->napi);
|
|
- mlx5e_free_xps_cpumask(c);
|
|
|
|
-err_free_channel:
|
|
kvfree(c);
|
|
|
|
return err;
|
|
@@ -2027,7 +1998,6 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
|
|
mlx5e_activate_txqsq(&c->sq[tc]);
|
|
mlx5e_activate_icosq(&c->icosq);
|
|
mlx5e_activate_rq(&c->rq);
|
|
- netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
|
|
|
|
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
|
|
mlx5e_activate_xsk(c);
|
|
@@ -2052,7 +2022,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
|
|
mlx5e_close_xsk(c);
|
|
mlx5e_close_queues(c);
|
|
netif_napi_del(&c->napi);
|
|
- mlx5e_free_xps_cpumask(c);
|
|
|
|
kvfree(c);
|
|
}
|
|
@@ -2887,10 +2856,10 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
|
|
netdev_set_tc_queue(netdev, tc, nch, 0);
|
|
}
|
|
|
|
-static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
|
|
+static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv, u16 count)
|
|
{
|
|
- int num_txqs = priv->channels.num * priv->channels.params.num_tc;
|
|
- int num_rxqs = priv->channels.num * priv->profile->rq_groups;
|
|
+ int num_txqs = count * priv->channels.params.num_tc;
|
|
+ int num_rxqs = count * priv->profile->rq_groups;
|
|
struct net_device *netdev = priv->netdev;
|
|
|
|
mlx5e_netdev_set_tcs(netdev);
|
|
@@ -2898,10 +2867,34 @@ static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
|
|
netif_set_real_num_rx_queues(netdev, num_rxqs);
|
|
}
|
|
|
|
+static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
|
|
+ struct mlx5e_params *params)
|
|
+{
|
|
+ struct mlx5_core_dev *mdev = priv->mdev;
|
|
+ int num_comp_vectors, ix, irq;
|
|
+
|
|
+ num_comp_vectors = mlx5_comp_vectors_count(mdev);
|
|
+
|
|
+ for (ix = 0; ix < params->num_channels; ix++) {
|
|
+ cpumask_clear(priv->scratchpad.cpumask);
|
|
+
|
|
+ for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
|
|
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
|
|
+
|
|
+ cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
|
|
+ }
|
|
+
|
|
+ netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
|
|
+ }
|
|
+}
|
|
+
|
|
int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
|
|
{
|
|
u16 count = priv->channels.params.num_channels;
|
|
|
|
+ mlx5e_update_netdev_queues(priv, count);
|
|
+ mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
|
|
+
|
|
if (!netif_is_rxfh_configured(priv->netdev))
|
|
mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
|
|
MLX5E_INDIR_RQT_SIZE, count);
|
|
@@ -2930,8 +2923,6 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
|
|
|
|
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
|
|
{
|
|
- mlx5e_update_netdev_queues(priv);
|
|
-
|
|
mlx5e_build_txq_maps(priv);
|
|
mlx5e_activate_channels(&priv->channels);
|
|
mlx5e_xdp_tx_enable(priv);
|
|
@@ -3468,7 +3459,7 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
|
|
goto out;
|
|
}
|
|
|
|
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
|
|
+ err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_num_channels_changed);
|
|
if (err)
|
|
goto out;
|
|
|
|
@@ -5252,6 +5243,9 @@ int mlx5e_netdev_init(struct net_device *netdev,
|
|
priv->max_nch = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
|
|
priv->max_opened_tc = 1;
|
|
|
|
+ if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
|
|
+ return -ENOMEM;
|
|
+
|
|
mutex_init(&priv->state_lock);
|
|
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
|
|
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
|
|
@@ -5260,7 +5254,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
|
|
|
|
priv->wq = create_singlethread_workqueue("mlx5e");
|
|
if (!priv->wq)
|
|
- return -ENOMEM;
|
|
+ goto err_free_cpumask;
|
|
|
|
/* netdev init */
|
|
netif_carrier_off(netdev);
|
|
@@ -5270,11 +5264,17 @@ int mlx5e_netdev_init(struct net_device *netdev,
|
|
#endif
|
|
|
|
return 0;
|
|
+
|
|
+err_free_cpumask:
|
|
+ free_cpumask_var(priv->scratchpad.cpumask);
|
|
+
|
|
+ return -ENOMEM;
|
|
}
|
|
|
|
void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv)
|
|
{
|
|
destroy_workqueue(priv->wq);
|
|
+ free_cpumask_var(priv->scratchpad.cpumask);
|
|
}
|
|
|
|
struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
|
|
@@ -5309,6 +5309,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
|
|
|
|
int mlx5e_attach_netdev(struct mlx5e_priv *priv)
|
|
{
|
|
+ const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
|
|
const struct mlx5e_profile *profile;
|
|
int max_nch;
|
|
int err;
|
|
@@ -5320,11 +5321,25 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
|
|
max_nch = mlx5e_get_max_num_channels(priv->mdev);
|
|
if (priv->channels.params.num_channels > max_nch) {
|
|
mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
|
|
- /* Reducing the number of channels - RXFH has to be reset. */
|
|
+ /* Reducing the number of channels - RXFH has to be reset, and
|
|
+ * mlx5e_num_channels_changed below will build the RQT.
|
|
+ */
|
|
priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
|
|
priv->channels.params.num_channels = max_nch;
|
|
- mlx5e_num_channels_changed(priv);
|
|
}
|
|
+ /* 1. Set the real number of queues in the kernel the first time.
|
|
+ * 2. Set our default XPS cpumask.
|
|
+ * 3. Build the RQT.
|
|
+ *
|
|
+ * rtnl_lock is required by netif_set_real_num_*_queues in case the
|
|
+ * netdev has been registered by this point (if this function was called
|
|
+ * in the reload or resume flow).
|
|
+ */
|
|
+ if (take_rtnl)
|
|
+ rtnl_lock();
|
|
+ mlx5e_num_channels_changed(priv);
|
|
+ if (take_rtnl)
|
|
+ rtnl_unlock();
|
|
|
|
err = profile->init_tx(priv);
|
|
if (err)
|
|
--
|
|
2.13.6
|
|
|