You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmod-redhat-mlx5_core/SOURCES/0031-netdrv-net-mlx5e-Repor...

115 lines
4.3 KiB

From ac9174fc02907c3b322b1cba4fe37b73ae29e71b Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <ahleihel@redhat.com>
Date: Sun, 10 May 2020 14:51:55 -0400
Subject: [PATCH 031/312] [netdrv] net/mlx5e: Report and recover from rx timeout
Message-id: <20200510145245.10054-33-ahleihel@redhat.com>
Patchwork-id: 306573
Patchwork-instance: patchwork
O-Subject: [RHEL8.3 BZ 1789378 v2 32/82] net/mlx5e: Report and recover from rx timeout
Bugzilla: 1790198 1789378
RH-Acked-by: Kamal Heib <kheib@redhat.com>
RH-Acked-by: Jarod Wilson <jarod@redhat.com>
RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
RH-Acked-by: Jonathan Toppins <jtoppins@redhat.com>
Bugzilla: http://bugzilla.redhat.com/1789378
Bugzilla: http://bugzilla.redhat.com/1790198
Upstream: v5.4-rc1
commit 32c57fb26863b48982e33aa95f3b5b23f24b1feb
Author: Aya Levin <ayal@mellanox.com>
Date: Tue Jun 25 21:42:27 2019 +0300
net/mlx5e: Report and recover from rx timeout
Add support for report and recovery from rx timeout. On driver open we
post a NOP work request on the rx channels to trigger napi in order to
fill up the rx rings. In case napi wasn't scheduled due to a lost
interrupt, perform EQ recovery.
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Alaa Hleihel <ahleihel@redhat.com>
Signed-off-by: Frantisek Hrbata <fhrbata@redhat.com>
---
.../net/ethernet/mellanox/mlx5/core/en/health.h | 1 +
.../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 32 ++++++++++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 +
3 files changed, 34 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index 8acd9dc520cf..b4a2d9be17d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -19,6 +19,7 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 661de567ca6c..4e933db759b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -115,6 +115,38 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+{
+ struct mlx5e_icosq *icosq;
+ struct mlx5_eq_comp *eq;
+ struct mlx5e_rq *rq;
+ int err;
+
+ rq = ctx;
+ icosq = &rq->channel->icosq;
+ eq = rq->cq.mcq.eq;
+ err = mlx5e_health_channel_eq_recover(eq, rq->channel);
+ if (err)
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+
+ return err;
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+ struct mlx5e_icosq *icosq = &rq->channel->icosq;
+ struct mlx5e_priv *priv = rq->channel->priv;
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = rq;
+ err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+ sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
+ icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
{
return err_ctx->recover(err_ctx->ctx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 430fb04ea96f..c3eba55e8a21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -799,6 +799,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
+ mlx5e_reporter_rx_timeout(rq);
return -ETIMEDOUT;
}
--
2.13.6