From e0a7aa5afea68660d3bee9a46dcc04f776da7f1f Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Wed, 2 Oct 2019 18:23:28 -0400 Subject: [PATCH 73/96] [netdrv] bnxt_en: Handle firmware reset Message-id: Patchwork-id: 276504 O-Subject: [RHEL-8.2 PATCH 66/78] bnxt_en: Handle firmware reset. Bugzilla: 1724766 RH-Acked-by: John Linville RH-Acked-by: Jarod Wilson Add the bnxt_fw_reset() main function to handle firmware reset. This is triggered by firmware to initiate an orderly reset, for example when a non-fatal exception condition has been detected. bnxt_fw_reset() will first wait for all VFs to shut down and then start the bnxt_fw_reset_task() work queue to go through the sequence of reset, re-probe, and re-initialization. The next patch will add the devlink reporter to start the sequence and call bnxt_fw_reset(). Signed-off-by: Michael Chan Signed-off-by: David S. Miller (cherry picked from commit 230d1f0de754b483ec6eefc1ca5aaeff2b6b9a4c) Bugzilla: 1724766 Build Info: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23809532 Tested: build, boot, basic ping Signed-off-by: Jonathan Toppins Signed-off-by: Bruno Meneguele --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 150 ++++++++++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 11 ++ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 3 + 3 files changed, 164 insertions(+) Index: src/drivers/net/ethernet/broadcom/bnxt/bnxt.c =================================================================== --- src.orig/drivers/net/ethernet/broadcom/bnxt/bnxt.c 2020-02-06 16:23:20.029473507 +0100 +++ src/drivers/net/ethernet/broadcom/bnxt/bnxt.c 2020-02-06 16:23:20.162472286 +0100 @@ -1142,6 +1142,14 @@ return 0; } +static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay) +{ + if (BNXT_PF(bp)) + queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay); + else + schedule_delayed_work(&bp->fw_reset_task, delay); +} + static void bnxt_queue_sp_work(struct bnxt *bp) { if (BNXT_PF(bp)) @@ -6360,6
+6368,8 @@ struct bnxt_vf_info *vf = &bp->vf; vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK; + } else { + bp->pf.registered_vfs = le16_to_cpu(resp->registered_vfs); } #endif flags = le16_to_cpu(resp->flags); @@ -9985,6 +9995,53 @@ bnxt_rtnl_unlock_sp(bp); } +static void bnxt_fw_reset_close(struct bnxt *bp) +{ + __bnxt_close_nic(bp, true, false); + bnxt_ulp_irq_stop(bp); + bnxt_clear_int_mode(bp); + bnxt_hwrm_func_drv_unrgtr(bp); + bnxt_free_ctx_mem(bp); + kfree(bp->ctx); + bp->ctx = NULL; +} + +void bnxt_fw_reset(struct bnxt *bp) +{ + int rc; + + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state) && + !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + if (BNXT_PF(bp) && bp->pf.active_vfs) { + rc = bnxt_hwrm_func_qcfg(bp); + if (rc) { + netdev_err(bp->dev, "Firmware reset aborted, first func_qcfg cmd failed, rc = %d\n", + rc); + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + dev_close(bp->dev); + goto fw_reset_exit; + } + if (bp->pf.registered_vfs || bp->sriov_cfg) { + u16 vf_tmo_dsecs = bp->pf.registered_vfs * 10; + + if (bp->fw_reset_max_dsecs < vf_tmo_dsecs) + bp->fw_reset_max_dsecs = vf_tmo_dsecs; + bp->fw_reset_state = + BNXT_FW_RESET_STATE_POLL_VF; + bnxt_queue_fw_reset_work(bp, HZ / 10); + goto fw_reset_exit; + } + } + bnxt_fw_reset_close(bp); + bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; + bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10); + } +fw_reset_exit: + bnxt_rtnl_unlock_sp(bp); +} + static void bnxt_chk_missed_irq(struct bnxt *bp) { int i; @@ -10344,6 +10401,98 @@ return 0; } +static void bnxt_fw_reset_task(struct work_struct *work) +{ + struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); + int rc; + + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n"); + return; + } + + switch (bp->fw_reset_state) { + case BNXT_FW_RESET_STATE_POLL_VF: + rc = 
bnxt_hwrm_func_qcfg(bp); + if (rc) { + netdev_err(bp->dev, "Firmware reset aborted, subsequent func_qcfg cmd failed, rc = %d, %d msecs since reset timestamp\n", + rc, jiffies_to_msecs(jiffies - + bp->fw_reset_timestamp)); + goto fw_reset_abort; + } + if (bp->pf.registered_vfs || bp->sriov_cfg) { + if (time_after(jiffies, bp->fw_reset_timestamp + + (bp->fw_reset_max_dsecs * HZ / 10))) { + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bp->fw_reset_state = 0; + netdev_err(bp->dev, "Firmware reset aborted, %d VFs still registered, sriov_cfg %d\n", + bp->pf.registered_vfs, + bp->sriov_cfg); + return; + } + bnxt_queue_fw_reset_work(bp, HZ / 10); + return; + } + bp->fw_reset_timestamp = jiffies; + rtnl_lock(); + bnxt_fw_reset_close(bp); + bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; + rtnl_unlock(); + bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10); + return; + case BNXT_FW_RESET_STATE_ENABLE_DEV: + if (pci_enable_device(bp->pdev)) { + netdev_err(bp->dev, "Cannot re-enable PCI device\n"); + goto fw_reset_abort; + } + pci_set_master(bp->pdev); + bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW; + /* fall through */ + case BNXT_FW_RESET_STATE_POLL_FW: + bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT; + rc = __bnxt_hwrm_ver_get(bp, true); + if (rc) { + if (time_after(jiffies, bp->fw_reset_timestamp + + (bp->fw_reset_max_dsecs * HZ / 10))) { + netdev_err(bp->dev, "Firmware reset aborted\n"); + goto fw_reset_abort; + } + bnxt_queue_fw_reset_work(bp, HZ / 5); + return; + } + bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT; + bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; + /* fall through */ + case BNXT_FW_RESET_STATE_OPENING: + while (!rtnl_trylock()) { + bnxt_queue_fw_reset_work(bp, HZ / 10); + return; + } + rc = bnxt_open(bp->dev); + if (rc) { + netdev_err(bp->dev, "bnxt_open_nic() failed\n"); + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + dev_close(bp->dev); + } + bnxt_ulp_irq_restart(bp, rc); + rtnl_unlock(); + + bp->fw_reset_state = 0; + /* 
Make sure fw_reset_state is 0 before clearing the flag */ + smp_mb__before_atomic(); + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + break; + } + return; + +fw_reset_abort: + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bp->fw_reset_state = 0; + rtnl_lock(); + dev_close(bp->dev); + rtnl_unlock(); +} + static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) { int rc; @@ -10406,6 +10555,7 @@ pci_enable_pcie_error_reporting(pdev); INIT_WORK(&bp->sp_task, bnxt_sp_task); + INIT_DELAYED_WORK(&bp->fw_reset_task, bnxt_fw_reset_task); spin_lock_init(&bp->ntp_fltr_lock); #if BITS_PER_LONG == 32 Index: src/drivers/net/ethernet/broadcom/bnxt/bnxt.h =================================================================== --- src.orig/drivers/net/ethernet/broadcom/bnxt/bnxt.h 2020-02-06 16:23:20.029473507 +0100 +++ src/drivers/net/ethernet/broadcom/bnxt/bnxt.h 2020-02-06 16:23:20.163472277 +0100 @@ -640,6 +640,7 @@ #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) #define DFLT_HWRM_CMD_TIMEOUT 500 +#define SHORT_HWRM_CMD_TIMEOUT 20 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) #define HWRM_RESP_ERR_CODE_MASK 0xffff @@ -1066,6 +1067,7 @@ u8 mac_addr[ETH_ALEN]; u32 first_vf_id; u16 active_vfs; + u16 registered_vfs; u16 max_vfs; u32 max_encap_records; u32 max_decap_records; @@ -1721,6 +1723,14 @@ #define BNXT_RING_COAL_NOW_SP_EVENT 17 #define BNXT_FW_RESET_NOTIFY_SP_EVENT 18 + struct delayed_work fw_reset_task; + int fw_reset_state; +#define BNXT_FW_RESET_STATE_POLL_VF 1 +#define BNXT_FW_RESET_STATE_RESET_FW 2 +#define BNXT_FW_RESET_STATE_ENABLE_DEV 3 +#define BNXT_FW_RESET_STATE_POLL_FW 4 +#define BNXT_FW_RESET_STATE_OPENING 5 + u16 fw_reset_min_dsecs; #define BNXT_DFLT_FW_RST_MIN_DSECS 20 u16 fw_reset_max_dsecs; @@ -1966,6 +1976,7 @@ int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); int bnxt_close_nic(struct 
bnxt *, bool, bool); +void bnxt_fw_reset(struct bnxt *bp); int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); Index: src/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c =================================================================== --- src.orig/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c 2020-02-06 16:22:54.098711531 +0100 +++ src/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c 2020-02-06 16:23:20.163472277 +0100 @@ -226,6 +226,9 @@ struct input *req; int rc; + if (ulp_id != BNXT_ROCE_ULP && bp->fw_reset_state) + return -EBUSY; + mutex_lock(&bp->hwrm_cmd_lock); req = fw_msg->msg; req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);