commit b9d231cb909a141762da4fc856d13c9c4ed25792 Author: MSVSphere Packaging Team Date: Thu Mar 28 18:24:59 2024 +0300 import rasdaemon-0.6.7-9.el9 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c56a73 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/rasdaemon-0.6.7.tar.bz2 diff --git a/.rasdaemon.metadata b/.rasdaemon.metadata new file mode 100644 index 0000000..cfbe66d --- /dev/null +++ b/.rasdaemon.metadata @@ -0,0 +1 @@ +8ae34f40b676a0843be6647854b950f45161e7d4 SOURCES/rasdaemon-0.6.7.tar.bz2 diff --git a/SOURCES/1f74a59ee33b7448b00d7ba13d5ecd4918b9853c.patch b/SOURCES/1f74a59ee33b7448b00d7ba13d5ecd4918b9853c.patch new file mode 100644 index 0000000..e0cb4a2 --- /dev/null +++ b/SOURCES/1f74a59ee33b7448b00d7ba13d5ecd4918b9853c.patch @@ -0,0 +1,163 @@ +commit 1f74a59ee33b7448b00d7ba13d5ecd4918b9853c +Author: Muralidhara M K +Date: Fri Jun 30 10:36:53 2023 +0000 + + rasdaemon: Add new MA_LLC, USR_DP, and USR_CP bank types. + + Add HWID and McaType values for new SMCA bank types + and error decoding for those new SMCA banks. + + Signed-off-by: Muralidhara M K + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 7c88a46..fc51b5a 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -61,6 +61,7 @@ enum smca_bank_types { + SMCA_PIE, /* Power, Interrupts, etc. 
*/ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, ++ SMCA_MA_LLC, /* Memory Attached Last Level Cache */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_PSP_V2, +@@ -76,6 +77,8 @@ enum smca_bank_types { + SMCA_SHUB, /* System Hub Unit */ + SMCA_SATA, /* SATA Unit */ + SMCA_USB, /* USB Unit */ ++ SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */ ++ SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */ + SMCA_GMI_PCS, /* GMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ +@@ -325,6 +328,16 @@ static const char * const smca_umc2_mce_desc[] = { + "LM32 MP errors", + }; + ++static const char * const smca_mall_mce_desc[] = { ++ "Counter overflow error", ++ "Counter underflow error", ++ "Write Data Parity Error", ++ "Read Response Parity Error", ++ "Cache Tag ECC Error Macro 0", ++ "Cache Tag ECC Error Macro 1", ++ "Cache Data ECC Error" ++}; ++ + static const char * const smca_pb_mce_desc[] = { + "An ECC error in the Parameter Block RAM array" + }; +@@ -524,6 +537,57 @@ static const char * const smca_usb_mce_desc[] = { + "AXI Slave Response error", + }; + ++static const char * const smca_usrdp_mce_desc[] = { ++ "Mst CMD Error", ++ "Mst Rx FIFO Error", ++ "Mst Deskew Error", ++ "Mst Detect Timeout Error", ++ "Mst FlowControl Error", ++ "Mst DataValid FIFO Error", ++ "Mac LinkState Error", ++ "Deskew Error", ++ "Init Timeout Error", ++ "Init Attempt Error", ++ "Recovery Timeout Error", ++ "Recovery Attempt Error", ++ "Eye Training Timeout Error", ++ "Data Startup Limit Error", ++ "LS0 Exit Error", ++ "PLL powerState Update Timeout Error", ++ "Rx FIFO Error", ++ "Lcu Error", ++ "Conv CECC Error", ++ "Conv UECC Error", ++ "Reserved", ++ "Rx DataLoss Error", ++ "Replay CECC Error", ++ "Replay UECC Error", ++ "CRC Error", ++ "BER Exceeded Error", ++ "FC Init Timeout Error", ++ "FC Init Attempt Error", ++ "Replay Timeout Error", ++ "Replay Attempt Error", ++ "Replay 
Underflow Error", ++ "Replay Overflow Error", ++}; ++ ++static const char * const smca_usrcp_mce_desc[] = { ++ "Packet Type Error", ++ "Rx FIFO Error", ++ "Deskew Error", ++ "Rx Detect Timeout Error", ++ "Data Parity Error", ++ "Data Loss Error", ++ "Lcu Error", ++ "HB1 Handshake Timeout Error", ++ "HB2 Handshake Timeout Error", ++ "Clk Sleep Rsp Timeout Error", ++ "Clk Wake Rsp Timeout Error", ++ "Reset Attack Error", ++ "Remote Link Fatal Error", ++}; ++ + static const char * const smca_gmipcs_mce_desc[] = { + "Data Loss Error", + "Training Error", +@@ -579,6 +643,7 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, + [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, ++ [SMCA_MA_LLC] = { smca_mall_mce_desc, ARRAY_SIZE(smca_mall_mce_desc) }, + [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, + [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, + [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, +@@ -595,6 +660,8 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, + [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) }, + [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) }, ++ [SMCA_USR_DP] = { smca_usrdp_mce_desc, ARRAY_SIZE(smca_usrdp_mce_desc) }, ++ [SMCA_USR_CP] = { smca_usrcp_mce_desc, ARRAY_SIZE(smca_usrcp_mce_desc) }, + [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) }, + /* All the PHY bank types have the same error descriptions, for now. */ + [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, +@@ -631,6 +698,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + { SMCA_UMC, 0x00000096 }, + /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. 
*/ + { SMCA_UMC_V2, 0x00010096 }, ++ /* Memory Attached Last Level Cache */ ++ { SMCA_MA_LLC, 0x0004002E }, + + /* Parameter Block MCA type */ + { SMCA_PB, 0x00000005 }, +@@ -664,6 +733,11 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + { SMCA_SHUB, 0x00000080 }, + { SMCA_SATA, 0x000000A8 }, + { SMCA_USB, 0x000000AA }, ++ ++ /* Ultra Short Reach Data and Control Plane Controller */ ++ { SMCA_USR_DP, 0x00000170 }, ++ { SMCA_USR_CP, 0x00000180 }, ++ + { SMCA_GMI_PCS, 0x00000241 }, + + /* Ext Global Memory Interconnect PHY MCA type */ +@@ -692,6 +766,7 @@ static struct smca_bank_name smca_names[] = { + [SMCA_PIE] = { "Power, Interrupts, etc." }, + [SMCA_UMC] = { "Unified Memory Controller" }, + [SMCA_UMC_V2] = { "Unified Memory Controller V2" }, ++ [SMCA_MA_LLC] = { "Memory Attached Last Level Cache" }, + [SMCA_PB] = { "Parameter Block" }, + [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" }, + [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" }, +@@ -704,6 +779,8 @@ static struct smca_bank_name smca_names[] = { + [SMCA_SHUB] = { "System Hub Unit" }, + [SMCA_SATA] = { "SATA Unit" }, + [SMCA_USB] = { "USB Unit" }, ++ [SMCA_USR_DP] = { "Ultra Short Reach Data Plane Controller" }, ++ [SMCA_USR_CP] = { "Ultra Short Reach Control Plane Controller" }, + [SMCA_GMI_PCS] = { "Global Memory Interconnect PCS Unit" }, + [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" }, + [SMCA_WAFL_PHY] = { "WAFL PHY Unit" }, diff --git a/SOURCES/1ff5f3d2a0fcd48add9462567c30fe0e14585fb4.patch b/SOURCES/1ff5f3d2a0fcd48add9462567c30fe0e14585fb4.patch new file mode 100644 index 0000000..99a9ba6 --- /dev/null +++ b/SOURCES/1ff5f3d2a0fcd48add9462567c30fe0e14585fb4.patch @@ -0,0 +1,32 @@ +commit 1ff5f3d2a0fcd48add9462567c30fe0e14585fb4 +Author: Matt Whitlock +Date: Wed Jun 9 10:25:18 2021 -0400 + + configure.ac: fix SYSCONFDEFDIR default value + + configure.ac was using AC_ARG_WITH incorrectly, yielding a generated configure script like: + + # Check whether 
--with-sysconfdefdir was given. + if test "${with_sysconfdefdir+set}" = set; then : + withval=$with_sysconfdefdir; SYSCONFDEFDIR=$withval + else + "/etc/sysconfig" + fi + + This commit fixes the default case so that the SYSCONFDEFDIR variable is assigned the value "/etc/sysconfig" rather than trying to execute "/etc/sysconfig" as a command. + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/configure.ac b/configure.ac +index f7d1947..33b81fe 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -172,7 +172,7 @@ AC_SUBST([RASSTATEDIR]) + AC_ARG_WITH(sysconfdefdir, + AC_HELP_STRING([--with-sysconfdefdir=DIR], [rasdaemon environment file dir]), + [SYSCONFDEFDIR=$withval], +- ["/etc/sysconfig"]) ++ [SYSCONFDEFDIR=/etc/sysconfig]) + AC_SUBST([SYSCONFDEFDIR]) + + AC_DEFINE([RAS_DB_FNAME], ["ras-mc_event.db"], [ras events database]) diff --git a/SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch b/SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch new file mode 100644 index 0000000..fdc509b --- /dev/null +++ b/SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch @@ -0,0 +1,28 @@ +commit 28ea956acc2dab7c18b4701f9657afb9ab3ddc79 +Author: Muralidhara M K +Date: Mon Jul 12 05:18:43 2021 -0500 + + rasdaemon: set SMCA maximum number of banks to 64 + + Newer AMD systems with SMCA banks support up to 64 MCA banks per CPU. + + This patch is based on the commit below upstremed into the kernel: + a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64") + + Signed-off-by: Muralidhara M K + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index e0cf512..3c346f4 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -75,6 +75,9 @@ enum smca_bank_types { + N_SMCA_BANK_TYPES + }; + ++/* Maximum number of MCA banks per CPU. 
*/ ++#define MAX_NR_BANKS 64 ++ + /* SMCA Extended error strings */ + /* Load Store */ + static const char * const smca_ls_mce_desc[] = { diff --git a/SOURCES/2b37a26dcec389723f75d69d3da9c2f15f6c317d.patch b/SOURCES/2b37a26dcec389723f75d69d3da9c2f15f6c317d.patch new file mode 100644 index 0000000..eb45db0 --- /dev/null +++ b/SOURCES/2b37a26dcec389723f75d69d3da9c2f15f6c317d.patch @@ -0,0 +1,63 @@ +commit 2b37a26dcec389723f75d69d3da9c2f15f6c317d +Author: Mauro Carvalho Chehab +Date: Wed May 26 12:41:27 2021 +0200 + + ci.yml: Fix the job for it to run on a single arch + + There were some issues on the previous content. Fix them, in + order to allow it to build on a single architecture. + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml +index 5b3e757..747a844 100644 +--- a/.github/workflows/ci.yml ++++ b/.github/workflows/ci.yml +@@ -1,34 +1,23 @@ + name: CI + +-# Should run only on branches and PR, as "on_tag.yml" will handle tags + on: ++ workflow_dispatch: + push: +- branches: master test + pull_request: +- branches: master + + jobs: +- +-# +-# Linux +-# + Ubuntu: + name: Ubuntu +- runs-on: ubuntu-20.04 +- strategy: +- matrix: +- arch: [x64_64, aarch64, armv7, ppc64le] ++ runs-on: ubuntu-latest + steps: +- - uses: actions/checkout@v2 +- with: +- arch: ${{ matrix.arch }} +- - name: prepare +- run: | +- sudo apt-get update +- sudo apt-get install -y build-essential sqlite3 +- - name: build +- run: | +- autoreconf -vfi +- ./configure --enable-all +- make +- sudo make install ++ - uses: actions/checkout@v2 ++ - name: prepare ++ run: | ++ sudo apt-get update ++ sudo apt-get install -y build-essential sqlite3 ++ - name: build ++ run: | ++ autoreconf -vfi ++ ./configure --enable-all ++ make ++ sudo make install diff --git a/SOURCES/2b6a54b0d31e02e657171fd27f4e31d996756bc6.patch b/SOURCES/2b6a54b0d31e02e657171fd27f4e31d996756bc6.patch new file mode 100644 index 0000000..c2a9376 --- /dev/null +++ 
b/SOURCES/2b6a54b0d31e02e657171fd27f4e31d996756bc6.patch @@ -0,0 +1,44 @@ +commit 2b6a54b0d31e02e657171fd27f4e31d996756bc6 +Author: DmNosachev +Date: Thu Jul 22 10:25:38 2021 +0300 + + labels/supermicro: added Supermicro X10DRL, X11SPM + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 1e7761f..990fc9e 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -88,6 +88,16 @@ Vendor: Supermicro + P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1; + P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1; + P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; ++ ++ Model: X10DRL-i ++ P1-DIMMA1: 0.0.0; ++ P1-DIMMB1: 0.1.0; ++ P1-DIMMC1: 0.2.0; ++ P1-DIMMD1: 0.3.0; ++ P2-DIMME1: 1.0.0; ++ P2-DIMMF1: 1.1.0; ++ P2-DIMMG1: 1.2.0; ++ P2-DIMMH1: 1.3.0; + + Model: X11DDW-NT, X11DDW-L + P1-DIMMA1: 0.0.0; +@@ -102,6 +112,14 @@ Vendor: Supermicro + P2-DIMMD1: 3.0.0; + P2-DIMME1: 3.1.0; + P2-DIMMF1: 3.2.0; ++ ++ Model: X11SPM-F, X11SPM-TF, X11SPM-TPF ++ DIMMA1: 0.0.0; ++ DIMMB1: 0.1.0; ++ DIMMC1: 0.2.0; ++ DIMMD1: 1.0.0; ++ DIMME1: 1.1.0; ++ DIMMF1: 1.2.0; + + Model: B1DRi + P1_DIMMA1: 0.0.0; diff --git a/SOURCES/2d15882a0cbfce0b905039bebc811ac8311cd739.patch b/SOURCES/2d15882a0cbfce0b905039bebc811ac8311cd739.patch new file mode 100644 index 0000000..1791705 --- /dev/null +++ b/SOURCES/2d15882a0cbfce0b905039bebc811ac8311cd739.patch @@ -0,0 +1,105 @@ +commit 2d15882a0cbfce0b905039bebc811ac8311cd739 +Author: Muralidhara M K +Date: Fri Jun 30 11:19:42 2023 +0000 + + rasdaemon: Handle reassigned bit definitions for UMC bank + + On some AMD systems some of the existing bit definitions in the + CTL register of SMCA bank type are reassigned without defining + new HWID and McaType. Consequently, the errors whose bit + definitions have been reassigned in the CTL register are being + erroneously decoded. + + Add new error description structure to compensate for the + reassigned bit definitions, by new software defined SMCA bank + type by utilizing the hardware-reserved values for HWID. 
+ The new SMCA bank type will only be employed for UMC error + decoding on affected models and the existing error description + structure for UMC bank type is still valid. + + Signed-off-by: Muralidhara M K + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index fc51b5a..54060ee 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -60,6 +60,7 @@ enum smca_bank_types { + SMCA_CS_V2_QUIRK, + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ ++ SMCA_UMC_QUIRK, + SMCA_UMC_V2, + SMCA_MA_LLC, /* Memory Attached Last Level Cache */ + SMCA_PB, /* Parameter Block */ +@@ -313,6 +314,25 @@ static const char * const smca_umc_mce_desc[] = { + "Read CRC Error", + }; + ++static const char * const smca_umc_quirk_mce_desc[] = { ++ "DRAM On Die ECC error", ++ "Data poison error", ++ "SDP parity error", ++ "Reserved", ++ "Address/Command parity error", ++ "HBM Write data parity error", ++ "Consolidated SRAM ECC error", ++ "Reserved", ++ "Reserved", ++ "Rdb SRAM ECC error", ++ "Thermal throttling", ++ "HBM Read Data Parity error", ++ "Reserved", ++ "UMC FW Error", ++ "SRAM Parity Error", ++ "HBM CRC Error", ++}; ++ + static const char * const smca_umc2_mce_desc[] = { + "DRAM ECC error", + "Data poison error", +@@ -642,6 +662,7 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_CS_V2_QUIRK] = { smca_cs2_quirk_mce_desc, ARRAY_SIZE(smca_cs2_quirk_mce_desc)}, + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, ++ [SMCA_UMC_QUIRK] = { smca_umc_quirk_mce_desc, ARRAY_SIZE(smca_umc_quirk_mce_desc) }, + [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, + [SMCA_MA_LLC] = { smca_mall_mce_desc, ARRAY_SIZE(smca_mall_mce_desc) }, + [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, +@@ -696,6 +717,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + + /* Unified Memory Controller MCA type */ + { 
SMCA_UMC, 0x00000096 }, ++ { SMCA_UMC_QUIRK, 0x00020000 }, + /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */ + { SMCA_UMC_V2, 0x00010096 }, + /* Memory Attached Last Level Cache */ +@@ -764,7 +786,7 @@ static struct smca_bank_name smca_names[] = { + [SMCA_L3_CACHE] = { "L3 Cache" }, + [SMCA_CS ... SMCA_CS_V2_QUIRK] = { "Coherent Slave" }, + [SMCA_PIE] = { "Power, Interrupts, etc." }, +- [SMCA_UMC] = { "Unified Memory Controller" }, ++ [SMCA_UMC ... SMCA_UMC_QUIRK] = { "Unified Memory Controller" }, + [SMCA_UMC_V2] = { "Unified Memory Controller V2" }, + [SMCA_MA_LLC] = { "Memory Attached Last Level Cache" }, + [SMCA_PB] = { "Parameter Block" }, +@@ -843,6 +865,10 @@ static inline void fixup_hwid(struct mce_priv* m, uint32_t *hwid_mcatype) + if (*hwid_mcatype == 0x0002002E) + *hwid_mcatype = 0x00010000; + break; ++ case 0x90 ... 0x9F: ++ if ((*hwid_mcatype & 0xFF) == 0x00000096) ++ *hwid_mcatype = 0x00020000; ++ break; + default: + break; + } +@@ -908,7 +934,7 @@ void decode_smca_error(struct mce_event *e, struct mce_priv *m) + smca_mce_descs[bank_type].descs[xec], + xec); + +- if (bank_type == SMCA_UMC && xec == 0) { ++ if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_QUIRK) && xec == 0) { + channel = find_umc_channel(e); + csrow = e->synd & 0x7; /* Bit 0, 1 ,2 */ + mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", diff --git a/SOURCES/30158ef8d7aebc3e5201bf39b73ce7644f8e419e.patch b/SOURCES/30158ef8d7aebc3e5201bf39b73ce7644f8e419e.patch new file mode 100644 index 0000000..240baaf --- /dev/null +++ b/SOURCES/30158ef8d7aebc3e5201bf39b73ce7644f8e419e.patch @@ -0,0 +1,524 @@ +commit 30158ef8d7aebc3e5201bf39b73ce7644f8e419e +Author: Avadhut Naik +Date: Tue Apr 18 18:24:21 2023 +0000 + + rasdaemon: Update SMCA bank error descriptions + + Update, reword some existing SMCA bank type error descriptions to extend + SMCA error decoding functionality for modern AMD processors. 
Additionally, + also add new error descriptions for missing SMCA bank types. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 27ca8aa..7ec787a 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -66,12 +66,19 @@ enum smca_bank_types { + SMCA_SMU, /* System Management Unit */ + SMCA_SMU_V2, + SMCA_MP5, /* Microprocessor 5 Unit */ ++ SMCA_MPDMA, /* MPDMA Unit */ + SMCA_NBIO, /* Northbridge IO Unit */ + SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ ++ SMCA_NBIF, /*NBIF Unit */ ++ SMCA_SHUB, /* System Hub Unit */ ++ SMCA_SATA, /* SATA Unit */ ++ SMCA_USB, /* USB Unit */ ++ SMCA_GMI_PCS, /* GMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ ++ SMCA_GMI_PHY, /* GMI PHY Unit */ + N_SMCA_BANK_TYPES + }; + +@@ -85,7 +92,6 @@ enum smca_bank_types { + #define NONCPU_NODE_INDEX 8 + + /* SMCA Extended error strings */ +-/* Load Store */ + static const char * const smca_ls_mce_desc[] = { + "Load queue parity", + "Store queue parity", +@@ -109,6 +115,7 @@ static const char * const smca_ls_mce_desc[] = { + "DC tag error type 5", + "L2 fill data error", + }; ++ + static const char * const smca_ls2_mce_desc[] = { + "An ECC error was detected on a data cache read by a probe or victimization", + "An ECC error or L2 poison was detected on a data cache read by a load", +@@ -133,92 +140,104 @@ static const char * const smca_ls2_mce_desc[] = { + "A SystemReadDataError error was reported on read data returned from L2 for an SCB store", + "A SystemReadDataError error was reported on read data returned from L2 for a WCB store", + "A hardware assertion error was reported", +- "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access", ++ "A parity error was detected in an STLF, SCB EMEM entry, store data mask or SRB store data by any access", + }; +-/* Instruction Fetch */ ++ + static const char * const 
smca_if_mce_desc[] = { + "microtag probe port parity error", + "IC microtag or full tag multi-hit error", + "IC full tag parity", + "IC data array parity", +- "Decoupling queue phys addr parity error", ++ "PRQ Parity Error", + "L0 ITLB parity error", +- "L1 ITLB parity error", +- "L2 ITLB parity error", ++ "L1-TLB parity error", ++ "L2-TLB parity error", + "BPQ snoop parity on Thread 0", + "BPQ snoop parity on Thread 1", +- "L1 BTB multi-match error", +- "L2 BTB multi-match error", ++ "BP L1-BTB Multi-Hit Error", ++ "BP L2-BTB Multi-Hit Error", + "L2 Cache Response Poison error", +- "System Read Data error", ++ "L2 Cache Error Response", ++ "Hardware Assertion Error", ++ "L1-TLB Multi-Hit", ++ "L2-TLB Multi-Hit", ++ "BSR Parity Error", ++ "CT MCE", + }; +-/* L2 Cache */ ++ + static const char * const smca_l2_mce_desc[] = { +- "L2M tag multi-way-hit error", +- "L2M tag ECC error", +- "L2M data ECC error", +- "HW assert", ++ "L2M Tag Multiple-Way-Hit error", ++ "L2M Tag or State Array ECC Error", ++ "L2M Data Array ECC Error", ++ "Hardware Assert Error", ++ "SDP Read Response Parity Error", + }; +-/* Decoder Unit */ ++ + static const char * const smca_de_mce_desc[] = { +- "uop cache tag parity error", +- "uop cache data parity error", +- "Insn buffer parity error", +- "uop queue parity error", +- "Insn dispatch queue parity error", +- "Fetch address FIFO parity", +- "Patch RAM data parity", +- "Patch RAM sequencer parity", +- "uop buffer parity" +-}; +-/* Execution Unit */ ++ "Micro-op cache tag array parity error", ++ "Micro-op cache data array parity error", ++ "IBB Register File parity error", ++ "Micro-op queue parity error", ++ "Instruction dispatch queue parity error", ++ "Fetch address FIFO parity error", ++ "Patch RAM data parity error", ++ "Patch RAM sequencer parity error", ++ "Micro-op buffer parity error", ++ "Hardware Assertion MCA Error", ++}; ++ + static const char * const smca_ex_mce_desc[] = { + "Watchdog timeout error", +- "Phy register file 
parity", +- "Flag register file parity", +- "Immediate displacement register file parity", +- "Address generator payload parity", +- "EX payload parity", +- "Checkpoint queue parity", +- "Retire dispatch queue parity", ++ "Physical register file parity error", ++ "Flag register file parity error", ++ "Immediate displacement register file parity error", ++ "Address generator payload parity error", ++ "EX payload parity error", ++ "Checkpoint queue parity error", ++ "Retire dispatch queue parity error", + "Retire status queue parity error", +- "Scheduling queue parity error", ++ "Scheduler queue parity error", + "Branch buffer queue parity error", ++ "Hardware Assertion error", ++ "Spec Map parity error", ++ "Retire Map parity error", + }; +-/* Floating Point Unit */ ++ + static const char * const smca_fp_mce_desc[] = { +- "Physical register file parity", +- "Freelist parity error", +- "Schedule queue parity", ++ "Physical register file (PRF) parity error", ++ "Freelist (FL) parity error", ++ "Schedule queue parity error", + "NSQ parity error", +- "Retire queue parity", +- "Status register file parity", ++ "Retire queue (RQ) parity error", ++ "Status register file (SRF) parity error", + "Hardware assertion", ++ "Physical K mask register file (KRF) parity error", + }; +-/* L3 Cache */ ++ + static const char * const smca_l3_mce_desc[] = { + "Shadow tag macro ECC error", + "Shadow tag macro multi-way-hit error", + "L3M tag ECC error", + "L3M tag multi-way-hit error", + "L3M data ECC error", +- "XI parity, L3 fill done channel error", +- "L3 victim queue parity", +- "L3 HW assert", ++ "SDP Parity Error from XI", ++ "L3 victim queue Data Fabric error", ++ "L3 Hardware Assertion", ++ "XI WCB Parity Poison Creation event", + }; +-/* Coherent Slave Unit */ ++ + static const char * const smca_cs_mce_desc[] = { +- "Illegal request from transport layer", ++ "Illegal request", + "Address violation", + "Security violation", +- "Illegal response from transport layer", ++ "Illegal 
response", + "Unexpected response", +- "Parity error on incoming request or probe response data", +- "Parity error on incoming read response data", +- "Atomic request parity", +- "ECC error on probe filter access", ++ "Request or Probe Parity Error", ++ "Read Response Parity Error", ++ "Atomic request parity error", ++ "Probe Filter ECC Error", + }; +-/* Coherent Slave Unit V2 */ ++ + static const char * const smca_cs2_mce_desc[] = { + "Illegal Request", + "Address Violation", +@@ -234,15 +253,22 @@ static const char * const smca_cs2_mce_desc[] = { + "SDP read response had an unexpected RETRY error", + "Counter overflow error", + "Counter underflow error", ++ "Illegal Request on the no data channel", ++ "Address Violation on the no data channel", ++ "Security Violation on the no data channel", ++ "Hardware Assert Error", + }; +-/* Power, Interrupt, etc.. */ ++ + static const char * const smca_pie_mce_desc[] = { +- "HW assert", +- "Internal PIE register security violation", +- "Error on GMI link", +- "Poison data written to internal PIE register", ++ "Hardware assert", ++ "Register security violation", ++ "Link error", ++ "Poison data consumption", ++ "A deferred error was detected in the DF", ++ "Watch Dog Timer", ++ "An SRAM ECC error was detected in the CNLI block", + }; +-/* Unified Memory Controller */ ++ + static const char * const smca_umc_mce_desc[] = { + "DRAM ECC error", + "Data poison error on DRAM", +@@ -250,6 +276,12 @@ static const char * const smca_umc_mce_desc[] = { + "Advanced peripheral bus error", + "Command/address parity error", + "Write data CRC error", ++ "DCQ SRAM ECC error", ++ "AES SRAM ECC error", ++ "ECS Row Error", ++ "ECS Error", ++ "UMC Throttling Error", ++ "Read CRC Error", + }; + + static const char * const smca_umc2_mce_desc[] = { +@@ -267,15 +299,14 @@ static const char * const smca_umc2_mce_desc[] = { + "LM32 MP errors", + }; + +-/* Parameter Block */ + static const char * const smca_pb_mce_desc[] = { +- "Parameter Block RAM ECC 
error", ++ "An ECC error in the Parameter Block RAM array" + }; +-/* Platform Security Processor */ ++ + static const char * const smca_psp_mce_desc[] = { +- "PSP RAM ECC or parity error", ++ "An ECC or parity error in a PSP RAM instance", + }; +-/* Platform Security Processor V2 */ ++ + static const char * const smca_psp2_mce_desc[] = { + "High SRAM ECC or parity error", + "Low SRAM ECC or parity error", +@@ -296,11 +327,11 @@ static const char * const smca_psp2_mce_desc[] = { + "TLB Bank 1 parity error", + "System Hub Read Buffer ECC or parity error", + }; +-/* System Management Unit */ ++ + static const char * const smca_smu_mce_desc[] = { +- "SMU RAM ECC or parity error", ++ "An ECC or parity error in an SMU RAM instance", + }; +-/* System Management Unit V2 */ ++ + static const char * const smca_smu2_mce_desc[] = { + "High SRAM ECC or parity error", + "Low SRAM ECC or parity error", +@@ -314,7 +345,7 @@ static const char * const smca_smu2_mce_desc[] = { + "Instruction Tag Cache Bank B ECC or parity error", + "System Hub Read Buffer ECC or parity error", + }; +-/* Microprocessor 5 Unit */ ++ + static const char * const smca_mp5_mce_desc[] = { + "High SRAM ECC or parity error", + "Low SRAM ECC or parity error", +@@ -327,15 +358,68 @@ static const char * const smca_mp5_mce_desc[] = { + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + }; +-/* Northbridge IO Unit */ ++ ++static const char * const smca_mpdma_mce_desc[] = { ++ "Main SRAM [31:0] bank ECC or parity error", ++ "Main SRAM [63:32] bank ECC or parity error", ++ "Main SRAM [95:64] bank ECC or parity error", ++ "Main SRAM [127:96] bank ECC or parity error", ++ "Data Cache Bank A ECC or parity error", ++ "Data Cache Bank B ECC or parity error", ++ "Data Tag Cache Bank A ECC or parity error", ++ "Data Tag Cache Bank B ECC or parity error", ++ "Instruction Cache Bank A ECC or parity error", ++ "Instruction Cache Bank B ECC or parity error", ++ 
"Instruction Tag Cache Bank A ECC or parity error", ++ "Instruction Tag Cache Bank B ECC or parity error", ++ "Data Cache Bank A ECC or parity error", ++ "Data Cache Bank B ECC or parity error", ++ "Data Tag Cache Bank A ECC or parity error", ++ "Data Tag Cache Bank B ECC or parity error", ++ "Instruction Cache Bank A ECC or parity error", ++ "Instruction Cache Bank B ECC or parity error", ++ "Instruction Tag Cache Bank A ECC or parity error", ++ "Instruction Tag Cache Bank B ECC or parity error", ++ "Data Cache Bank A ECC or parity error", ++ "Data Cache Bank B ECC or parity error", ++ "Data Tag Cache Bank A ECC or parity error", ++ "Data Tag Cache Bank B ECC or parity error", ++ "Instruction Cache Bank A ECC or parity error", ++ "Instruction Cache Bank B ECC or parity error", ++ "Instruction Tag Cache Bank A ECC or parity error", ++ "Instruction Tag Cache Bank B ECC or parity error", ++ "System Hub Read Buffer ECC or parity error", ++ "MPDMA TVF DVSEC Memory ECC or parity error", ++ "MPDMA TVF MMIO Mailbox0 ECC or parity error", ++ "MPDMA TVF MMIO Mailbox1 ECC or parity error", ++ "MPDMA TVF Doorbell Memory ECC or parity error", ++ "MPDMA TVF SDP Slave Memory 0 ECC or parity error", ++ "MPDMA TVF SDP Slave Memory 1 ECC or parity error", ++ "MPDMA TVF SDP Slave Memory 2 ECC or parity error", ++ "MPDMA TVF SDP Master Memory 0 ECC or parity error", ++ "MPDMA TVF SDP Master Memory 1 ECC or parity error", ++ "MPDMA TVF SDP Master Memory 2 ECC or parity error", ++ "MPDMA TVF SDP Master Memory 3 ECC or parity error", ++ "MPDMA TVF SDP Master Memory 4 ECC or parity error", ++ "MPDMA TVF SDP Master Memory 5 ECC or parity error", ++ "MPDMA TVF SDP Master Memory 6 ECC or parity error", ++ "SDP Watchdog Timer expired", ++ "MPDMA PTE Command FIFO ECC or parity error", ++ "MPDMA PTE Hub Data FIFO ECC or parity error", ++ "MPDMA PTE Internal Data FIFO ECC or parity error", ++ "MPDMA PTE Command Memory DMA ECC or parity error", ++ "MPDMA PTE Command Memory Internal ECC or parity 
error", ++}; ++ + static const char * const smca_nbio_mce_desc[] = { + "ECC or Parity error", + "PCIE error", +- "SDP ErrEvent error", +- "SDP Egress Poison Error", +- "IOHC Internal Poison Error", ++ "External SDP ErrEvent error", ++ "SDP Egress Poison error", ++ "Internal Poison error", ++ "Internal system fatal error event", + }; +-/* PCI Express Unit */ ++ + static const char * const smca_pcie_mce_desc[] = { + "CCIX PER Message logging", + "CCIX Read Response with Status: Non-Data Error", +@@ -345,7 +429,7 @@ static const char * const smca_pcie_mce_desc[] = { + }; + + static const char * const smca_pcie2_mce_desc[] = { +- "SDP Parity Error logging", ++ "SDP Data Parity Error logging", + }; + + static const char * const smca_xgmipcs_mce_desc[] = { +@@ -387,11 +471,66 @@ static const char * const smca_xgmiphy_mce_desc[] = { + "PHY APB error", + }; + +-static const char * const smca_waflphy_mce_desc[] = { +- "RAM ECC Error", +- "ARC instruction buffer parity error", +- "ARC data buffer parity error", +- "PHY APB error", ++static const char * const smca_nbif_mce_desc[] = { ++ "Timeout error from GMI", ++ "SRAM ECC error", ++ "NTB Error Event", ++ "SDP Parity error", ++}; ++ ++static const char * const smca_sata_mce_desc[] = { ++ "Parity error for port 0", ++ "Parity error for port 1", ++ "Parity error for port 2", ++ "Parity error for port 3", ++ "Parity error for port 4", ++ "Parity error for port 5", ++ "Parity error for port 6", ++ "Parity error for port 7", ++}; ++ ++static const char * const smca_usb_mce_desc[] = { ++ "Parity error or ECC error for S0 RAM0", ++ "Parity error or ECC error for S0 RAM1", ++ "Parity error or ECC error for S0 RAM2", ++ "Parity error for PHY RAM0", ++ "Parity error for PHY RAM1", ++ "AXI Slave Response error", ++}; ++ ++static const char * const smca_gmipcs_mce_desc[] = { ++ "Data Loss Error", ++ "Training Error", ++ "Replay Parity Error", ++ "Rx Fifo Underflow Error", ++ "Rx Fifo Overflow Error", ++ "CRC Error", ++ "BER Exceeded 
Error", ++ "Tx Fifo Underflow Error", ++ "Replay Buffer Parity Error", ++ "Tx Overflow Error", ++ "Replay Fifo Overflow Error", ++ "Replay Fifo Underflow Error", ++ "Elastic Fifo Overflow Error", ++ "Deskew Error", ++ "Offline Error", ++ "Data Startup Limit Error", ++ "FC Init Timeout Error", ++ "Recovery Timeout Error", ++ "Ready Serial Timeout Error", ++ "Ready Serial Attempt Error", ++ "Recovery Attempt Error", ++ "Recovery Relock Attempt Error", ++ "Deskew Abort Error", ++ "Rx Buffer Error", ++ "Rx LFDS Fifo Overflow Error", ++ "Rx LFDS Fifo Underflow Error", ++ "LinkSub Tx Timeout Error", ++ "LinkSub Rx Timeout Error", ++ "Rx CMD Packet Error", ++ "LFDS Training Timeout Error", ++ "LFDS FC Init Timeout Error", ++ "Data Loss Error", + }; + + struct smca_mce_desc { +@@ -419,12 +558,21 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, + [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc)}, + [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, ++ [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) }, + [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, + [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, + [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, + [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, ++ /* NBIF and SHUB have the same error descriptions, for now. */ ++ [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, ++ [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, ++ [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) }, ++ [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) }, ++ [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) }, ++ /* All the PHY bank types have the same error descriptions, for now. 
*/ + [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, +- [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, ++ [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, ++ [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, + }; + + struct smca_hwid { +@@ -470,6 +618,9 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + /* Microprocessor 5 Unit MCA type */ + { SMCA_MP5, 0x00020001 }, + ++ /* MPDMA MCA Type */ ++ { SMCA_MPDMA, 0x00030001 }, ++ + /* Northbridge IO Unit MCA type */ + { SMCA_NBIO, 0x00000018 }, + +@@ -480,11 +631,20 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + /* Ext Global Memory Interconnect PCS MCA type */ + { SMCA_XGMI_PCS, 0x00000050 }, + ++ { SMCA_NBIF, 0x0000006C }, ++ ++ { SMCA_SHUB, 0x00000080 }, ++ { SMCA_SATA, 0x000000A8 }, ++ { SMCA_USB, 0x000000AA }, ++ { SMCA_GMI_PCS, 0x00000241 }, ++ + /* Ext Global Memory Interconnect PHY MCA type */ + { SMCA_XGMI_PHY, 0x00000259 }, + + /* WAFL PHY MCA type */ + { SMCA_WAFL_PHY, 0x00000267 }, ++ ++ { SMCA_GMI_PHY, 0x00000269 }, + }; + + struct smca_bank_name { +@@ -508,12 +668,18 @@ static struct smca_bank_name smca_names[] = { + [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" }, + [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" }, + [SMCA_MP5] = { "Microprocessor 5 Unit" }, ++ [SMCA_MPDMA] = { "MPDMA Unit" }, + [SMCA_NBIO] = { "Northbridge IO Unit" }, + [SMCA_PCIE ... 
SMCA_PCIE_V2] = { "PCI Express Unit" }, + [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" }, ++ [SMCA_NBIF] = { "NBIF Unit" }, ++ [SMCA_SHUB] = { "System Hub Unit" }, ++ [SMCA_SATA] = { "SATA Unit" }, ++ [SMCA_USB] = { "USB Unit" }, ++ [SMCA_GMI_PCS] = { "Global Memory Interconnect PCS Unit" }, + [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" }, + [SMCA_WAFL_PHY] = { "WAFL PHY Unit" }, +- ++ [SMCA_GMI_PHY] = { "Global Memory Interconnect PHY Unit" }, + }; + + static void amd_decode_errcode(struct mce_event *e) diff --git a/SOURCES/50565005b10fe909c66f1c90f2feb95712427c7d.patch b/SOURCES/50565005b10fe909c66f1c90f2feb95712427c7d.patch new file mode 100644 index 0000000..dba0116 --- /dev/null +++ b/SOURCES/50565005b10fe909c66f1c90f2feb95712427c7d.patch @@ -0,0 +1,43 @@ +commit 50565005b10fe909c66f1c90f2feb95712427c7d +Author: DmNosachev +Date: Tue Jun 29 14:07:54 2021 +0300 + + labels/supermicro: added Supermicro X11DDW-NT(-L) + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 86e4617..373de07 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -69,7 +69,7 @@ Vendor: Supermicro + P2_DIMM4B: 2.0.1; + P2_DIMM4B: 2.1.1; + +- Model: X11DPH-i ++ Model: X11DPH-i, X11DPH-T, X11DPH-TQ + P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1; + P1-DIMMB1: 0.1.0; + P1-DIMMC1: 0.2.0; +@@ -91,4 +91,18 @@ Vendor: Supermicro + P2-DIMME1: 1.0.0; P2-DIMME2: 1.0.1; + P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1; + P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1; +- P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; +\ No newline at end of file ++ P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; ++ ++ Model: X11DDW-NT, X11DDW-L ++ P1-DIMMA1: 0.0.0; ++ P1-DIMMB1: 0.1.0; ++ P1-DIMMC1: 0.2.0; ++ P1-DIMMD1: 1.0.0; ++ P1-DIMME1: 1.1.0; ++ P1-DIMMF1: 1.2.0; ++ P2-DIMMA1: 2.0.0; ++ P2-DIMMB1: 2.1.0; ++ P2-DIMMC1: 2.2.0; ++ P2-DIMMD1: 3.0.0; ++ P2-DIMME1: 3.1.0; ++ P2-DIMMF1: 3.2.0; +\ No newline at end of file diff --git 
a/SOURCES/6bc43db1b6b3d73805179c21d1dd5521e8dc0f74.patch b/SOURCES/6bc43db1b6b3d73805179c21d1dd5521e8dc0f74.patch new file mode 100644 index 0000000..2d3bd32 --- /dev/null +++ b/SOURCES/6bc43db1b6b3d73805179c21d1dd5521e8dc0f74.patch @@ -0,0 +1,37 @@ +commit 6bc43db1b6b3d73805179c21d1dd5521e8dc0f74 +Author: DmNosachev +Date: Fri Jul 2 13:13:46 2021 +0300 + + labels/supermicro: added Supermicro X11SCA(-F) + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index b924a32..1e7761f 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -10,11 +10,7 @@ + # + + Vendor: Supermicro +- Model: A2SDi-8C-HLN4F +- DIMMA1: 0.0.0; DIMMA2: 0.0.1; +- DIMMB1: 0.1.0; DIMMB2: 0.1.1; +- +- Model: A2SDi-8C+-HLN4F ++ Model: A2SDi-8C-HLN4F, A2SDi-8C+-HLN4F + DIMMA1: 0.0.0; DIMMA2: 0.0.1; + DIMMB1: 0.1.0; DIMMB2: 0.1.1; + +@@ -115,4 +111,8 @@ Vendor: Supermicro + P2_DIMME1: 1.0.0; + P2_DIMMF1: 1.1.0; + P2_DIMMG1: 1.2.0; +- P2_DIMMH1: 1.3.0; +\ No newline at end of file ++ P2_DIMMH1: 1.3.0; ++ ++ Model: X11SCA, X11SCA-F ++ DIMMA1: 0.0.0, 0.1.0; DIMMA2: 0.2.0, 0.3.0; ++ DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1; +\ No newline at end of file diff --git a/SOURCES/738bafafdcb2e8b0ced32fff31b13754d571090b.patch b/SOURCES/738bafafdcb2e8b0ced32fff31b13754d571090b.patch new file mode 100644 index 0000000..a3ba324 --- /dev/null +++ b/SOURCES/738bafafdcb2e8b0ced32fff31b13754d571090b.patch @@ -0,0 +1,610 @@ +commit 738bafafdcb2e8b0ced32fff31b13754d571090b +Author: Jason Tian +Date: Fri May 28 11:35:43 2021 +0800 + + Add error handling for Ampere-specific errors. + + Save Ampere-specific errors' decode into sqlite3 data + base and log PCIe segment, bus/device/function number + into BMC SEL. 
+ + Signed-off-by: Jason Tian + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/non-standard-ampere.c b/non-standard-ampere.c +index 8cceb26..05b5252 100644 +--- a/non-standard-ampere.c ++++ b/non-standard-ampere.c +@@ -216,6 +216,13 @@ static const char * const err_bert_sub_type[] = { + "PMPRO Fatal", + }; + ++static char *sqlite3_table_list[] = { ++ "amp_payload0_event_tab", ++ "amp_payload1_event_tab", ++ "amp_payload2_event_tab", ++ "amp_payload3_event_tab", ++}; ++ + struct amp_ras_type_info { + int id; + const char *name; +@@ -352,6 +359,359 @@ static const char *oem_subtype_name(const struct amp_ras_type_info *info, + return "unknown"; + } + ++#ifdef HAVE_SQLITE3 ++/*key pair definition for ampere specific error payload type 0*/ ++static const struct db_fields amp_payload0_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = "status_reg", .type = "INTEGER" }, ++ { .name = "addr_reg", .type = "INTEGER" }, ++ { .name = "misc0", .type = "INTEGER" }, ++ { .name = "misc1", .type = "INTEGER" }, ++ { .name = "misc2", .type = "INTEGER" }, ++ { .name = "misc3", .type = "INTEGER" }, ++}; ++ ++static const struct db_table_descriptor amp_payload0_event_tab = { ++ .name = "amp_payload0_event", ++ .fields = amp_payload0_event_fields, ++ .num_fields = ARRAY_SIZE(amp_payload0_event_fields), ++}; ++ ++/*key pair definition for ampere specific error payload type 1*/ ++static const struct db_fields amp_payload1_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = 
"uncore_err_status", .type = "INTEGER" }, ++ { .name = "uncore_err_mask", .type = "INTEGER" }, ++ { .name = "uncore_err_sev", .type = "INTEGER" }, ++ { .name = "core_err_status", .type = "INTEGER" }, ++ { .name = "core_err_mask", .type = "INTEGER" }, ++ { .name = "root_err_cmd", .type = "INTEGER" }, ++ { .name = "root_err_status", .type = "INTEGER" }, ++ { .name = "src_id", .type = "INTEGER" }, ++ { .name = "reserved1", .type = "INTEGER" }, ++ { .name = "reserverd2", .type = "INTEGER" }, ++}; ++ ++static const struct db_table_descriptor amp_payload1_event_tab = { ++ .name = "amp_payload1_event", ++ .fields = amp_payload1_event_fields, ++ .num_fields = ARRAY_SIZE(amp_payload1_event_fields), ++}; ++ ++/*key pair definition for ampere specific error payload type 2*/ ++static const struct db_fields amp_payload2_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = "ce_report_reg", .type = "INTEGER" }, ++ { .name = "ce_location", .type = "INTEGER" }, ++ { .name = "ce_addr", .type = "INTEGER" }, ++ { .name = "ue_report_reg", .type = "INTEGER" }, ++ { .name = "ue_location", .type = "INTEGER" }, ++ { .name = "ue_addr", .type = "INTEGER" }, ++ { .name = "reserved1", .type = "INTEGER" }, ++ { .name = "reserved2", .type = "INTEGER" }, ++ { .name = "reserved2", .type = "INTEGER" }, ++}; ++ ++static const struct db_table_descriptor amp_payload2_event_tab = { ++ .name = "amp_payload2_event", ++ .fields = amp_payload2_event_fields, ++ .num_fields = ARRAY_SIZE(amp_payload2_event_fields), ++}; ++ ++/*key pair definition for ampere specific error payload type 3*/ ++static const struct db_fields amp_payload3_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = 
"type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = "fw_spec_data0", .type = "INTEGER" }, ++ { .name = "fw_spec_data1", .type = "INTEGER" }, ++ { .name = "fw_spec_data2", .type = "INTEGER" }, ++ { .name = "fw_spec_data3", .type = "INTEGER" }, ++ { .name = "fw_spec_data4", .type = "INTEGER" }, ++ { .name = "fw_spec_data5", .type = "INTEGER" }, ++}; ++ ++static const struct db_table_descriptor amp_payload3_event_tab = { ++ .name = "amp_payload3_event", ++ .fields = amp_payload3_event_fields, ++ .num_fields = ARRAY_SIZE(amp_payload3_event_fields), ++}; ++ ++/*Save data with different type into sqlite3 db*/ ++static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder, ++ enum amp_oem_data_type data_type, ++ int id, int64_t data, const char *text) ++{ ++ switch (data_type) { ++ case AMP_OEM_DATA_TYPE_INT: ++ sqlite3_bind_int(ev_decoder->stmt_dec_record, id, data); ++ break; ++ case AMP_OEM_DATA_TYPE_INT64: ++ sqlite3_bind_int64(ev_decoder->stmt_dec_record, id, data); ++ break; ++ case AMP_OEM_DATA_TYPE_TEXT: ++ sqlite3_bind_text(ev_decoder->stmt_dec_record, id, ++ text, -1, NULL); ++ break; ++ default: ++ break; ++ } ++} ++ ++static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, ++ const char *name) ++{ ++ int rc; ++ ++ rc = sqlite3_step(ev_decoder->stmt_dec_record); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to do %s step on sqlite: error = %d\n", name, rc); ++ ++ rc = sqlite3_reset(ev_decoder->stmt_dec_record); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to reset %s on sqlite: error = %d\n", name, rc); ++ ++ rc = sqlite3_clear_bindings(ev_decoder->stmt_dec_record); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to clear bindings %s on sqlite: error = %d\n", ++ name, rc); ++ ++ return rc; ++} ++ ++/*save all Ampere Specific Error 
Payload type 0 to sqlite3 database*/ ++static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload0_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD0_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD0_FIELD_SUB_TYPE, 0, subtype_str); ++ ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD0_FIELD_INS, INSTANCE(err->instance), NULL); ++ ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD0_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD0_FIELD_STATUS_REG, err->err_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_ADDR_REG, ++ err->err_addr, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC0, ++ err->err_misc_0, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC1, ++ err->err_misc_1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC2, ++ err->err_misc_2, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC3, ++ err->err_misc_3, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload0_event_tab"); ++ } ++} ++ ++/*save all Ampere Specific Error Payload type 1 to sqlite3 database*/ ++static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload1_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD1_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD1_FIELD_SUB_TYPE, 0, subtype_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_INS, ++ 
INSTANCE(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_STATUS, ++ err->uncore_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_MASK, ++ err->uncore_mask, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_SEV, ++ err->uncore_sev, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_STATUS, ++ err->core_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_MASK, ++ err->core_mask, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_CMD, ++ err->root_err_cmd, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_STATUS, ++ err->root_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_SRC_ID, ++ err->src_id, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_RESERVED1, ++ err->reserved1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD1_FIELD_RESERVED2, ++ err->reserved2, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload1_event_tab"); ++ } ++} ++ ++/*save all Ampere Specific Error Payload type 2 to sqlite3 database*/ ++static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload2_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD2_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD2_FIELD_SUB_TYPE, 0, subtype_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_INS, INSTANCE(err->instance), NULL); ++ 
record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_CE_REPORT_REG, ++ err->ce_register, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_CE_LOACATION, ++ err->ce_location, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_CE_ADDR, ++ err->ce_addr, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_UE_REPORT_REG, ++ err->ue_register, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_UE_LOCATION, ++ err->ue_location, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_UE_ADDR, ++ err->ue_addr, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_RESERVED1, ++ err->reserved1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD2_FIELD_RESERVED2, ++ err->reserved2, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD2_FIELD_RESERVED3, ++ err->reserved3, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload2_event_tab"); ++ } ++} ++ ++/*save all Ampere Specific Error Payload type 3 to sqlite3 database*/ ++static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload3_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD3_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD3_FIELD_SUB_TYPE, 0, subtype_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD3_FIELD_INS, INSTANCE(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD3_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ 
AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0, ++ err->fw_speci_data0, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1, ++ err->fw_speci_data1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2, ++ err->fw_speci_data2, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3, ++ err->fw_speci_data3, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4, ++ err->fw_speci_data4, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5, ++ err->fw_speci_data5, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload3_event_tab"); ++ } ++} ++ ++#else ++static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder, ++ enum amp_oem_data_type data_type, ++ int id, int64_t data, const char *text) ++{ ++ return 0; ++} ++ ++static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload0_type_sec *err) ++{ ++ return 0; ++} ++ ++static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload1_type_sec *err) ++{ ++ return 0; ++} ++ ++static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload2_type_sec *err) ++{ ++ return 0; ++} ++ ++static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload3_type_sec *err) ++{ ++ return 0; ++} ++ ++static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, char *name) ++{ ++ return 0; ++} ++#endif + + /*decode ampere specific error payload type 0, the CPU's data is save*/ + /*to sqlite by ras-arm-handler, others are saved by this function.*/ +@@ -434,6 +794,7 @@ void 
decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload0_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -517,6 +878,7 @@ static void decode_amp_payload1_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload1_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -601,6 +963,7 @@ static void decode_amp_payload2_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload2_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -673,6 +1036,7 @@ static void decode_amp_payload3_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload3_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -687,6 +1051,38 @@ static int decode_amp_oem_type_error(struct ras_events *ras, + { + int payload_type = PAYLOAD_TYPE(event->error[0]); + ++#ifdef HAVE_SQLITE3 ++ struct db_table_descriptor db_tab; ++ int id = 0; ++ ++ if (payload_type == PAYLOAD_TYPE_0) { ++ db_tab = amp_payload0_event_tab; ++ id = AMP_PAYLOAD0_FIELD_TIMESTAMP; ++ } else if (payload_type == PAYLOAD_TYPE_1) { ++ db_tab = amp_payload1_event_tab; ++ id = AMP_PAYLOAD1_FIELD_TIMESTAMP; ++ } else if (payload_type == PAYLOAD_TYPE_2) { ++ db_tab = amp_payload2_event_tab; ++ id = AMP_PAYLOAD2_FIELD_TIMESTAMP; ++ } else if (payload_type == PAYLOAD_TYPE_3) { ++ db_tab = amp_payload3_event_tab; ++ id = AMP_PAYLOAD3_FIELD_TIMESTAMP; ++ } else ++ return -1; ++ ++ if (!ev_decoder->stmt_dec_record) { ++ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, ++ &db_tab) != SQLITE_OK) { ++ trace_seq_printf(s, ++ "create sql %s fail\n", ++ sqlite3_table_list[payload_type]); ++ return -1; ++ } ++ } ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ id, 0, event->timestamp); ++#endif ++ + if (payload_type == PAYLOAD_TYPE_0) { + const struct 
amp_payload0_type_sec *err = + (struct amp_payload0_type_sec *)event->error; +diff --git a/non-standard-ampere.h b/non-standard-ampere.h +index aacf3a8..f463c53 100644 +--- a/non-standard-ampere.h ++++ b/non-standard-ampere.h +@@ -102,6 +102,79 @@ struct amp_payload3_type_sec { + uint64_t fw_speci_data5; + }; + ++enum amp_oem_data_type { ++ AMP_OEM_DATA_TYPE_INT, ++ AMP_OEM_DATA_TYPE_INT64, ++ AMP_OEM_DATA_TYPE_TEXT, ++}; ++ ++enum { ++ AMP_PAYLOAD0_FIELD_ID, ++ AMP_PAYLOAD0_FIELD_TIMESTAMP, ++ AMP_PAYLOAD0_FIELD_TYPE, ++ AMP_PAYLOAD0_FIELD_SUB_TYPE, ++ AMP_PAYLOAD0_FIELD_INS, ++ AMP_PAYLOAD0_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD0_FIELD_STATUS_REG, ++ AMP_PAYLOAD0_FIELD_ADDR_REG, ++ AMP_PAYLOAD0_FIELD_MISC0, ++ AMP_PAYLOAD0_FIELD_MISC1, ++ AMP_PAYLOAD0_FIELD_MISC2, ++ AMP_PAYLOAD0_FIELD_MISC3, ++}; ++ ++enum { ++ AMP_PAYLOAD1_FIELD_ID, ++ AMP_PAYLOAD1_FIELD_TIMESTAMP, ++ AMP_PAYLOAD1_FIELD_TYPE, ++ AMP_PAYLOAD1_FIELD_SUB_TYPE, ++ AMP_PAYLOAD1_FIELD_INS, ++ AMP_PAYLOAD1_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_STATUS, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_MASK, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_SEV, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_STATUS, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_MASK, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_CMD, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_STATUS, ++ AMP_PAYLOAD1_FIELD_SRC_ID, ++ AMP_PAYLOAD1_FIELD_RESERVED1, ++ AMP_PAYLOAD1_FIELD_RESERVED2, ++}; ++ ++enum { ++ AMP_PAYLOAD2_FIELD_ID, ++ AMP_PAYLOAD2_FIELD_TIMESTAMP, ++ AMP_PAYLOAD2_FIELD_TYPE, ++ AMP_PAYLOAD2_FIELD_SUB_TYPE, ++ AMP_PAYLOAD2_FIELD_INS, ++ AMP_PAYLOAD2_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD2_FIELD_CE_REPORT_REG, ++ AMP_PAYLOAD2_FIELD_CE_LOACATION, ++ AMP_PAYLOAD2_FIELD_CE_ADDR, ++ AMP_PAYLOAD2_FIELD_UE_REPORT_REG, ++ AMP_PAYLOAD2_FIELD_UE_LOCATION, ++ AMP_PAYLOAD2_FIELD_UE_ADDR, ++ AMP_PAYLOAD2_FIELD_RESERVED1, ++ AMP_PAYLOAD2_FIELD_RESERVED2, ++ AMP_PAYLOAD2_FIELD_RESERVED3, ++}; ++ ++enum { ++ AMP_PAYLOAD3_FIELD_ID, ++ AMP_PAYLOAD3_FIELD_TIMESTAMP, ++ AMP_PAYLOAD3_FIELD_TYPE, ++ 
AMP_PAYLOAD3_FIELD_SUB_TYPE, ++ AMP_PAYLOAD3_FIELD_INS, ++ AMP_PAYLOAD3_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5 ++}; ++ + void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder, + struct trace_seq *s, + const struct amp_payload0_type_sec *err); +diff --git a/ras-aer-handler.c b/ras-aer-handler.c +index 8ddd439..6f4cb2b 100644 +--- a/ras-aer-handler.c ++++ b/ras-aer-handler.c +@@ -67,6 +67,9 @@ int ras_aer_event_handler(struct trace_seq *s, + struct tm *tm; + struct ras_aer_event ev; + char buf[BUF_LEN]; ++ char ipmi_add_sel[105]; ++ uint8_t sel_data[5]; ++ int seg, bus, dev, fn; + + /* + * Newer kernels (3.10-rc1 or upper) provide an uptime clock. +@@ -129,15 +132,19 @@ int ras_aer_event_handler(struct trace_seq *s, + switch (severity_val) { + case HW_EVENT_AER_UNCORRECTED_NON_FATAL: + ev.error_type = "Uncorrected (Non-Fatal)"; ++ sel_data[0] = 0xca; + break; + case HW_EVENT_AER_UNCORRECTED_FATAL: + ev.error_type = "Uncorrected (Fatal)"; ++ sel_data[0] = 0xca; + break; + case HW_EVENT_AER_CORRECTED: + ev.error_type = "Corrected"; ++ sel_data[0] = 0xbf; + break; + default: + ev.error_type = "Unknown severity"; ++ sel_data[0] = 0xbf; + } + trace_seq_puts(s, ev.error_type); + +@@ -151,5 +158,29 @@ int ras_aer_event_handler(struct trace_seq *s, + ras_report_aer_event(ras, &ev); + #endif + ++#ifdef HAVE_AMP_NS_DECODE ++ /* ++ * Get PCIe AER error source seg/bus/dev/fn and save it into ++ * BMC OEM SEL, ipmitool raw 0x0a 0x44 is IPMI command-Add SEL ++ * entry, please refer to IPMI specification chapter 31.6.
0xcd3a ++ * is manufacturer ID(ampere), byte 12 is sensor num(CE is 0xBF, ++ * UE is 0xCA), byte 13~14 is segment number, byte 15 is bus ++ * number, byte 16[7:3] is device number, byte 16[2:0] is ++ * function number ++ */ ++ sscanf(ev.dev_name, "%x:%x:%x.%x", &seg, &bus, &dev, &fn); ++ ++ sel_data[1] = seg & 0xff; ++ sel_data[2] = (seg & 0xff00) >> 8; ++ sel_data[3] = bus; ++ sel_data[4] = (((dev & 0x1f) << 3) | (fn & 0x7)); ++ ++ sprintf(ipmi_add_sel, ++ "ipmitool raw 0x0a 0x44 0x00 0x00 0xc0 0x00 0x00 0x00 0x00 0x3a 0xcd 0x00 0xc0 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x", ++ sel_data[0], sel_data[1], sel_data[2], sel_data[3], sel_data[4]); ++ ++ system(ipmi_add_sel); ++#endif ++ + return 0; + } diff --git a/SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch b/SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch new file mode 100644 index 0000000..76afc8e --- /dev/null +++ b/SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch @@ -0,0 +1,24 @@ +commit 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4 +Author: Muralidhara M K +Date: Wed Jul 28 01:52:12 2021 -0500 + + rasdaemon: Support MCE for AMD CPU family 19h + + Add support for family 19h x86 CPUs from AMD.
+ + Signed-off-by: Muralidhara M K + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-mce-handler.c b/ras-mce-handler.c +index 805004a..f2b53d4 100644 +--- a/ras-mce-handler.c ++++ b/ras-mce-handler.c +@@ -208,7 +208,7 @@ static int detect_cpu(struct ras_events *ras) + mce->cputype = CPU_AMD_SMCA; + goto ret; + } +- if (mce->family > 23) { ++ if (mce->family > 25) { + log(ALL, LOG_INFO, + "Can't parse MCE for this AMD CPU yet %d\n", + mce->family); diff --git a/SOURCES/7ccf12f5ae26a055926d175d908c7930293438c4.patch b/SOURCES/7ccf12f5ae26a055926d175d908c7930293438c4.patch new file mode 100644 index 0000000..5a7a860 --- /dev/null +++ b/SOURCES/7ccf12f5ae26a055926d175d908c7930293438c4.patch @@ -0,0 +1,26 @@ +commit 7ccf12f5ae26a055926d175d908c7930293438c4 +Author: DmNosachev +Date: Fri Jul 23 17:28:33 2021 +0300 + + labels/supermicro: added Supermicro X11SCW + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 990fc9e..aea7c3c 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -133,4 +133,10 @@ Vendor: Supermicro + + Model: X11SCA, X11SCA-F + DIMMA1: 0.0.0, 0.1.0; DIMMA2: 0.2.0, 0.3.0; +- DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1; +\ No newline at end of file ++ DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1; ++ ++ Model: X11SCW-F ++ DIMMA1: 0.1.0; ++ DIMMA2: 0.0.0; ++ DIMMB1: 0.1.1; ++ DIMMB2: 0.0.1; +\ No newline at end of file diff --git a/SOURCES/932118b04a04104dfac6b8536419803f236e6118.patch b/SOURCES/932118b04a04104dfac6b8536419803f236e6118.patch new file mode 100644 index 0000000..b88923f --- /dev/null +++ b/SOURCES/932118b04a04104dfac6b8536419803f236e6118.patch @@ -0,0 +1,411 @@ +commit 932118b04a04104dfac6b8536419803f236e6118 +Author: Avadhut Naik +Date: Mon May 22 22:13:17 2023 +0000 + + rasdaemon: Add support for post-processing MCA errors + + Currently, the rasdaemon performs detailed error decoding of received + MCA errors on the system only whence it is running, either as a daemon + or in 
the foreground. + + As such, error decoding cannot be undertaken for any MCA errors received + whence the rasdaemon wasn't running. Additionally, if the error decoding + modules like edac_mce_amd too have not been loaded, error records in the + dmesg buffer might correspond to raw values in associated MSRs, compelling + users to undertake decoding manually. The scenario seems more plausible on + AMD systems with Scalable MCA (SMCA) with plans in place to remove SMCA + Extended Error Descriptions from the edac_mce_amd module in an effort to + offload SMCA Error Decoding to the rasdaemon. + + As such, add support to post-process and decode MCA Errors received on AMD + SMCA systems from raw MSR values. Support for post-processing and decoding + of MCA Errors received on CPUs of other vendors can be added in the future, + as needed. + + Suggested-by: Yazen Ghannam + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +--- + mce-amd-smca.c | 8 ++- + ras-events.h | 1 + ras-mce-handler.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++---- + ras-mce-handler.h | 4 + + ras-record.h | 10 ++++ + rasdaemon.c | 94 +++++++++++++++++++++++++++++++++++++++++++++- + 6 files changed, 216 insertions(+), 11 deletions(-) + +--- rasdaemon-0.6.7.orig/mce-amd-smca.c 2023-10-27 12:44:55.541077722 -0400 ++++ rasdaemon-0.6.7/mce-amd-smca.c 2023-10-27 12:44:58.549049019 -0400 +@@ -710,7 +710,7 @@ static struct smca_bank_name smca_names[ + [SMCA_GMI_PHY] = { "Global Memory Interconnect PHY Unit" }, + }; + +-static void amd_decode_errcode(struct mce_event *e) ++void amd_decode_errcode(struct mce_event *e) + { + + decode_amd_errcode(e); +@@ -782,7 +782,7 @@ *hwid_mcatype = 0x00010000; + } + + /* Decode extended errors according to Scalable MCA specification */ +-static void decode_smca_error(struct mce_event *e, struct mce_priv* m) ++void decode_smca_error(struct mce_event *e, struct mce_priv *m) + { + enum smca_bank_types bank_type; + const char *ip_name; +@@ -827,7
+827,9 @@ for (i = 0; i < ARRAY_SIZE(smca_hwid_mca + /* Only print the descriptor of valid extended error code */ + if (xec < smca_mce_descs[bank_type].num_descs) + mce_snprintf(e->mcastatus_msg, +- " %s.\n", smca_mce_descs[bank_type].descs[xec]); ++ "%s. Ext Err Code: %d", ++ smca_mce_descs[bank_type].descs[xec], ++ xec); + + if (bank_type == SMCA_UMC && xec == 0) { + channel = find_umc_channel(e); +--- rasdaemon-0.6.7.orig/ras-events.h 2023-10-27 12:44:55.541077722 -0400 ++++ rasdaemon-0.6.7/ras-events.h 2023-10-27 12:44:58.549049019 -0400 +@@ -100,6 +100,7 @@ enum ghes_severity { + + /* Function prototypes */ + int toggle_ras_mc_event(int enable); ++int ras_offline_mce_event(struct ras_mc_offline_event *event); + int handle_ras_events(int record_events); + + #endif +--- rasdaemon-0.6.7.orig/ras-mce-handler.c 2023-10-27 12:44:55.541077722 -0400 ++++ rasdaemon-0.6.7/ras-mce-handler.c 2023-10-27 12:45:27.159776011 -0400 +@@ -63,10 +63,8 @@ [CPU_XEON75XX] = "Intel Xeon 7500 series + [CPU_SAPPHIRERAPIDS] = "Sapphirerapids server", + }; + +-static enum cputype select_intel_cputype(struct ras_events *ras) ++static enum cputype select_intel_cputype(struct mce_priv *mce) + { +- struct mce_priv *mce = ras->mce_priv; +- + if (mce->family == 15) { + if (mce->model == 6) + return CPU_TULSA; +@@ -140,9 +138,8 @@ if (mce->model > 0x1a) { + return mce->family == 6 ? 
CPU_P6OLD : CPU_GENERIC; + } + +-static int detect_cpu(struct ras_events *ras) ++static int detect_cpu(struct mce_priv *mce) + { +- struct mce_priv *mce = ras->mce_priv; + FILE *f; + int ret = 0; + char *line = NULL; +@@ -221,7 +218,7 @@ ret = 0; + } + goto ret; + } else if (!strcmp(mce->vendor,"GenuineIntel")) { +- mce->cputype = select_intel_cputype(ras); ++ mce->cputype = select_intel_cputype(mce); + } else { + ret = EINVAL; + } +@@ -246,7 +243,7 @@ int register_mce_handler(struct ras_even + + mce = ras->mce_priv; + +- rc = detect_cpu(ras); ++ rc = detect_cpu(mce); + if (rc) { + if (mce->processor_flags) + free (mce->processor_flags); +@@ -383,6 +380,105 @@ #if 0 + */ + } + ++static int report_mce_offline(struct trace_seq *s, ++ struct mce_event *mce, ++ struct mce_priv *priv) ++{ ++ time_t now; ++ struct tm *tm; ++ ++ time(&now); ++ tm = localtime(&now); ++ ++ if (tm) ++ strftime(mce->timestamp, sizeof(mce->timestamp), ++ "%Y-%m-%d %H:%M:%S %z", tm); ++ trace_seq_printf(s, "%s,", mce->timestamp); ++ ++ if (*mce->bank_name) ++ trace_seq_printf(s, " %s,", mce->bank_name); ++ else ++ trace_seq_printf(s, " bank=%x,", mce->bank); ++ ++ if (*mce->mcastatus_msg) ++ trace_seq_printf(s, " mca: %s,", mce->mcastatus_msg); ++ ++ if (*mce->mcistatus_msg) ++ trace_seq_printf(s, " mci: %s,", mce->mcistatus_msg); ++ ++ if (*mce->mc_location) ++ trace_seq_printf(s, " Locn: %s,", mce->mc_location); ++ ++ if (*mce->error_msg) ++ trace_seq_printf(s, " Error Msg: %s\n", mce->error_msg); ++ ++ return 0; ++} ++ ++int ras_offline_mce_event(struct ras_mc_offline_event *event) ++{ ++ int rc = 0; ++ struct trace_seq s; ++ struct mce_event *mce = NULL; ++ struct mce_priv *priv = NULL; ++ ++ mce = (struct mce_event *)calloc(1, sizeof(struct mce_event)); ++ if (!mce) { ++ log(TERM, LOG_ERR, "Can't allocate memory for mce struct\n"); ++ return errno; ++ } ++ ++ priv = (struct mce_priv *)calloc(1, sizeof(struct mce_priv)); ++ if (!priv) { ++ log(TERM, LOG_ERR, "Can't allocate memory for 
mce_priv struct\n"); ++ free(mce); ++ return errno; ++ } ++ ++ if (event->smca) { ++ priv->cputype = CPU_AMD_SMCA; ++ priv->family = event->family; ++ priv->model = event->model; ++ } else { ++ rc = detect_cpu(priv); ++ if (rc) { ++ log(TERM, LOG_ERR, "Failed to detect CPU\n"); ++ goto free_mce; ++ } ++ } ++ ++ mce->status = event->status; ++ mce->bank = event->bank; ++ ++ switch (priv->cputype) { ++ case CPU_AMD_SMCA: ++ mce->synd = event->synd; ++ mce->ipid = event->ipid; ++ if (!mce->ipid || !mce->status) { ++ log(TERM, LOG_ERR, "%s MSR required.\n", ++ mce->ipid ? "Status" : "Ipid"); ++ rc = -EINVAL; ++ goto free_mce; ++ } ++ decode_smca_error(mce, priv); ++ amd_decode_errcode(mce); ++ break; ++ default: ++ break; ++ } ++ ++ trace_seq_init(&s); ++ report_mce_offline(&s, mce, priv); ++ trace_seq_do_printf(&s); ++ fflush(stdout); ++ trace_seq_destroy(&s); ++ ++free_mce: ++ free(priv); ++ free(mce); ++ return rc; ++} ++ + int ras_mce_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +--- rasdaemon-0.6.7.orig/ras-mce-handler.h 2023-10-27 12:44:55.541077722 -0400 ++++ rasdaemon-0.6.7/ras-mce-handler.h 2023-10-27 12:44:58.550049010 -0400 +@@ -118,6 +118,10 @@ int ras_mce_event_handler(struct trace_s + /* enables intel iMC logs */ + int set_intel_imc_log(enum cputype cputype, unsigned ncpus); + ++/* Undertake AMD SMCA Error Decoding */ ++void decode_smca_error(struct mce_event *e, struct mce_priv *m); ++void amd_decode_errcode(struct mce_event *e); ++ + /* Per-CPU-type decoders for Intel CPUs */ + void p4_decode_model(struct mce_event *e); + void core2_decode_model(struct mce_event *e); +--- rasdaemon-0.6.7.orig/ras-record.h 2023-10-27 12:44:55.541077722 -0400 ++++ rasdaemon-0.6.7/ras-record.h 2023-10-27 12:44:58.550049010 -0400 +@@ -21,6 +21,7 @@ * Foundation, Inc., 51 Franklin Street, + #define __RAS_RECORD_H + + #include ++#include + #include "config.h" + + #define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x))) 
+@@ -39,6 +40,15 @@ struct ras_mc_event { + const char *driver_detail; + }; + ++struct ras_mc_offline_event { ++ unsigned int family, model; ++ bool smca; ++ uint8_t bank; ++ uint64_t ipid; ++ uint64_t synd; ++ uint64_t status; ++}; ++ + struct ras_aer_event { + char timestamp[64]; + const char *error_type; +--- rasdaemon-0.6.7.orig/rasdaemon.c 2023-10-27 12:44:55.541077722 -0400 ++++ rasdaemon-0.6.7/rasdaemon.c 2023-10-27 12:44:58.550049010 -0400 +@@ -41,8 +41,21 @@ struct arguments { + int record_events; + int enable_ras; + int foreground; ++ int offline; + }; + ++enum OFFLINE_ARG_KEYS { ++ SMCA = 0x100, ++ MODEL, ++ FAMILY, ++ BANK_NUM, ++ IPID_REG, ++ STATUS_REG, ++ SYNDROME_REG ++}; ++ ++struct ras_mc_offline_event event; ++ + static error_t parse_opt(int k, char *arg, struct argp_state *state) + { + struct arguments *args = state->input; +@@ -62,18 +75,84 @@ static error_t parse_opt(int k, char *ar + case 'f': + args->foreground++; + break; ++#ifdef HAVE_MCE ++ case 'p': ++ if (state->argc < 4) ++ argp_state_help(state, stdout, ARGP_HELP_LONG | ARGP_HELP_EXIT_ERR); ++ args->offline++; ++ break; ++#endif + default: + return ARGP_ERR_UNKNOWN; + } + return 0; + } + ++#ifdef HAVE_MCE ++static error_t parse_opt_offline(int key, char *arg, ++ struct argp_state *state) ++{ ++ switch (key) { ++ case SMCA: ++ event.smca = true; ++ break; ++ case MODEL: ++ event.model = strtoul(state->argv[state->next], NULL, 0); ++ break; ++ case FAMILY: ++ event.family = strtoul(state->argv[state->next], NULL, 0); ++ break; ++ case BANK_NUM: ++ event.bank = atoi(state->argv[state->next]); ++ break; ++ case IPID_REG: ++ event.ipid = strtoull(state->argv[state->next], NULL, 0); ++ break; ++ case STATUS_REG: ++ event.status = strtoull(state->argv[state->next], NULL, 0); ++ break; ++ case SYNDROME_REG: ++ event.synd = strtoull(state->argv[state->next], NULL, 0); ++ break; ++ default: ++ return ARGP_ERR_UNKNOWN; ++ } ++ return 0; ++} ++#endif ++ + long user_hz; + + int main(int argc, char 
*argv[]) + { + struct arguments args; + int idx = -1; ++ ++#ifdef HAVE_MCE ++ const struct argp_option offline_options[] = { ++ {"smca", SMCA, 0, 0, "AMD SMCA Error Decoding"}, ++ {"model", MODEL, 0, 0, "CPU Model"}, ++ {"family", FAMILY, 0, 0, "CPU Family"}, ++ {"bank", BANK_NUM, 0, 0, "Bank Number"}, ++ {"ipid", IPID_REG, 0, 0, "IPID Register (for SMCA systems only)"}, ++ {"status", STATUS_REG, 0, 0, "Status Register"}, ++ {"synd", SYNDROME_REG, 0, 0, "Syndrome Register"}, ++ {0, 0, 0, 0, 0, 0}, ++ }; ++ ++ struct argp offline_argp = { ++ .options = offline_options, ++ .parser = parse_opt_offline, ++ .doc = TOOL_DESCRIPTION, ++ .args_doc = ARGS_DOC, ++ }; ++ ++ struct argp_child offline_parser[] = { ++ {&offline_argp, 0, "Post-Processing Options:", 0}, ++ {0, 0, 0, 0}, ++ }; ++#endif ++ + const struct argp_option options[] = { + {"enable", 'e', 0, 0, "enable RAS events and exit", 0}, + {"disable", 'd', 0, 0, "disable RAS events and exit", 0}, +@@ -81,6 +160,10 @@ {"disable", 'd', 0, 0, "disable RAS even + {"record", 'r', 0, 0, "record events via sqlite3", 0}, + #endif + {"foreground", 'f', 0, 0, "run foreground, not daemonize"}, ++#ifdef HAVE_MCE ++ {"post-processing", 'p', 0, 0, ++ "Post-processing MCE's with raw register values"}, ++#endif + + { 0, 0, 0, 0, 0, 0 } + }; +@@ -89,7 +172,9 @@ { 0, 0, 0, 0, 0, 0 } + .parser = parse_opt, + .doc = TOOL_DESCRIPTION, + .args_doc = ARGS_DOC, +- ++#ifdef HAVE_MCE ++ .children = offline_parser, ++#endif + }; + memset (&args, 0, sizeof(args)); + +@@ -111,6 +196,13 @@ enable = (args.enable_ras > 0) ? 
1 : 0; + return 0; + } + ++#ifdef HAVE_MCE ++ if (args.offline) { ++ ras_offline_mce_event(&event); ++ return 0; ++ } ++#endif ++ + openlog(TOOL_NAME, 0, LOG_DAEMON); + if (!args.foreground) + if (daemon(0,0)) diff --git a/SOURCES/9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b.patch b/SOURCES/9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b.patch new file mode 100644 index 0000000..5267fc8 --- /dev/null +++ b/SOURCES/9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b.patch @@ -0,0 +1,51 @@ +commit 9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b +Author: Muralidhara M K +Date: Tue Jul 27 06:36:45 2021 -0500 + + rasdaemon: ras-mc-ctl: Fix script to parse dimm sizes + + Removes trailing spaces at the end of a line from + file location and fixes --layout option to parse dimm nodes + to get the size of each dimm from ras-mc-ctl. + + Issue is reported https://github.com/mchehab/rasdaemon/issues/43 + Where '> ras-mc-ctl --layout' reports all 0s + + With this change the layout option prints the correct dimm sizes + > sudo ras-mc-ctl --layout + +-----------------------------------------------+ + | mc0 | + | csrow0 | csrow1 | csrow2 | csrow3 | + ----------+-----------------------------------------------+ + ... + channel7: | 16384 MB | 0 MB | 0 MB | 0 MB | + channel6: | 16384 MB | 0 MB | 0 MB | 0 MB | + ... 
+ ----------+-----------------------------------------------+ + + Signed-off-by: Muralidhara M K + Signed-off-by: Naveen Krishna Chatradhi + Cc: Yazen Ghannam + Signed-off-by: Mauro Carvalho Chehab + Link: https://lkml.kernel.org/r/20210810183855.129076-1-nchatrad@amd.com/ + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 1e3aeb7..b22dd60 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -246,6 +246,7 @@ sub parse_dimm_nodes + if (($file =~ /max_location$/)) { + open IN, $file; + my $location = ; ++ $location =~ s/\s+$//; + close IN; + my @temp = split(/ /, $location); + +@@ -288,6 +289,7 @@ sub parse_dimm_nodes + + open IN, $file; + my $location = ; ++ $location =~ s/\s+$//; + close IN; + + my @pos; diff --git a/SOURCES/9a5baed97b21af31064d9995ffcfaac0e9d7983e.patch b/SOURCES/9a5baed97b21af31064d9995ffcfaac0e9d7983e.patch new file mode 100644 index 0000000..1a221ea --- /dev/null +++ b/SOURCES/9a5baed97b21af31064d9995ffcfaac0e9d7983e.patch @@ -0,0 +1,40 @@ +commit 9a5baed97b21af31064d9995ffcfaac0e9d7983e +Author: DmNosachev +Date: Tue Jun 29 13:37:48 2021 +0300 + + labels/supermicro: supermicro db syntax + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index bfaed93..47ea05f 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -18,17 +18,17 @@ Vendor: Supermicro + DIMMA1: 0.0.0; DIMMA2: 0.0.1; + DIMMB1: 0.1.0; DIMMB2: 0.1.1; + +- Product: X10SRA-F +- DIMMA1: 0.0.0 +- DIMMA2: 0.0.1 +- DIMMB1: 0.1.0 +- DIMMB2: 0.1.1 +- DIMMC1: 1.0.0 +- DIMMC2: 1.0.1 +- DIMMD1: 1.1.0 +- DIMMD2: 1.1.1 ++ Model: X10SRA-F ++ DIMMA1: 0.0.0; ++ DIMMA2: 0.0.1; ++ DIMMB1: 0.1.0; ++ DIMMB2: 0.1.1; ++ DIMMC1: 1.0.0; ++ DIMMC2: 1.0.1; ++ DIMMD1: 1.1.0; ++ DIMMD2: 1.1.1; + +- Product: H8DGU ++ Model: H8DGU + P1_DIMM1A: 0.2.0; + P1_DIMM1A: 0.3.0; + P2_DIMM1A: 3.2.0; diff --git a/SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch b/SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch new file mode 100644 index 
0000000..c4c8af1 --- /dev/null +++ b/SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch @@ -0,0 +1,230 @@ +commit 9acef39f13833f7d53ef96abc5a72e79384260f4 +Author: Naveen Krishna Chatradhi +Date: Tue Jun 1 11:01:17 2021 +0530 + + rasdaemon: Add new SMCA bank types with error decoding + + Upcoming systems with Scalable Machine Check Architecture (SMCA) have + new MCA banks added. + + This patch adds the (HWID, MCATYPE) tuple, name and error decoding for + those new SMCA banks. + While at it, optimize the string names in smca_bank_name[]. + + Signed-off-by: Muralidhara M K + Signed-off-by: Naveen Krishna Chatradhi + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 7c619fd..e0cf512 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -47,7 +47,7 @@ + /* These may be used by multiple smca_hwid_mcatypes */ + enum smca_bank_types { + SMCA_LS = 0, /* Load Store */ +- SMCA_LS_V2, /* Load Store */ ++ SMCA_LS_V2, + SMCA_IF, /* Instruction Fetch */ + SMCA_L2_CACHE, /* L2 Cache */ + SMCA_DE, /* Decoder Unit */ +@@ -56,17 +56,22 @@ enum smca_bank_types { + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ +- SMCA_CS_V2, /* Coherent Slave V2 */ ++ SMCA_CS_V2, + SMCA_PIE, /* Power, Interrupts, etc. 
*/ + SMCA_UMC, /* Unified Memory Controller */ ++ SMCA_UMC_V2, + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ +- SMCA_PSP_V2, /* Platform Security Processor V2 */ ++ SMCA_PSP_V2, + SMCA_SMU, /* System Management Unit */ +- SMCA_SMU_V2, /* System Management Unit V2 */ ++ SMCA_SMU_V2, + SMCA_MP5, /* Microprocessor 5 Unit */ + SMCA_NBIO, /* Northbridge IO Unit */ + SMCA_PCIE, /* PCI Express Unit */ ++ SMCA_PCIE_V2, ++ SMCA_XGMI_PCS, /* xGMI PCS Unit */ ++ SMCA_XGMI_PHY, /* xGMI PHY Unit */ ++ SMCA_WAFL_PHY, /* WAFL PHY Unit */ + N_SMCA_BANK_TYPES + }; + +@@ -237,6 +242,22 @@ static const char * const smca_umc_mce_desc[] = { + "Command/address parity error", + "Write data CRC error", + }; ++ ++static const char * const smca_umc2_mce_desc[] = { ++ "DRAM ECC error", ++ "Data poison error", ++ "SDP parity error", ++ "Reserved", ++ "Address/Command parity error", ++ "Write data parity error", ++ "DCQ SRAM ECC error", ++ "Reserved", ++ "Read data parity error", ++ "Rdb SRAM ECC error", ++ "RdRsp SRAM ECC error", ++ "LM32 MP errors", ++}; ++ + /* Parameter Block */ + static const char * const smca_pb_mce_desc[] = { + "Parameter Block RAM ECC error", +@@ -314,6 +335,55 @@ static const char * const smca_pcie_mce_desc[] = { + "CCIX Non-okay write response with data error", + }; + ++static const char * const smca_pcie2_mce_desc[] = { ++ "SDP Parity Error logging", ++}; ++ ++static const char * const smca_xgmipcs_mce_desc[] = { ++ "Data Loss Error", ++ "Training Error", ++ "Flow Control Acknowledge Error", ++ "Rx Fifo Underflow Error", ++ "Rx Fifo Overflow Error", ++ "CRC Error", ++ "BER Exceeded Error", ++ "Tx Vcid Data Error", ++ "Replay Buffer Parity Error", ++ "Data Parity Error", ++ "Replay Fifo Overflow Error", ++ "Replay Fifo Underflow Error", ++ "Elastic Fifo Overflow Error", ++ "Deskew Error", ++ "Flow Control CRC Error", ++ "Data Startup Limit Error", ++ "FC Init Timeout Error", ++ "Recovery Timeout Error", ++ "Ready Serial Timeout 
Error", ++ "Ready Serial Attempt Error", ++ "Recovery Attempt Error", ++ "Recovery Relock Attempt Error", ++ "Replay Attempt Error", ++ "Sync Header Error", ++ "Tx Replay Timeout Error", ++ "Rx Replay Timeout Error", ++ "LinkSub Tx Timeout Error", ++ "LinkSub Rx Timeout Error", ++ "Rx CMD Pocket Error", ++}; ++ ++static const char * const smca_xgmiphy_mce_desc[] = { ++ "RAM ECC Error", ++ "ARC instruction buffer parity error", ++ "ARC data buffer parity error", ++ "PHY APB error", ++}; ++ ++static const char * const smca_waflphy_mce_desc[] = { ++ "RAM ECC Error", ++ "ARC instruction buffer parity error", ++ "ARC data buffer parity error", ++ "PHY APB error", ++}; + + struct smca_mce_desc { + const char * const *descs; +@@ -333,6 +403,7 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, ++ [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, + [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, + [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, + [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, +@@ -341,6 +412,10 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, + [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, + [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, ++ [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, ++ [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, ++ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, ++ [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, + }; + + struct smca_hwid { +@@ -369,6 +444,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + 
+ /* Unified Memory Controller MCA type */ + { SMCA_UMC, 0x00000096 }, ++ /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */ ++ { SMCA_UMC_V2, 0x00010096 }, + + /* Parameter Block MCA type */ + { SMCA_PB, 0x00000005 }, +@@ -389,6 +466,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + + /* PCI Express Unit MCA type */ + { SMCA_PCIE, 0x00000046 }, ++ { SMCA_PCIE_V2, 0x00010046 }, ++ ++ /* Ext Global Memory Interconnect PCS MCA type */ ++ { SMCA_XGMI_PCS, 0x00000050 }, ++ ++ /* Ext Global Memory Interconnect PHY MCA type */ ++ { SMCA_XGMI_PHY, 0x00000259 }, ++ ++ /* WAFL PHY MCA type */ ++ { SMCA_WAFL_PHY, 0x00000267 }, + }; + + struct smca_bank_name { +@@ -396,27 +483,28 @@ struct smca_bank_name { + }; + + static struct smca_bank_name smca_names[] = { +- [SMCA_LS] = { "Load Store Unit" }, +- [SMCA_LS_V2] = { "Load Store Unit" }, +- [SMCA_IF] = { "Instruction Fetch Unit" }, +- [SMCA_L2_CACHE] = { "L2 Cache" }, +- [SMCA_DE] = { "Decode Unit" }, +- [SMCA_RESERVED] = { "Reserved" }, +- [SMCA_EX] = { "Execution Unit" }, +- [SMCA_FP] = { "Floating Point Unit" }, +- [SMCA_L3_CACHE] = { "L3 Cache" }, +- [SMCA_CS] = { "Coherent Slave" }, +- [SMCA_CS_V2] = { "Coherent Slave" }, +- [SMCA_PIE] = { "Power, Interrupts, etc." }, +- [SMCA_UMC] = { "Unified Memory Controller" }, +- [SMCA_PB] = { "Parameter Block" }, +- [SMCA_PSP] = { "Platform Security Processor" }, +- [SMCA_PSP_V2] = { "Platform Security Processor" }, +- [SMCA_SMU] = { "System Management Unit" }, +- [SMCA_SMU_V2] = { "System Management Unit" }, +- [SMCA_MP5] = { "Microprocessor 5 Unit" }, +- [SMCA_NBIO] = { "Northbridge IO Unit" }, +- [SMCA_PCIE] = { "PCI Express Unit" }, ++ [SMCA_LS ... 
SMCA_LS_V2] = { "Load Store Unit" }, ++ [SMCA_IF] = { "Instruction Fetch Unit" }, ++ [SMCA_L2_CACHE] = { "L2 Cache" }, ++ [SMCA_DE] = { "Decode Unit" }, ++ [SMCA_RESERVED] = { "Reserved" }, ++ [SMCA_EX] = { "Execution Unit" }, ++ [SMCA_FP] = { "Floating Point Unit" }, ++ [SMCA_L3_CACHE] = { "L3 Cache" }, ++ [SMCA_CS ... SMCA_CS_V2] = { "Coherent Slave" }, ++ [SMCA_PIE] = { "Power, Interrupts, etc." }, ++ [SMCA_UMC] = { "Unified Memory Controller" }, ++ [SMCA_UMC_V2] = { "Unified Memory Controller V2" }, ++ [SMCA_PB] = { "Parameter Block" }, ++ [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" }, ++ [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" }, ++ [SMCA_MP5] = { "Microprocessor 5 Unit" }, ++ [SMCA_NBIO] = { "Northbridge IO Unit" }, ++ [SMCA_PCIE ... SMCA_PCIE_V2] = { "PCI Express Unit" }, ++ [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" }, ++ [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" }, ++ [SMCA_WAFL_PHY] = { "WAFL PHY Unit" }, ++ + }; + + static void amd_decode_errcode(struct mce_event *e) diff --git a/SOURCES/aa36c96cd52d775570dae989dd95a060f1149077.patch b/SOURCES/aa36c96cd52d775570dae989dd95a060f1149077.patch new file mode 100644 index 0000000..5655bc1 --- /dev/null +++ b/SOURCES/aa36c96cd52d775570dae989dd95a060f1149077.patch @@ -0,0 +1,159 @@ +commit aa36c96cd52d775570dae989dd95a060f1149077 +Author: Avadhut Naik +Date: Mon Apr 24 20:35:56 2023 +0000 + + rasdaemon: Handle reassigned bit definitions for CS SMCA + + Currently, on AMD systems with Scalable MCA (SMCA), each machine check + error of a SMCA bank type has an associated bit position in the bank's + control (CTL) register used for enabling / disabling reporting of the + very error. An error's bit position in the CTL register is also used + during error decoding for offsetting into the corresponding bank's error + description structure. 
As new errors are being added in newer AMD systems + for existing SMCA bank types, the underlying SMCA architecture guarantees + that the bit positions of existing errors are not altered. + + However, on some AMD systems viz. Genoa, some of the existing bit + definitions in the CTL register of the Coherent Slave (CS) SMCA bank type + are reassigned without defining new HWID and McaType. Consequently, the + very errors whose bit definitions have been reassigned in the CTL register + are being erroneously decoded. + + As a solution, create a new software defined SMCA bank type by utilizing + one of the hardware-reserved values for HWID. The new SMCA bank type will + only be employed for CS error decoding on affected CPU models. + + Additionally, since the existing error description structure for the CS + SMCA bank type is still valid, add new error description structure to + compensate for the reassigned bit definitions. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 7ec787a..e81f732 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -57,6 +57,7 @@ enum smca_bank_types { + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ + SMCA_CS_V2, ++ SMCA_CS_V2_QUIRK, + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, +@@ -259,6 +260,31 @@ static const char * const smca_cs2_mce_desc[] = { + "Hardware Assert Error", + }; + ++/* ++ * Per Genoa's revision guide, erratum 1384, existing bit definitions ++ * are reassigned for SMCA CS bank type. 
++ */ ++static const char * const smca_cs2_quirk_mce_desc[] = { ++ "Illegal Request", ++ "Address Violation", ++ "Security Violation", ++ "Illegal Response", ++ "Unexpected Response", ++ "Request or Probe Parity Error", ++ "Read Response Parity Error", ++ "Atomic Request Parity Error", ++ "SDP read response had no match in the CS queue", ++ "SDP read response had an unexpected RETRY error", ++ "Counter overflow error", ++ "Counter underflow error", ++ "Probe Filter Protocol Error", ++ "Probe Filter ECC Error", ++ "Illegal Request on the no data channel", ++ "Address Violation on the no data channel", ++ "Security Violation on the no data channel", ++ "Hardware Assert Error", ++}; ++ + static const char * const smca_pie_mce_desc[] = { + "Hardware assert", + "Register security violation", +@@ -549,6 +575,7 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, + [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, + [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, ++ [SMCA_CS_V2_QUIRK] = { smca_cs2_quirk_mce_desc, ARRAY_SIZE(smca_cs2_quirk_mce_desc)}, + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, + [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, +@@ -597,6 +624,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + /* Data Fabric MCA types */ + { SMCA_CS, 0x0000002E }, + { SMCA_CS_V2, 0x0002002E }, ++ {SMCA_CS_V2_QUIRK, 0x00010000 }, + { SMCA_PIE, 0x0001002E }, + + /* Unified Memory Controller MCA type */ +@@ -660,7 +688,7 @@ static struct smca_bank_name smca_names[] = { + [SMCA_EX] = { "Execution Unit" }, + [SMCA_FP] = { "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "L3 Cache" }, +- [SMCA_CS ... SMCA_CS_V2] = { "Coherent Slave" }, ++ [SMCA_CS ... SMCA_CS_V2_QUIRK] = { "Coherent Slave" }, + [SMCA_PIE] = { "Power, Interrupts, etc." 
}, + [SMCA_UMC] = { "Unified Memory Controller" }, + [SMCA_UMC_V2] = { "Unified Memory Controller V2" }, +@@ -723,8 +751,38 @@ static int find_hbm_channel(struct mce_event *e) + return (umc % 2) ? tmp + 4 : tmp; + } + ++static inline void fixup_hwid(struct mce_priv* m, uint32_t *hwid_mcatype) ++{ ++ if (m->family == 0x19) { ++ switch (m->model) { ++ /* ++ * Per Genoa's revision guide, erratum 1384, some SMCA Extended ++ * Error Codes and SMCA Control bits are incorrect for SMCA CS ++ * bank type. ++ */ ++ case 0x10 ... 0x1F: ++ case 0x60 ... 0x7B: ++ case 0xA0 ... 0xAF: ++ if (*hwid_mcatype == 0x0002002E) ++ *hwid_mcatype = 0x00010000; ++ break; ++ default: ++ break; ++ } ++ } else if (m->family == 0x1A) { ++ switch (m->model) { ++ case 0x40 ... 0x4F: ++ if (*hwid_mcatype == 0x0002002E) ++ *hwid_mcatype = 0x00010000; ++ break; ++ default: ++ break; ++ } ++ } ++} ++ + /* Decode extended errors according to Scalable MCA specification */ +-static void decode_smca_error(struct mce_event *e) ++static void decode_smca_error(struct mce_event *e, struct mce_priv* m) + { + enum smca_bank_types bank_type; + const char *ip_name; +@@ -735,6 +793,8 @@ static void decode_smca_error(struct mce_event *e) + unsigned int csrow = -1, channel = -1; + unsigned int i; + ++ fixup_hwid(m, &mcatype_hwid); ++ + for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { + s_hwid = &smca_hwid_mcatypes[i]; + if (mcatype_hwid == s_hwid->mcatype_hwid) { +@@ -801,7 +861,7 @@ int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) + if (mcgstatus & MCG_STATUS_MCIP) + mce_snprintf(e->mcgstatus_msg, "MCIP"); + +- decode_smca_error(e); ++ decode_smca_error(e, ras->mce_priv); + amd_decode_errcode(e); + return 0; + } diff --git a/SOURCES/aecf33aa70331670c06db6b652712b476e24051c.patch b/SOURCES/aecf33aa70331670c06db6b652712b476e24051c.patch new file mode 100644 index 0000000..fd557ec --- /dev/null +++ b/SOURCES/aecf33aa70331670c06db6b652712b476e24051c.patch @@ -0,0 +1,107 @@ +commit 
aecf33aa70331670c06db6b652712b476e24051c +Author: Muralidhara M K +Date: Mon Jul 12 05:40:46 2021 -0500 + + rasdaemon: Enumerate memory on noncpu nodes + + Newer heterogeneous systems from AMD have GPU nodes (with HBM2 memory + banks) connected via xGMI links to the CPUs. + + The node id information is available in the InstanceHI[47:44] of + the IPID register. + + The UMC Phys on Aldebaran nodes are enumerated as csrow. + The UMC channels connected to HBMs are enumerated as ranks. + + Signed-off-by: Muralidhara M K + Signed-off-by: Naveen Krishna Chatradhi + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 3c346f4..f3379fc 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -78,6 +78,12 @@ enum smca_bank_types { + /* Maximum number of MCA banks per CPU. */ + #define MAX_NR_BANKS 64 + ++/* ++ * On Newer heterogeneous systems from AMD with CPU and GPU nodes connected ++ * via xGMI links, the NON CPU Nodes are enumerated from index 8 ++ */ ++#define NONCPU_NODE_INDEX 8 ++ + /* SMCA Extended error strings */ + /* Load Store */ + static const char * const smca_ls_mce_desc[] = { +@@ -531,6 +537,26 @@ static int find_umc_channel(struct mce_event *e) + { + return EXTRACT(e->ipid, 0, 31) >> 20; + } ++ ++/* ++ * The HBM memory managed by the UMCCH of the noncpu node ++ * can be calculated based on the [15:12]bits of IPID ++ */ ++static int find_hbm_channel(struct mce_event *e) ++{ ++ int umc, tmp; ++ ++ umc = EXTRACT(e->ipid, 0, 31) >> 20; ++ ++ /* ++ * The HBM channel managed by the UMC of the noncpu node ++ * can be calculated based on the [15:12]bits of IPID as follows ++ */ ++ tmp = ((e->ipid >> 12) & 0xf); ++ ++ return (umc % 2) ? 
tmp + 4 : tmp; ++} ++ + /* Decode extended errors according to Scalable MCA specification */ + static void decode_smca_error(struct mce_event *e) + { +@@ -539,6 +565,7 @@ static void decode_smca_error(struct mce_event *e) + unsigned short xec = (e->status >> 16) & 0x3f; + const struct smca_hwid *s_hwid; + uint32_t mcatype_hwid = EXTRACT(e->ipid, 32, 63); ++ uint8_t mcatype_instancehi = EXTRACT(e->ipid, 44, 47); + unsigned int csrow = -1, channel = -1; + unsigned int i; + +@@ -548,14 +575,16 @@ static void decode_smca_error(struct mce_event *e) + bank_type = s_hwid->bank_type; + break; + } ++ if (mcatype_instancehi >= NONCPU_NODE_INDEX) ++ bank_type = SMCA_UMC_V2; + } + +- if (i >= ARRAY_SIZE(smca_hwid_mcatypes)) { ++ if (i >= MAX_NR_BANKS) { + strcpy(e->mcastatus_msg, "Couldn't find bank type with IPID"); + return; + } + +- if (bank_type >= N_SMCA_BANK_TYPES) { ++ if (bank_type >= MAX_NR_BANKS) { + strcpy(e->mcastatus_msg, "Don't know how to decode this bank"); + return; + } +@@ -580,6 +609,16 @@ static void decode_smca_error(struct mce_event *e) + mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", + channel, csrow); + } ++ ++ if (bank_type == SMCA_UMC_V2 && xec == 0) { ++ /* The UMCPHY is reported as csrow in case of noncpu nodes */ ++ csrow = find_umc_channel(e) / 2; ++ /* UMCCH is managing the HBM memory */ ++ channel = find_hbm_channel(e); ++ mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", ++ channel, csrow); ++ } ++ + } + + int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) diff --git a/SOURCES/b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4.patch b/SOURCES/b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4.patch new file mode 100644 index 0000000..30cc19e --- /dev/null +++ b/SOURCES/b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4.patch @@ -0,0 +1,30 @@ +commit b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4 +Author: DmNosachev +Date: Tue Jun 29 13:48:55 2021 +0300 + + labels/supermicro: added Supermicro X10DRI(-T) + + Signed-off-by: Mauro Carvalho 
Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 47ea05f..86e4617 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -81,4 +81,14 @@ Vendor: Supermicro + P2-DIMMC1: 2.2.0; + P2-DIMMD1: 3.0.0; P2-DIMMD2: 3.0.1; + P2-DIMME1: 3.1.0; +- P2-DIMMF1: 3.2.0; +\ No newline at end of file ++ P2-DIMMF1: 3.2.0; ++ ++ Model: X10DRI, X10DRI-T ++ P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1; ++ P1-DIMMB1: 0.1.0; P1-DIMMB2: 0.1.1; ++ P1-DIMMC1: 0.2.0; P1-DIMMC2: 0.2.1; ++ P1-DIMMD1: 0.3.0; P1-DIMMD2: 0.3.1; ++ P2-DIMME1: 1.0.0; P2-DIMME2: 1.0.1; ++ P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1; ++ P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1; ++ P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; +\ No newline at end of file diff --git a/SOURCES/b6a64416ab31b66ce92cabcc7fa1f3c5e9db2e87.patch b/SOURCES/b6a64416ab31b66ce92cabcc7fa1f3c5e9db2e87.patch new file mode 100644 index 0000000..fa55654 --- /dev/null +++ b/SOURCES/b6a64416ab31b66ce92cabcc7fa1f3c5e9db2e87.patch @@ -0,0 +1,208 @@ +commit b6a64416ab31b66ce92cabcc7fa1f3c5e9db2e87 +Author: Avadhut Naik +Date: Thu Aug 31 02:23:48 2023 -0500 + + rasdaemon: Fix SMCA bank type decoding + + On AMD systems with Scalable MCA (SMCA), the (HWID, MCATYPE) tuple from + the MCA_IPID MSR, bits 43:32 and 63:48 respectively, are used for SMCA + bank type decoding. On occurrence of an SMCA error, the cached tuples are + compared against the tuple read from the MCA_IPID MSR to determine the + SMCA bank type. + + Currently however, all high 32 bits of the MCA_IPID register are cached in + the rasdaemon for all SMCA bank types. Bits 47:44 which do not play a part + in bank type decoding are zeroed out. Likewise, when an SMCA error occurs, + all high 32 bits of the MCA_IPID register are read and compared against + the cached values in smca_hwid_mcatypes array. + + This can lead to erroneous bank type decoding since the bits 47:44 are + not guaranteed to be zero. They are either reserved or, on some modern + AMD systems viz. Genoa, denote the InstanceIdHi value. 
The bits therefore, + should not be associated with SMCA bank type decoding. + + Import the HWID_MCATYPE macro from the kernel to ensure that only the + relevant fields i.e. (HWID, MCATYPE) tuples are used for SMCA bank type + decoding on occurrence of an SMCA error. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index a20f03c..55620e2 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -90,6 +90,12 @@ enum smca_bank_types { + /* Maximum number of MCA banks per CPU. */ + #define MAX_NR_BANKS 64 + ++#define MCI_IPID_MCATYPE 0xFFFF0000 ++#define MCI_IPID_HWID 0xFFF ++ ++/* Obtain HWID_MCATYPE Tuple on SMCA Systems */ ++#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) ++ + /* + * On Newer heterogeneous systems from AMD with CPU and GPU nodes connected + * via xGMI links, the NON CPU Nodes are enumerated from index 8 +@@ -699,76 +705,76 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + /* { bank_type, mcatype_hwid } */ + + /* ZN Core (HWID=0xB0) MCA types */ +- { SMCA_LS, 0x000000B0 }, +- { SMCA_LS_V2, 0x001000B0 }, +- { SMCA_IF, 0x000100B0 }, +- { SMCA_L2_CACHE, 0x000200B0 }, +- { SMCA_DE, 0x000300B0 }, ++ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) }, ++ { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) }, ++ { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) }, ++ { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) }, ++ { SMCA_DE, HWID_MCATYPE(0xB0, 0x3) }, + /* HWID 0xB0 MCATYPE 0x4 is Reserved */ +- { SMCA_EX, 0x000500B0 }, +- { SMCA_FP, 0x000600B0 }, +- { SMCA_L3_CACHE, 0x000700B0 }, ++ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5) }, ++ { SMCA_FP, HWID_MCATYPE(0xB0, 0x6) }, ++ { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) }, + + /* Data Fabric MCA types */ +- { SMCA_CS, 0x0000002E }, +- { SMCA_CS_V2, 0x0002002E }, +- {SMCA_CS_V2_QUIRK, 0x00010000 }, +- { SMCA_PIE, 0x0001002E }, ++ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) }, ++ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) }, ++ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) }, ++ { SMCA_CS_V2_QUIRK, 
HWID_MCATYPE(0x0, 0x1) }, + + /* Unified Memory Controller MCA type */ +- { SMCA_UMC, 0x00000096 }, +- { SMCA_UMC_QUIRK, 0x00020000 }, ++ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) }, ++ { SMCA_UMC_QUIRK, HWID_MCATYPE(0x0, 0x2) }, + /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */ +- { SMCA_UMC_V2, 0x00010096 }, ++ { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) }, + /* Memory Attached Last Level Cache */ +- { SMCA_MA_LLC, 0x0004002E }, ++ { SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) }, + + /* Parameter Block MCA type */ +- { SMCA_PB, 0x00000005 }, ++ { SMCA_PB, HWID_MCATYPE(0x05, 0x0) }, + + /* Platform Security Processor MCA type */ +- { SMCA_PSP, 0x000000FF }, +- { SMCA_PSP_V2, 0x000100FF }, ++ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) }, ++ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) }, + + /* System Management Unit MCA type */ +- { SMCA_SMU, 0x00000001 }, +- { SMCA_SMU_V2, 0x00010001 }, ++ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) }, ++ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) }, + + /* Microprocessor 5 Unit MCA type */ +- { SMCA_MP5, 0x00020001 }, ++ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2) }, + + /* MPDMA MCA Type */ +- { SMCA_MPDMA, 0x00030001 }, ++ { SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) }, + + /* Northbridge IO Unit MCA type */ +- { SMCA_NBIO, 0x00000018 }, ++ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) }, + + /* PCI Express Unit MCA type */ +- { SMCA_PCIE, 0x00000046 }, +- { SMCA_PCIE_V2, 0x00010046 }, ++ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) }, ++ { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) }, + + /* Ext Global Memory Interconnect PCS MCA type */ +- { SMCA_XGMI_PCS, 0x00000050 }, ++ { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) }, + +- { SMCA_NBIF, 0x0000006C }, ++ { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) }, + +- { SMCA_SHUB, 0x00000080 }, +- { SMCA_SATA, 0x000000A8 }, +- { SMCA_USB, 0x000000AA }, ++ { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) }, ++ { SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) }, ++ { SMCA_USB, HWID_MCATYPE(0xAA, 0x0) }, + + /* Ultra Short Reach Data and Control Plane Controller */ +- { 
SMCA_USR_DP, 0x00000170 }, +- { SMCA_USR_CP, 0x00000180 }, ++ { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) }, ++ { SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) }, + +- { SMCA_GMI_PCS, 0x00000241 }, ++ { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) }, + + /* Ext Global Memory Interconnect PHY MCA type */ +- { SMCA_XGMI_PHY, 0x00000259 }, ++ { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, + + /* WAFL PHY MCA type */ +- { SMCA_WAFL_PHY, 0x00000267 }, ++ { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, + +- { SMCA_GMI_PHY, 0x00000269 }, ++ { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) }, + }; + + struct smca_bank_name { +@@ -862,12 +868,12 @@ static inline void fixup_hwid(struct mce_priv* m, uint32_t *hwid_mcatype) + case 0x10 ... 0x1F: + case 0x60 ... 0x7B: + case 0xA0 ... 0xAF: +- if (*hwid_mcatype == 0x0002002E) +- *hwid_mcatype = 0x00010000; ++ if (*hwid_mcatype == HWID_MCATYPE(0x2E, 0x2)) ++ *hwid_mcatype = HWID_MCATYPE(0x0, 0x1); + break; + case 0x90 ... 0x9F: +- if ((*hwid_mcatype & 0xFF) == 0x00000096) +- *hwid_mcatype = 0x00020000; ++ if (*hwid_mcatype == HWID_MCATYPE(0x96, 0x0)) ++ *hwid_mcatype = HWID_MCATYPE(0x0, 0x2); + break; + default: + break; +@@ -875,8 +881,8 @@ static inline void fixup_hwid(struct mce_priv* m, uint32_t *hwid_mcatype) + } else if (m->family == 0x1A) { + switch (m->model) { + case 0x40 ... 
0x4F: +- if (*hwid_mcatype == 0x0002002E) +- *hwid_mcatype = 0x00010000; ++ if (*hwid_mcatype == HWID_MCATYPE(0x2E, 0x2)) ++ *hwid_mcatype = HWID_MCATYPE(0x0, 0x1); + break; + default: + break; +@@ -889,13 +895,17 @@ void decode_smca_error(struct mce_event *e, struct mce_priv *m) + { + enum smca_bank_types bank_type; + const char *ip_name; ++ uint32_t mcatype_hwid = 0; + unsigned short xec = (e->status >> 16) & 0x3f; + const struct smca_hwid *s_hwid; +- uint32_t mcatype_hwid = EXTRACT(e->ipid, 32, 63); ++ uint32_t ipid_high = EXTRACT(e->ipid, 32, 63); + uint8_t mcatype_instancehi = EXTRACT(e->ipid, 44, 47); + unsigned int csrow = -1, channel = -1; + unsigned int i; + ++ mcatype_hwid = HWID_MCATYPE(ipid_high & MCI_IPID_HWID, ++ (ipid_high & MCI_IPID_MCATYPE) >> 16); ++ + fixup_hwid(m, &mcatype_hwid); + + for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { diff --git a/SOURCES/c785d309dcbdeb7ecd219975244f3944a8d047e9.patch b/SOURCES/c785d309dcbdeb7ecd219975244f3944a8d047e9.patch new file mode 100644 index 0000000..1d8d01e --- /dev/null +++ b/SOURCES/c785d309dcbdeb7ecd219975244f3944a8d047e9.patch @@ -0,0 +1,37 @@ +commit c785d309dcbdeb7ecd219975244f3944a8d047e9 +Author: Muralidhara M K +Date: Thu Jul 27 10:18:12 2023 +0000 + + rasdaemon: Identify the Die Number in multi-die system + + Some AMD systems have 4 dies in each socket and Die ID represents + whether the error occurred on cpu die or gpu die. + Also, the respective Die is used for FRU identification.
+ + Signed-off-by: Muralidhara M K + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 54060ee..a20f03c 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -935,10 +935,15 @@ void decode_smca_error(struct mce_event *e, struct mce_priv *m) + xec); + + if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_QUIRK) && xec == 0) { +- channel = find_umc_channel(e); +- csrow = e->synd & 0x7; /* Bit 0, 1 ,2 */ +- mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", +- channel, csrow); ++ if ((m->family == 0x19) && (m->model >= 0x90 && m->model <= 0x9f)) { ++ /* MCA_IPID[InstanceIdHi] give the AMD Node Die ID */ ++ mce_snprintf(e->mc_location, "memory_die_id=%d", mcatype_instancehi / 4); ++ } else { ++ channel = find_umc_channel(e); ++ csrow = e->synd & 0x7; /* Bit 0, 1 ,2 */ ++ mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", ++ channel, csrow); ++ } + } + + if (bank_type == SMCA_UMC_V2 && xec == 0) { diff --git a/SOURCES/d0e0bb3d73c4bc5060da20270a089857bba2a64c.patch b/SOURCES/d0e0bb3d73c4bc5060da20270a089857bba2a64c.patch new file mode 100644 index 0000000..d28ce9c --- /dev/null +++ b/SOURCES/d0e0bb3d73c4bc5060da20270a089857bba2a64c.patch @@ -0,0 +1,42 @@ +commit d0e0bb3d73c4bc5060da20270a089857bba2a64c +Author: Justin Vreeland +Date: Tue Nov 2 19:51:50 2021 -0700 + + Update ras-mc-ctl manpage to match current options + + Signed-off-by: Justin Vreeland + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/man/ras-mc-ctl.8.in b/man/ras-mc-ctl.8.in +index 26230e0..a605122 100644 +--- a/man/ras-mc-ctl.8.in ++++ b/man/ras-mc-ctl.8.in +@@ -79,9 +79,27 @@ Specify an alternate location for the labels database. + Specify a delay of \fBtime\fR seconds before registering DIMM labels. + Only meaninful if used together with --register-labels. + .TP +-.BI "--layout ++.BI "--layout" + Prints the memory layout as detected by the EDAC driver. 
Useful to check + if the EDAC driver is properly detecting the memory controller architecture. ++.TP ++.BI "--summary" ++Presents a summary of the logged errors. ++.TP ++.BI "--errors" ++Shows the errors stored at the error database. ++.TP ++.BI "--error-count" ++Shows the corrected and uncorrected error counts using sysfs. ++.TP ++.BI "--vendor-errors-summary="platform-id ++Pressents a summary of the vendor-specific logged errors. ++.TP ++.BI "--vendor-errors="platform-id ++Shows the vendor-specific errors stored in the error database. ++.TP ++.BI "--vendor-platforms" ++Shows the supported platforms with platform-ids for the vendor-specific errors. + + .SH MAINBOARD CONFIGURATION + .PP diff --git a/SOURCES/dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b.patch b/SOURCES/dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b.patch new file mode 100644 index 0000000..b9eec5a --- /dev/null +++ b/SOURCES/dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b.patch @@ -0,0 +1,27 @@ +commit dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b +Author: Mauro Carvalho Chehab +Date: Wed May 26 12:55:54 2021 +0200 + + Add support for multi-arch builds + + Allow building rasdaemon on several architectures: + - x86_64 + - arm 64 + - ppc 64 LE + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml +index 747a844..898687c 100644 +--- a/.github/workflows/ci.yml ++++ b/.github/workflows/ci.yml +@@ -9,6 +9,9 @@ jobs: + Ubuntu: + name: Ubuntu + runs-on: ubuntu-latest ++ strategy: ++ matrix: ++ arch: [x64_64, aarch64, ppc64le] + steps: + - uses: actions/checkout@v2 + - name: prepare diff --git a/SOURCES/ec443ec0add059fa897f844349e1a2345d81713c.patch b/SOURCES/ec443ec0add059fa897f844349e1a2345d81713c.patch new file mode 100644 index 0000000..cf778c1 --- /dev/null +++ b/SOURCES/ec443ec0add059fa897f844349e1a2345d81713c.patch @@ -0,0 +1,31 @@ +commit ec443ec0add059fa897f844349e1a2345d81713c +Author: DmNosachev +Date: Tue Jun 29 11:33:10 2021 +0300 + + labels/supermicro: added 
x11dph-i labels + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 3fd6fee..bfaed93 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -68,3 +68,17 @@ Vendor: Supermicro + P1_DIMM4B: 1.1.1; + P2_DIMM4B: 2.0.1; + P2_DIMM4B: 2.1.1; ++ ++ Model: X11DPH-i ++ P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1; ++ P1-DIMMB1: 0.1.0; ++ P1-DIMMC1: 0.2.0; ++ P1-DIMMD1: 1.0.0; P1-DIMMD2: 1.0.1; ++ P1-DIMME1: 1.1.0; ++ P1-DIMMF1: 1.2.0; ++ P2-DIMMA1: 2.0.0; P2-DIMMA2: 2.0.1; ++ P2-DIMMB1: 2.1.0; ++ P2-DIMMC1: 2.2.0; ++ P2-DIMMD1: 3.0.0; P2-DIMMD2: 3.0.1; ++ P2-DIMME1: 3.1.0; ++ P2-DIMMF1: 3.2.0; +\ No newline at end of file diff --git a/SOURCES/f7cdd720297cd17e405a7170c04df89d1d9536f8.patch b/SOURCES/f7cdd720297cd17e405a7170c04df89d1d9536f8.patch new file mode 100644 index 0000000..c2732e8 --- /dev/null +++ b/SOURCES/f7cdd720297cd17e405a7170c04df89d1d9536f8.patch @@ -0,0 +1,48 @@ +commit f7cdd720297cd17e405a7170c04df89d1d9536f8 +Author: Mauro Carvalho Chehab +Date: Wed May 26 12:35:55 2021 +0200 + + Add a github workflow for CI automation + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml +new file mode 100644 +index 0000000..5b3e757 +--- /dev/null ++++ b/.github/workflows/ci.yml +@@ -0,0 +1,34 @@ ++name: CI ++ ++# Should run only on branches and PR, as "on_tag.yml" will handle tags ++on: ++ push: ++ branches: master test ++ pull_request: ++ branches: master ++ ++jobs: ++ ++# ++# Linux ++# ++ Ubuntu: ++ name: Ubuntu ++ runs-on: ubuntu-20.04 ++ strategy: ++ matrix: ++ arch: [x64_64, aarch64, armv7, ppc64le] ++ steps: ++ - uses: actions/checkout@v2 ++ with: ++ arch: ${{ matrix.arch }} ++ - name: prepare ++ run: | ++ sudo apt-get update ++ sudo apt-get install -y build-essential sqlite3 ++ - name: build ++ run: | ++ autoreconf -vfi ++ ./configure --enable-all ++ make ++ sudo make install diff --git a/SOURCES/fc1dd37d422fc907416afd028514fff59b63ae12.patch 
b/SOURCES/fc1dd37d422fc907416afd028514fff59b63ae12.patch new file mode 100644 index 0000000..460d2c1 --- /dev/null +++ b/SOURCES/fc1dd37d422fc907416afd028514fff59b63ae12.patch @@ -0,0 +1,30 @@ +commit fc1dd37d422fc907416afd028514fff59b63ae12 +Author: DmNosachev +Date: Wed Jun 30 16:49:18 2021 +0300 + + labels/supermicro: added Supermicro B1DRi + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 373de07..b924a32 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -105,4 +105,14 @@ Vendor: Supermicro + P2-DIMMC1: 2.2.0; + P2-DIMMD1: 3.0.0; + P2-DIMME1: 3.1.0; +- P2-DIMMF1: 3.2.0; +\ No newline at end of file ++ P2-DIMMF1: 3.2.0; ++ ++ Model: B1DRi ++ P1_DIMMA1: 0.0.0; ++ P1_DIMMB1: 0.1.0; ++ P1_DIMMC1: 0.2.0; ++ P1_DIMMD1: 0.3.0; ++ P2_DIMME1: 1.0.0; ++ P2_DIMMF1: 1.1.0; ++ P2_DIMMG1: 1.2.0; ++ P2_DIMMH1: 1.3.0; +\ No newline at end of file diff --git a/SOURCES/fcdffdcb28ece67ed78e3575a3dce45d9dd4f015.patch b/SOURCES/fcdffdcb28ece67ed78e3575a3dce45d9dd4f015.patch new file mode 100644 index 0000000..a549df7 --- /dev/null +++ b/SOURCES/fcdffdcb28ece67ed78e3575a3dce45d9dd4f015.patch @@ -0,0 +1,28 @@ +commit fcdffdcb28ece67ed78e3575a3dce45d9dd4f015 +Author: Mauro Carvalho Chehab +Date: Wed May 26 10:37:52 2021 +0200 + + rasdaemon.spec.in: Fix the description on this example file + + While this is used just to test if building it is OK, better + to keep the logs nice ;-) + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in +index 6ef223f..afa4359 100644 +--- a/misc/rasdaemon.spec.in ++++ b/misc/rasdaemon.spec.in +@@ -61,10 +61,10 @@ rm INSTALL %{buildroot}/usr/include/*.h + %changelog + + * Wed May 26 2021 Mauro Carvalho Chehab 0.6.7-1 +-- Bump to version 0.6.5 with several fixes and additions ++- Bump to version 0.6.7 with several fixes and additions + + * Tue Jul 21 2020 Mauro Carvalho Chehab 0.6.6-1 +-- Bump to version 0.6.5 with several fixes, new hip08 
events and memory prediction analysis ++- Bump to version 0.6.6 with several fixes, new hip08 events and memory prediction analysis + + * Wed Nov 20 2019 Mauro Carvalho Chehab 0.6.5-1 + - Bump to version 0.6.5 with several fixes and improves PCIe events record diff --git a/SOURCES/labels.patch b/SOURCES/labels.patch new file mode 100644 index 0000000..3eb072e --- /dev/null +++ b/SOURCES/labels.patch @@ -0,0 +1,263 @@ +Add labels directory from upstream + +Labels directory doesn't get exported by tarball releases. + +Signed-off-by: Aristeu Rozanski + +--- + labels/asus | 20 +++++++ + labels/dell | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + labels/supermicro | 70 ++++++++++++++++++++++++ + 3 files changed, 242 insertions(+) + +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ rasdaemon-0.6.7/labels/asus 2022-02-08 15:44:53.563362010 -0500 +@@ -0,0 +1,20 @@ ++# RASDAEMON Motherboard DIMM labels Database file. ++# ++# Vendor-name and model-name are found from the program 'dmidecode' ++# labels are found from the silk screen on the motherboard. ++# ++#Vendor: ++# Product: ++# Model: ++#