From ba3234a696f9cbd936b9981640189b9dc722ab05 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Fri, 22 Sep 2023 19:22:33 +0300 Subject: [PATCH] import papi-6.0.0-15.el9 --- .gitignore | 1 + .papi.metadata | 1 + SOURCES/papi-701eventupdate.patch | 796 ++++++++++++++++++++++++++++++ SOURCES/papi-arm64fastread.patch | 637 ++++++++++++++++++++++++ SOURCES/papi-config.patch | 348 +++++++++++++ SOURCES/papi-lto.patch | 124 +++++ SOURCES/papi-nostatic.patch | 30 ++ SOURCES/papi-python3.patch | 10 + SOURCES/papi-rhbz1923967.patch | 23 + SOURCES/papi-thread_init.patch | 106 ++++ SPECS/papi.spec | 496 +++++++++++++++++++ 11 files changed, 2572 insertions(+) create mode 100644 .gitignore create mode 100644 .papi.metadata create mode 100644 SOURCES/papi-701eventupdate.patch create mode 100644 SOURCES/papi-arm64fastread.patch create mode 100644 SOURCES/papi-config.patch create mode 100644 SOURCES/papi-lto.patch create mode 100644 SOURCES/papi-nostatic.patch create mode 100644 SOURCES/papi-python3.patch create mode 100644 SOURCES/papi-rhbz1923967.patch create mode 100644 SOURCES/papi-thread_init.patch create mode 100644 SPECS/papi.spec diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..866657f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/papi-6.0.0-noiozone.tar.gz diff --git a/.papi.metadata b/.papi.metadata new file mode 100644 index 0000000..090cd0f --- /dev/null +++ b/.papi.metadata @@ -0,0 +1 @@ +96415af51f021dcbb71c04ac236037941cf5babc SOURCES/papi-6.0.0-noiozone.tar.gz diff --git a/SOURCES/papi-701eventupdate.patch b/SOURCES/papi-701eventupdate.patch new file mode 100644 index 0000000..46d592b --- /dev/null +++ b/SOURCES/papi-701eventupdate.patch @@ -0,0 +1,796 @@ +commit ae449f73abd0849f05ab3e1f3a64bde0c670c645 +Author: Anthony +Date: Fri Jul 17 12:05:14 2020 -0400 + + Separated the cache preset events of AMD Zen1 and Zen2 and added some more. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 8e96adfbd..2325bd4dc 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE + # + CPU,amd64_fam17h + CPU,amd64_fam17h_zen1 +-CPU,amd64_fam17h_zen2 + # + PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT +@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT + PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS + PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" + PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" ++# Events discovered via CAT ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X ++# ++# ++CPU,amd64_fam17h_zen2 ++# Events copied from zen1 that also exist on zen2 ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT ++PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K ++PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT ++# Events discovered via CAT ++PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X ++ + # + # + CPU,Intel architectural PMU +@@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD + PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR + PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD + ++######################### ++# ARM Fujitsu A64FX # ++######################### ++CPU,arm_a64fx ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++ + # + CPU,mips_74k + # +commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d +Author: Heike Jagode +Date: Thu Sep 24 13:33:38 2020 -0400 + + Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS) + for AMD zen2. + + PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) + explains that FLOP events require MergeEvent support, which was included + in the 5.6 kernel. + + ===>>> Hence, a kernel version 5.6 or greater is required. + + NOTE: without the MergeEvent support in the kernel, + there is no guarantee that the SSE/AVX FLOP + events produce any useful data whatsoever. + + These events have been tested and verified for + scalar flops, SSE, AVX, and FMA: + + (1) for one AVX instruction (e.g. _mm256_add_pd()), + the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns + a count of 4 (in the case of double precision), and + a count of 8 (in the case of single precision). + + (2) for one AVX FMA instruction (e.g. _mm256_macc_pd()), + the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns + a count of 8 (in the case of double precision), and + a count of 16 (in the case of single precision). + + (3) for one SSE instruction (e.g. _mm_mul_pd()), + the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns + a count of 2 (in the case of double precision), and + a count of 4 (in the case of single precision). + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2325bd4dc..2ff3e4d16 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L + PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X + +-# +-# ++# New FLOP event on zen2 ++# PPR (under section 2.1.15.3. -- ++# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) ++# explains that FLOP events require MergeEvent support, which was included ++# in the 5.6 kernel. ++# Hence, a kernel version 5.6 or greater is required. ++# NOTE: without the MergeEvent support in the kernel, there is no guarantee ++# that this SSE/AVX FLOP event produces any useful data whatsoever. ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++ ++ + CPU,Intel architectural PMU + CPU,ix86arch + # +commit 35f93252a6e222299c03f2c94912334488e76b02 +Author: Heike Jagode +Date: Thu Sep 24 18:40:59 2020 -0400 + + Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP) + for AMD zen2. + + For unoptimized code (like native MMM), these events may include + non-numeric floating-point instructions, e.g. MOVSD: move or merge + scalar double-precision floating-point value instructions. + + Tested with: + 1) SSE double: _mm_mul_pd / _mm_add_pd + 2) SSE single: _mm_mul_ps / _mm_add_ps + 3) AVX double: _mm256_mul_pd / _mm256_add_pd + 4) AVX single: _mm256_mul_ps / _mm256_add_ps + 5) FMA double: _mm256_macc_pd + 6) FMA single: _mm256_macc_pd + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2ff3e4d16..60a64564d 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# Floating-point instructions (including non-numeric floating-point instructions, ++# e.g. Move or Merge Scalar Double-Precision Floating-Point values) ++PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU +commit 344f6493425d865577508ff32b6f65516b1b4394 +Author: Heike Jagode +Date: Thu Sep 24 19:03:31 2020 -0400 + + Added missing 'PRESET' to csv file. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 60a64564d..724d520f0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + # Floating-point instructions (including non-numeric floating-point instructions, + # e.g. Move or Merge Scalar Double-Precision Floating-Point values) +-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU +commit 4616aa717c5301a9a478876661eb8ac1f18c0333 +Author: Heike Jagode +Date: Thu Oct 8 11:36:23 2020 -0400 + + For zen2, since FP_OPS counts both single- and double-prec operations + correctly, we don't need to confuse the user with additional + DP_OPS and SP_OPS events. So, I'm taking them out. + + Same applies for events counting FP instructions. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 724d520f0..9ebf557e1 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + # NOTE: without the MergeEvent support in the kernel, there is no guarantee + # that this SSE/AVX FLOP event produces any useful data whatsoever. + PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# Since FP_OPS counts both single- and double-prec operations ++# correctly, we don't need to confuse the user with additional ++# DP_OPS and SP_OPS events. So, I'm taking them out. ++#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# + # Floating-point instructions (including non-numeric floating-point instructions, + # e.g. Move or Merge Scalar Double-Precision Floating-Point values) + PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++# Since FP_INS counts both single- and double-prec instuctions ++# correctly, we don't need to confuse the user with additional ++# VEC_DP and VEC_SP events. So, I'm taking them out. ++#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU +commit 274219e85ba8adcd2e9c78507adf7edb05b71daa +Author: Sebastian Mobo +Date: Thu Oct 8 13:40:21 2020 -0400 + + Added instruction-cache preset events for the Zen2. + + Signed-off-by: Anthony + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 9ebf557e1..fd75f9371 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS + PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C + PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L + PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X +- ++# ++PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++# ++PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS ++PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S + # New FLOP event on zen2 + # PPR (under section 2.1.15.3. -- + # https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) +commit b87ac4beda096086e0040f8ec1b44c4791a9739c +Author: Masahiko, Yamada +Date: Mon Dec 14 14:06:22 2020 +0900 + + Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA) + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index fd75f9371..164f05641 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE + PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL + + # +commit 869864f813f0681b5c9a4b65de2135c8708a2afb +Author: Masahiko, Yamada +Date: Mon Dec 14 19:34:59 2020 +0900 + + Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS). + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 164f05641..9192b1041 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD + ######################### + CPU,arm_a64fx + # ++PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT ++PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES + PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED + PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE ++PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE +-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL ++PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL ++PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC + + # + CPU,mips_74k +commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de +Author: Masahiko, Yamada +Date: Tue Dec 15 13:43:43 2020 +0900 + + modify PAPI_FP_INS and PAPI_VEC_INS for A64FX supports + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 9192b1041..7b4ceb674 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED + PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC + PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC + PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC + PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED +-PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED + PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND + PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63 +Author: Masahiko, Yamada +Date: Fri Feb 12 15:01:21 2021 +0900 + + remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx + + There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx that prefetch overcounts. + I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file. + I will issue the pullrequest again once I have identified how to handle the overcount. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 7b4ceb674..0f5ec8344 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED + PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND + PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +-PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +-PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL ++#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE + PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL +commit 340f68940234f2db181147fc249907b4f1293e62 +Author: Masahiko, Yamada +Date: Tue Feb 16 17:16:24 2021 +0900 + + remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx + + PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH, + so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 0f5ec8344..4ef647959 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE + PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE +-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL ++#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE ++#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL + PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE + PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF +commit 02f34baafb868d183f21bebfd3c46574847b9929 +Author: Swarup Sahoo +Date: Tue May 18 02:51:56 2021 +0530 + + Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip + + Signed-off-by: Swarup Sahoo + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 4ef647959..d9e9da8a3 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X + # VEC_DP and VEC_SP events. So, I'm taking them out. + #PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + #PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++# ++# ++CPU,amd64_fam19h_zen3 ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT ++PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K ++PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH ++PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X ++PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS ++PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S ++# RETIRED_SSE_AVX_FLOPS requires MergeEvent support. ++PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS ++PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS ++PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS ++PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS + + + CPU,Intel architectural PMU +commit 6964aa356fa606f320c7b871123aceb5c1f21999 +Author: Masahiko, Yamada +Date: Tue Aug 24 14:17:29 2021 +0900 + + Fix the PAPI_FUL_CCY setting for a64fx + + In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect. + PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT + + The correct settings are:. + PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 4ef647959..74deb712f 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF + PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS + PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND + PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT +-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT ++PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f +Author: Masahiko, Yamada +Date: Fri Mar 4 15:41:30 2022 +0900 + + Add PAPI idle-related preset events for a64fx + + For a64fx, add four PAPI idle-related preset events + (PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL). + + PAPI_BRU_IDL = BR_COMP_WAIT + PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT + PAPI_FPU_IDL = FL_COMP_WAIT + PAPI_LSU_IDL = LD_COMP_WAIT + + The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT, + and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting" + on A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:. + https://github.com/fujitsu/A64FX/blob/master/doc + + Signed-off-by: Masahiko, Yamada + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 74deb712f..1cd498e91 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS + PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND + PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT + PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT ++PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT ++PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT ++PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT ++PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +commit 3c5364839f583185c1e8dca58d5fe36c9ec82876 +Author: Daniel Barry +Date: Tue Aug 30 23:17:30 2022 +0000 + + papi_avail: add presets for Intel Ice Lake SP + + Define preset events for the Intel Ice Lake SP processor. + These presets have been verified using the Counter Analysis Toolkit benchmarks. + + These changes have been tested on the Intel Ice Lake architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index a013f58af..8f23e030c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD + + # End of hsw,bdw,skl,clx list + # ++ ++# Intel Ice Lake SP events ++CPU,icx ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++# Loads and stores ++PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS ++PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES ++PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES ++# L1 cache ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD ++# L2 cache ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD ++PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT ++PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT ++#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS ++PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS ++PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD ++# L3 cache ++PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD ++PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT ++PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS ++PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES ++# SMP ++PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD ++# Branches ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN ++PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++#FLOPs ++# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++# End of icx list ++ + # + # Intel MIC / Xeon-Phi / Knights Landing + # Intel Knights Mill +commit d4da29b07befb9f7c11e351dbfef835b74cdd67a +Author: John Linford +Date: Mon Mar 20 17:11:37 2023 -0500 + + Add minimal events for Arm Neoverse N1 + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 8f23e030c..a4d5a9756 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX + PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC + PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC + ++######################### ++# ARM Neoverse N1 # ++######################### ++CPU,arm_n1 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + # + CPU,mips_74k + # +commit 88e686f877abcf19c5f50d4e23cbf8ea920a40b6 +Author: John Linford +Date: Mon Mar 20 14:54:41 2023 -0500 + + Add minimal events for Arm Neoverse V1 + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index a4d5a9756..207d6d1db 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + ++######################### ++# ARM Neoverse V1 # ++######################### ++CPU,arm_v1 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + # + CPU,mips_74k + # +commit e911f951115bb551925c5b07e7f5b721d5fe3bbe +Author: John Linford +Date: Mon Mar 20 17:14:18 2023 -0500 + + Add minimal events for Arm Neoverse N2 + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 207d6d1db..d27d956c1 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + ++######################### ++# ARM Neoverse N2 # ++######################### ++CPU,arm_n2 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + ######################### + # ARM Neoverse V1 # + ######################### +commit 05dc580247cb18fca882a33d8e356d79032d2ed1 +Author: John Linford +Date: Mon Mar 20 17:08:35 2023 -0500 + + Add minimal events for Arm Neoverse V2 + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index d27d956c1..549e337c7 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + ++######################### ++# ARM Neoverse V2 # ++######################### ++CPU,arm_v2 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + # + CPU,mips_74k + # diff --git a/SOURCES/papi-arm64fastread.patch b/SOURCES/papi-arm64fastread.patch new file mode 100644 index 0000000..986743b --- /dev/null +++ b/SOURCES/papi-arm64fastread.patch @@ -0,0 +1,637 @@ +commit 9a1f2d897f4086bc1d60102de984c849445b5e97 +Author: Masahiko, Yamada +Date: Tue Feb 21 19:18:40 2023 +0900 + + PAPI_read performance improvement for the arm64 processor + + We developed PAPI_read performance improvements for the arm64 processor + with a plan to port direct user space PMU register access processing from + libperf to the papi library without using libperf. + + The workaround has been implemented that stores the counter value at the + time of reset and subtracts the counter value at the time of reset from + the read counter value at the next read. + When reset processing is called, the value of pc->offset is cleared to 0, + and only the counter value read from the PMU counter is referenced. + There was no problem with the counters FAILED with negative values during + the multiplex+reset test, except for sdsc2-mpx and sdsc4-mpx. + To apply the workaround only during reset, the _pe_reset function call sets + the reset_flag and the next _pe_start function call clears the reset_flag. + The workaround works if the mmap_read_self function is called between calls + to the _pe_reset function and the next call to the _pe_start function. + + Switching PMU register direct access from user space from OFF to ON is done by + changing the setting of the kernel variable "/proc/sys/kernel/perf_user_access". + + Setting PMU Register Direct Access from User Space Off + $ echo 0 > /proc/sys/kernel/perf_user_access + $ cat /proc/sys/kernel/perf_user_access + 0 + + Setting PMU Register Direct Access from User Space ON + $ echo 1 > /proc/sys/kernel/perf_user_access + $ cat /proc/sys/kernel/perf_user_access + 1 + + Performance of PAPI_read has been improved as expected from the execution + result of the papi_cost command. + + Improvement effect of switching PMU register direct access from user space + from OFF to ON + + Total cost for PAPI_read (2 counters) over 1000000 iterations + min cycles: 689 -> 28 + max cycles: 3876 -> 1323 + mean cycles: 724.471979 -> 28.888076 + + Total cost for PAPI_read_ts (2 counters) over 1000000 iterations + min cycles: 693 -> 29 + max cycles: 4066 -> 3718 + mean cycles: 726.753003 -> 29.977226 + + Total cost for PAPI_read (1 derived_[add|sub] counter) over 1000000 iterations + min cycles: 698 -> 28 + max cycles: 7406 -> 2346 + mean cycles: 728.527079 -> 28.880691 + + Signed-off-by: Masahiko, Yamada + +diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c +index b4877d18e..331288c55 100644 +--- a/src/components/perf_event/perf_event.c ++++ b/src/components/perf_event/perf_event.c +@@ -682,6 +682,12 @@ set_up_mmap( pe_control_t *ctl, int evt_idx) + + + ++/* Request user access for arm64 */ ++static inline void arm64_request_user_access(struct perf_event_attr *hw_event) ++{ ++ hw_event->config1=0x2; /* Request user access */ ++} ++ + /* Open all events in the control state */ + static int + open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) +@@ -735,6 +741,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) + if (( i == 0 ) || (ctl->multiplexed)) { + ctl->events[i].attr.pinned = !ctl->multiplexed; + ctl->events[i].attr.disabled = 1; ++#if defined(__aarch64__) ++ if (_perf_event_vector.cmp_info.fast_counter_read) { ++ arm64_request_user_access(&ctl->events[i].attr); ++ } ++#endif + ctl->events[i].group_leader_fd=-1; + ctl->events[i].attr.read_format = get_read_format( + ctl->multiplexed, +@@ -743,6 +754,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) + } else { + ctl->events[i].attr.pinned=0; + ctl->events[i].attr.disabled = 0; ++#if defined(__aarch64__) ++ if (_perf_event_vector.cmp_info.fast_counter_read) { ++ arm64_request_user_access(&ctl->events[i].attr); ++ } ++#endif + ctl->events[i].group_leader_fd=ctl->events[0].event_fd; + ctl->events[i].attr.read_format = get_read_format( + ctl->multiplexed, +@@ -1047,8 +1063,16 @@ _pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) + + /* We need to reset all of the events, not just the group leaders */ + for( i = 0; i < pe_ctl->num_events; i++ ) { +- ret = ioctl( pe_ctl->events[i].event_fd, +- PERF_EVENT_IOC_RESET, NULL ); ++ if (_perf_event_vector.cmp_info.fast_counter_read) { ++ ret = ioctl( pe_ctl->events[i].event_fd, ++ PERF_EVENT_IOC_RESET, NULL ); ++ pe_ctl->reset_counts[i] = mmap_read_reset_count( ++ pe_ctl->events[i].mmap_buf); ++ pe_ctl->reset_flag = 1; ++ } else { ++ ret = ioctl( pe_ctl->events[i].event_fd, ++ PERF_EVENT_IOC_RESET, NULL ); ++ } + if ( ret == -1 ) { + PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) " + "returned error, Linux says: %s", +@@ -1119,6 +1143,8 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + for ( i = 0; i < pe_ctl->num_events; i++ ) { + + count = mmap_read_self(pe_ctl->events[i].mmap_buf, ++ pe_ctl->reset_flag, ++ pe_ctl->reset_counts[i], + &enabled,&running); + + if (count==0xffffffffffffffffULL) { +@@ -1438,6 +1464,10 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) + pe_ctl->events[i].event_fd); + ret=ioctl( pe_ctl->events[i].event_fd, + PERF_EVENT_IOC_ENABLE, NULL) ; ++ if (_perf_event_vector.cmp_info.fast_counter_read) { ++ pe_ctl->reset_counts[i] = 0LL; ++ pe_ctl->reset_flag = 0; ++ } + + /* ioctls always return -1 on failure */ + if (ret == -1) { +@@ -2297,6 +2327,29 @@ _pe_shutdown_component( void ) { + } + + ++#if defined(__aarch64__) ++/* Check access PMU counter from User space for arm64 support */ ++static int _pe_detect_arm64_access(void) { ++ ++ FILE *fff; ++ int perf_user_access; ++ int retval; ++ ++ fff=fopen("/proc/sys/kernel/perf_user_access","r"); ++ if (fff==NULL) { ++ return 0; ++ } ++ ++ /* 1 means you can access PMU counter from User space */ ++ /* 0 means you can not access PMU counter from User space */ ++ retval=fscanf(fff,"%d",&perf_user_access); ++ if (retval!=1) fprintf(stderr,"Error reading /proc/sys/kernel/perf_user_access\n"); ++ fclose(fff); ++ ++ return perf_user_access; ++} ++#endif ++ + /* Check the mmap page for rdpmc support */ + static int _pe_detect_rdpmc(void) { + +@@ -2305,10 +2358,13 @@ static int _pe_detect_rdpmc(void) { + void *addr; + struct perf_event_mmap_page *our_mmap; + int page_size=getpagesize(); ++#if defined(__aarch64__) ++ int retval; ++#endif + +-#if defined(__i386__) || defined (__x86_64__) ++#if defined(__i386__) || defined (__x86_64__) || defined(__aarch64__) + #else +- /* We only support rdpmc on x86 for now */ ++ /* We support rdpmc on x86 and arm64 for now */ + return 0; + #endif + +@@ -2318,12 +2374,23 @@ static int _pe_detect_rdpmc(void) { + return 0; + } + ++#if defined(__aarch64__) ++ /* Detect if we can use PMU counter from User space for arm64 */ ++ retval = _pe_detect_arm64_access(); ++ if (retval == 0) { ++ return 0; ++ } ++#endif ++ + /* Create a fake instructions event so we can read a mmap page */ + memset(&pe,0,sizeof(struct perf_event_attr)); + + pe.type=PERF_TYPE_HARDWARE; + pe.size=sizeof(struct perf_event_attr); + pe.config=PERF_COUNT_HW_INSTRUCTIONS; ++#if defined(__aarch64__) ++ arm64_request_user_access(&pe); ++#endif + pe.exclude_kernel=1; + pe.disabled=1; + +diff --git a/src/components/perf_event/perf_event_lib.h b/src/components/perf_event/perf_event_lib.h +index 0c50ab9f0..cfba8ac49 100644 +--- a/src/components/perf_event/perf_event_lib.h ++++ b/src/components/perf_event/perf_event_lib.h +@@ -36,6 +36,8 @@ typedef struct { + pid_t tid; /* thread we are monitoring */ + pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS]; + long long counts[PERF_EVENT_MAX_MPX_COUNTERS]; ++ unsigned int reset_flag; ++ long long reset_counts[PERF_EVENT_MAX_MPX_COUNTERS]; + } pe_control_t; + + +diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h +index 92dca4fd0..097286865 100644 +--- a/src/components/perf_event/perf_helpers.h ++++ b/src/components/perf_event/perf_helpers.h +@@ -29,6 +29,74 @@ sys_perf_event_open( struct perf_event_attr *hw_event, + return ret; + } + ++ ++/* ++ * We define u64 as uint64_t for every architecture ++ * so that we can print it with "%"PRIx64 without getting warnings. ++ * ++ * typedef __u64 u64; ++ * typedef __s64 s64; ++ */ ++typedef uint64_t u64; ++typedef int64_t s64; ++ ++typedef __u32 u32; ++typedef __s32 s32; ++ ++typedef __u16 u16; ++typedef __s16 s16; ++ ++typedef __u8 u8; ++typedef __s8 s8; ++ ++ ++#ifdef __SIZEOF_INT128__ ++static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) ++{ ++ return (u64)(((unsigned __int128)a * b) >> shift); ++} ++ ++#else ++ ++#ifdef __i386__ ++static inline u64 mul_u32_u32(u32 a, u32 b) ++{ ++ u32 high, low; ++ ++ asm ("mull %[b]" : "=a" (low), "=d" (high) ++ : [a] "a" (a), [b] "rm" (b) ); ++ ++ return low | ((u64)high) << 32; ++} ++#else ++static inline u64 mul_u32_u32(u32 a, u32 b) ++{ ++ return (u64)a * b; ++} ++#endif ++ ++static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) ++{ ++ u32 ah, al; ++ u64 ret; ++ ++ al = a; ++ ah = a >> 32; ++ ++ ret = mul_u32_u32(al, b) >> shift; ++ if (ah) ++ ret += mul_u32_u32(ah, b) << (32 - shift); ++ ++ return ret; ++} ++ ++#endif /* __SIZEOF_INT128__ */ ++ ++#ifndef ARRAY_SIZE ++#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) ++#endif ++ ++ + #if defined(__x86_64__) || defined(__i386__) + + +@@ -52,19 +120,140 @@ static inline unsigned long long rdpmc(unsigned int counter) { + + #define barrier() __asm__ volatile("" ::: "memory") + ++ ++#elif defined(__aarch64__) ++ ++/* Indirect stringification. Doing two levels allows the parameter to be a ++ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) ++ * converts to "bar". ++ */ ++ ++#define __stringify_1(x...) #x ++#define __stringify(x...) __stringify_1(x) ++ ++#define read_sysreg(r) ({ \ ++ u64 __val; \ ++ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ ++ __val; \ ++}) ++ ++static u64 read_pmccntr(void) ++{ ++ return read_sysreg(pmccntr_el0); ++} ++ ++#define PMEVCNTR_READ(idx) \ ++ static u64 read_pmevcntr_##idx(void) { \ ++ return read_sysreg(pmevcntr##idx##_el0); \ ++ } ++ ++PMEVCNTR_READ(0); ++PMEVCNTR_READ(1); ++PMEVCNTR_READ(2); ++PMEVCNTR_READ(3); ++PMEVCNTR_READ(4); ++PMEVCNTR_READ(5); ++PMEVCNTR_READ(6); ++PMEVCNTR_READ(7); ++PMEVCNTR_READ(8); ++PMEVCNTR_READ(9); ++PMEVCNTR_READ(10); ++PMEVCNTR_READ(11); ++PMEVCNTR_READ(12); ++PMEVCNTR_READ(13); ++PMEVCNTR_READ(14); ++PMEVCNTR_READ(15); ++PMEVCNTR_READ(16); ++PMEVCNTR_READ(17); ++PMEVCNTR_READ(18); ++PMEVCNTR_READ(19); ++PMEVCNTR_READ(20); ++PMEVCNTR_READ(21); ++PMEVCNTR_READ(22); ++PMEVCNTR_READ(23); ++PMEVCNTR_READ(24); ++PMEVCNTR_READ(25); ++PMEVCNTR_READ(26); ++PMEVCNTR_READ(27); ++PMEVCNTR_READ(28); ++PMEVCNTR_READ(29); ++PMEVCNTR_READ(30); ++ ++/* ++ * Read a value direct from PMEVCNTR ++ */ ++static u64 rdpmc(unsigned int counter) ++{ ++ static u64 (* const read_f[])(void) = { ++ read_pmevcntr_0, ++ read_pmevcntr_1, ++ read_pmevcntr_2, ++ read_pmevcntr_3, ++ read_pmevcntr_4, ++ read_pmevcntr_5, ++ read_pmevcntr_6, ++ read_pmevcntr_7, ++ read_pmevcntr_8, ++ read_pmevcntr_9, ++ read_pmevcntr_10, ++ read_pmevcntr_11, ++ read_pmevcntr_13, ++ read_pmevcntr_12, ++ read_pmevcntr_14, ++ read_pmevcntr_15, ++ read_pmevcntr_16, ++ read_pmevcntr_17, ++ read_pmevcntr_18, ++ read_pmevcntr_19, ++ read_pmevcntr_20, ++ read_pmevcntr_21, ++ read_pmevcntr_22, ++ read_pmevcntr_23, ++ read_pmevcntr_24, ++ read_pmevcntr_25, ++ read_pmevcntr_26, ++ read_pmevcntr_27, ++ read_pmevcntr_28, ++ read_pmevcntr_29, ++ read_pmevcntr_30, ++ read_pmccntr ++ }; ++ ++ if (counter < ARRAY_SIZE(read_f)) ++ return (read_f[counter])(); ++ ++ return 0; ++} ++ ++static u64 rdtsc(void) { return read_sysreg(cntvct_el0); } ++ ++#define barrier() asm volatile("dmb ish" : : : "memory") ++ ++#endif ++ ++#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) ++ ++static inline u64 adjust_cap_usr_time_short(u64 a, u64 b, u64 c) ++{ ++ u64 ret; ++ ret = b + ((a - b) & c); ++ return ret; ++} ++ + /* based on the code in include/uapi/linux/perf_event.h */ + static inline unsigned long long mmap_read_self(void *addr, ++ int user_reset_flag, ++ unsigned long long reset, + unsigned long long *en, + unsigned long long *ru) { + + struct perf_event_mmap_page *pc = addr; + +- uint32_t seq, time_mult, time_shift, index, width; ++ uint32_t seq, time_mult = 0, time_shift = 0, index, width; + int64_t count; + uint64_t enabled, running; +- uint64_t cyc, time_offset; ++ uint64_t cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL; + int64_t pmc = 0; +- uint64_t quot, rem; + uint64_t delta = 0; + + +@@ -96,12 +285,11 @@ static inline unsigned long long mmap_read_self(void *addr, + time_mult = pc->time_mult; + time_shift = pc->time_shift; + +- quot=(cyc>>time_shift); +- rem = cyc & (((uint64_t)1 << time_shift) - 1); +- delta = time_offset + (quot * time_mult) + +- ((rem * time_mult) >> time_shift); ++ if (pc->cap_user_time_short) { ++ time_cycles = pc->time_cycles; ++ time_mask = pc->time_mask; ++ } + } +- enabled+=delta; + + /* actually do the measurement */ + +@@ -116,8 +304,9 @@ static inline unsigned long long mmap_read_self(void *addr, + /* numbers which break if an IOC_RESET is done */ + width = pc->pmc_width; + count = pc->offset; +- count<<=(64-width); +- count>>=(64-width); ++ if (user_reset_flag == 1) { ++ count = 0; ++ } + + /* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */ + /* while actual perf_event.h has cap_user_rdpmc */ +@@ -130,14 +319,14 @@ static inline unsigned long long mmap_read_self(void *addr, + pmc = rdpmc(index-1); + + /* sign extend result */ ++ if (user_reset_flag == 1) { ++ pmc-=reset; ++ } + pmc<<=(64-width); + pmc>>=(64-width); + + /* add current count into the existing kernel count */ + count+=pmc; +- +- /* Only adjust if index is valid */ +- running+=delta; + } else { + /* Falling back because rdpmc not supported */ + /* for this event. */ +@@ -148,14 +337,66 @@ static inline unsigned long long mmap_read_self(void *addr, + + } while (pc->lock != seq); + ++ if (enabled != running) { ++ ++ /* Adjust for cap_usr_time_short, a nop if not */ ++ cyc = adjust_cap_usr_time_short(cyc, time_cycles, time_mask); ++ ++ delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift); ++ ++ enabled+=delta; ++ if (index) ++ /* Only adjust if index is valid */ ++ running+=delta; ++ } ++ + if (en) *en=enabled; + if (ru) *ru=running; + + return count; + } + ++static inline unsigned long long mmap_read_reset_count(void *addr) { ++ ++ struct perf_event_mmap_page *pc = addr; ++ uint32_t seq, index; ++ uint64_t count = 0; ++ ++ if (pc == NULL) { ++ return count; ++ } ++ ++ do { ++ /* The barrier ensures we get the most up to date */ ++ /* version of the pc->lock variable */ ++ ++ seq=pc->lock; ++ barrier(); ++ ++ /* actually do the measurement */ ++ ++ /* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */ ++ /* while actual perf_event.h has cap_user_rdpmc */ ++ ++ /* Index of register to read */ ++ /* 0 means stopped/not-active */ ++ /* Need to subtract 1 to get actual index to rdpmc() */ ++ index = pc->index; ++ ++ if (pc->cap_usr_rdpmc && index) { ++ /* Read counter value */ ++ count = rdpmc(index-1); ++ } ++ barrier(); ++ ++ } while (pc->lock != seq); ++ ++ return count; ++} ++ + #else + static inline unsigned long long mmap_read_self(void *addr, ++ int user_reset_flag, + unsigned long long *en, + unsigned long long *ru) { + +commit 693dd5c014d1f0b9a3eae63de051389ed8eb338b +Author: Giuseppe Congiu +Date: Tue Feb 21 07:46:14 2023 -0500 + + perf_event: bug fix in mmap_read_self + + Commit 9a1f2d897 broke the perf_event component for power cpus. The + mmap_read_self function is missing one argument. This patch restores the + missing argument in the function. + +diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h +index 097286865..7ad3524f0 100644 +--- a/src/components/perf_event/perf_helpers.h ++++ b/src/components/perf_event/perf_helpers.h +@@ -397,6 +397,7 @@ static inline unsigned long long mmap_read_reset_count(void *addr) { + #else + static inline unsigned long long mmap_read_self(void *addr, + int user_reset_flag, ++ unsigned long long reset, + unsigned long long *en, + unsigned long long *ru) { + +commit 1b3e75b7f11c7e2b7c590948216d6aaeec299010 +Author: Giuseppe Congiu +Date: Tue Feb 21 14:21:03 2023 +0100 + + perf_event: add missing mmap_read_reset_count for non default cpus + + Power cpus do not have a version of mmap_read_reset_count. Implement the + missing function. + +diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h +index 7ad3524f0..73e82c8ae 100644 +--- a/src/components/perf_event/perf_helpers.h ++++ b/src/components/perf_event/perf_helpers.h +@@ -409,6 +409,11 @@ static inline unsigned long long mmap_read_self(void *addr, + return (unsigned long long)(-1); + } + ++static inline unsigned long long mmap_read_reset_count(void *addr __attribute__((unused))) { ++ ++ return (unsigned long long)(-1); ++} ++ + #endif + + /* These functions are based on builtin-record.c in the */ +commit 37d0c77b7b4d00a958dff50dc715cf63e0cd6084 +Author: Giuseppe Congiu +Date: Tue Feb 21 14:22:53 2023 +0100 + + perf_event: used unused attribute in mmap_read_self + +diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h +index 73e82c8ae..59c8a2fc8 100644 +--- a/src/components/perf_event/perf_helpers.h ++++ b/src/components/perf_event/perf_helpers.h +@@ -395,16 +395,11 @@ static inline unsigned long long mmap_read_reset_count(void *addr) { + } + + #else +-static inline unsigned long long mmap_read_self(void *addr, +- int user_reset_flag, +- unsigned long long reset, +- unsigned long long *en, +- unsigned long long *ru) { +- +- (void)addr; +- +- *en=0; +- *ru=0; ++static inline unsigned long long mmap_read_self(void *addr __attribute__((unused)), ++ int user_reset_flag __attribute__((unused)), ++ unsigned long long reset __attribute__((unused)), ++ unsigned long long *en __attribute__((unused)), ++ unsigned long long *ru __attribute__((unused))) { + + return (unsigned long long)(-1); + } diff --git a/SOURCES/papi-config.patch b/SOURCES/papi-config.patch new file mode 100644 index 0000000..9b34cf5 --- /dev/null +++ b/SOURCES/papi-config.patch @@ -0,0 +1,348 @@ +commit 38290c41abbb105ca198411ec3c466ac027f5b8f +Author: Frank Winkler +Date: Fri Apr 24 16:18:22 2020 +0200 + + Fixed configure options for shared and static builds. + + 1) --with-static-lib=no (force PAPI to build shared libraries and tools) + 2) --with-shlib-tools (use internal libpfm via rpath-link) + +diff --git a/src/configure.in b/src/configure.in +index 3cf47edc1..1f58f7c8e 100644 +--- a/src/configure.in ++++ b/src/configure.in +@@ -200,9 +200,13 @@ else + AC_MSG_ERROR([cannot find dlopen and dlerror symbols neither in the base system libraries nor in -ldl]) + fi + fi ++ ++# Disable LDL for static builds ++# if test "x${STATIC}" = "x"; then ++# LDL="" ++# fi + AC_SUBST(LDL) +- +- ++ + if test "$OS" = "CLE"; then + virtualtimer=times + tls=__thread +@@ -827,10 +831,6 @@ AC_ARG_WITH(static_tools, + AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(no)]) + +-if test "$static_lib" = "no"; then +- AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built) +-fi +- + AC_MSG_CHECKING(for linking with papi shared library of tests and utilities) + AC_ARG_WITH(shlib_tools, + [ --with-shlib-tools Specify linking with papi library of tests and utilities], +@@ -839,6 +839,14 @@ AC_ARG_WITH(shlib_tools, + [shlib_tools=no + AC_MSG_RESULT(no)]) + ++if test "$static_lib" = "no"; then ++ shlib_tools=yes ++fi ++ ++if test "$static_lib" = "no" -a "$shlib_tools" = "no"; then ++ AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built) ++fi ++ + if test "$shlib_tools" = "yes"; then + if test "$shared_lib" != "yes"; then + AC_MSG_ERROR(Building static but specified shared linking for tests and utilities) +@@ -847,6 +855,8 @@ if test "$shlib_tools" = "yes"; then + AC_MSG_ERROR([Building shared but specified static linking]) + fi + LINKLIB='$(SHLIB)' ++ #WORKAROUND: if libpfm cannot be found at link time ++ LDFLAGS="$LDFLAGS -Wl,-rpath-link,$PWD/libpfm4/lib" + elif test "$shlib_tools" = "no"; then + if test "$static_lib" != "yes"; then + AC_MSG_ERROR([Building shared but specified static linking for tests and utilities]) +commit d6f4e34d083f18cfdba38dd5e4bbfb2a580b8a9e +Author: Frank Winkler +Date: Fri Apr 24 16:38:18 2020 +0200 + + Another test for "--with-static-tools". + +diff --git a/src/configure.in b/src/configure.in +index 1f58f7c8e..e8d769578 100644 +--- a/src/configure.in ++++ b/src/configure.in +@@ -201,10 +201,6 @@ else + fi + fi + +-# Disable LDL for static builds +-# if test "x${STATIC}" = "x"; then +-# LDL="" +-# fi + AC_SUBST(LDL) + + if test "$OS" = "CLE"; then +@@ -831,6 +827,11 @@ AC_ARG_WITH(static_tools, + AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(no)]) + ++# Disable LDL for static builds ++# if test "$STATIC" = "-static"; then ++# LDL="" ++# fi ++ + AC_MSG_CHECKING(for linking with papi shared library of tests and utilities) + AC_ARG_WITH(shlib_tools, + [ --with-shlib-tools Specify linking with papi library of tests and utilities], +commit 1c333c9954b872cda1b4d873fa81b14ec58a58a7 +Author: Frank Winkler +Date: Thu Apr 30 18:51:34 2020 +0200 + + Fixed static build. + - SDE component is disabled + - "ctest" shlib is disabled + +diff --git a/src/configure.in b/src/configure.in +index e8d769578..0eee98ea1 100644 +--- a/src/configure.in ++++ b/src/configure.in +@@ -827,10 +827,11 @@ AC_ARG_WITH(static_tools, + AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(no)]) + +-# Disable LDL for static builds +-# if test "$STATIC" = "-static"; then +-# LDL="" +-# fi ++# Disable LDL AND SDE for static builds ++if test "$STATIC" = "-static"; then ++ LDL="" ++ SDE_ENABLED= ++fi + + AC_MSG_CHECKING(for linking with papi shared library of tests and utilities) + AC_ARG_WITH(shlib_tools, +@@ -1768,6 +1769,7 @@ for comp in $components; do + if test "x$comp" = "xsde" ; then + LDFLAGS="$LDFLAGS $LRT" + LIBS="$LIBS $LRT" ++ SDE_ENABLED=1 + fi + done + +@@ -1862,6 +1864,7 @@ AC_SUBST(BGP_SYSDIR) + AC_SUBST(BITFLAGS) + AC_SUBST(COMPONENT_RULES) + AC_SUBST(COMPONENTS) ++AC_SUBST(SDE_ENABLED) + AC_SUBST(FTEST_TARGETS) + AC_SUBST(HAVE_NO_OVERRIDE_INIT) + AC_SUBST(BGPM_INSTALL_DIR) +diff --git a/src/ctests/Makefile.recipies b/src/ctests/Makefile.recipies +index b7c1963d7..44e19b398 100644 +--- a/src/ctests/Makefile.recipies ++++ b/src/ctests/Makefile.recipies +@@ -11,7 +11,11 @@ MPX = max_multiplex multiplex1 multiplex2 mendes-alt sdsc-mpx sdsc2-mpx \ + MPXPTHR = multiplex1_pthreads multiplex3_pthreads kufrin + MPI = mpi_hl mpi_omp_hl \ + mpifirst ++ ++ifeq ($(STATIC),) + SHARED = shlib ++endif ++ + SERIAL = serial_hl serial_hl_ll_comb\ + all_events all_native_events branches calibrate case1 case2 \ + cmpinfo code2name derived describe destroy disable_component \ +@@ -344,8 +348,10 @@ case2: case2.c $(TESTLIB) $(PAPILIB) + low-level: low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o low-level + ++ifeq ($(STATIC),) + shlib: shlib.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) shlib.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o shlib $(LDL) ++endif + + exeinfo: exeinfo.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exeinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exeinfo +diff --git a/src/ctests/Makefile.target.in b/src/ctests/Makefile.target.in +index edc04f1b7..af64e157c 100644 +--- a/src/ctests/Makefile.target.in ++++ b/src/ctests/Makefile.target.in +@@ -10,6 +10,7 @@ INCLUDE = -I. -I@includedir@ -I$(testlibdir) -I$(validationlibdir) + LIBDIR = @libdir@ + LIBRARY = @LIBRARY@ + SHLIB = @SHLIB@ ++STATIC = @STATIC@ + PAPILIB = ../@LINKLIB@ + TESTLIB = $(testlibdir)/libtestlib.a + LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@ +diff --git a/src/utils/Makefile b/src/utils/Makefile +index 4abfd6cb8..64a2b8f9f 100644 +--- a/src/utils/Makefile ++++ b/src/utils/Makefile +@@ -48,8 +48,13 @@ papi_mem_info: papi_mem_info.o $(PAPILIB) + papi_multiplex_cost: papi_multiplex_cost.o $(PAPILIB) cost_utils.o + $(CC) -o papi_multiplex_cost papi_multiplex_cost.o cost_utils.o $(PAPILIB) -lm $(LDFLAGS) + ++ifneq ($(SDE_ENABLED),) + papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o papi_sde_interface.o + $(CC) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o $(LDFLAGS) papi_sde_interface.o ++else ++papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o ++ $(CC) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o $(LDFLAGS) ++endif + + papi_version: papi_version.o $(PAPILIB) + $(CC) -o papi_version papi_version.o $(PAPILIB) $(LDFLAGS) +@@ -65,8 +70,10 @@ cost_utils.o: ../testlib/papi_test.h cost_utils.c + print_header.o: print_header.h print_header.c + $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c print_header.c + ++ifneq ($(SDE_ENABLED),) + papi_sde_interface.o: papi_sde_interface.c + $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c papi_sde_interface.c ++endif + + clean: + rm -f *.o *.stderr *.stdout core *~ $(ALL) +diff --git a/src/utils/Makefile.target.in b/src/utils/Makefile.target.in +index bcdbe94e9..9c76b37af 100644 +--- a/src/utils/Makefile.target.in ++++ b/src/utils/Makefile.target.in +@@ -9,6 +9,7 @@ INCLUDE = -I. -I@includedir@ -I$(testlibdir) + LIBDIR = @libdir@ + LIBRARY = @LIBRARY@ + SHLIB = @SHLIB@ ++SDE_ENABLED = @SDE_ENABLED@ + PAPILIB = ../@LINKLIB@ + TESTLIB = $(testlibdir)/libtestlib.a + LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@ +diff --git a/src/utils/papi_native_avail.c b/src/utils/papi_native_avail.c +index ae6dbb9e5..902ed7996 100644 +--- a/src/utils/papi_native_avail.c ++++ b/src/utils/papi_native_avail.c +@@ -51,8 +51,9 @@ + + #include "papi.h" + #include "print_header.h" ++#ifdef SDE_ENABLED + #include "components/sde/interface/papi_sde_interface.h" +- ++#endif + #define EVT_LINE 80 + #define EVT_LINE_BUF_SIZE 4096 + +@@ -84,7 +85,9 @@ print_help( char **argv ) + printf( "\nGeneral command options:\n" ); + printf( "\t-h, --help print this help message\n" ); + printf( "\t-c, --check attempts to add each event\n"); ++#ifdef SDE_ENABLED + printf( "\t-sde FILE lists SDEs that are registered by the library or executable in FILE\n" ); ++#endif + printf( "\t-e EVENTNAME display detailed information about named native event\n" ); + printf( "\t-i EVENTSTR include only event names that contain EVENTSTR\n" ); + printf( "\t-x EVENTSTR exclude any event names that contain EVENTSTR\n" ); +@@ -368,6 +371,7 @@ parse_event_qualifiers( PAPI_event_info_t * info ) + return ( 1 ); + } + ++#ifdef SDE_ENABLED + void + invoke_hook_fptr( char *lib_path ) + { +@@ -394,6 +398,7 @@ invoke_hook_fptr( char *lib_path ) + dlclose(dl_handle); + return; + } ++#endif + + int + main( int argc, char **argv ) +@@ -444,6 +449,7 @@ main( int argc, char **argv ) + return 2; + } + ++#ifdef SDE_ENABLED + /* + The following code will execute if the user wants to list the SDEs in the + library (or executable) stored in flags.path. This code will not list the +@@ -514,6 +520,7 @@ skip_lib: + if( NULL != cmd ) free(cmd); + } + no_sdes: ++#endif //SDE_ENABLED + + /* Do this code if the event name option was specified on the commandline */ + if ( flags.named ) { +commit b5111efaf1b234541c94b8ef7e5791bf8eb094b3 +Author: Frank Winkler +Date: Thu May 7 09:00:53 2020 +0200 + + Added CFLAG -DSDE. + +diff --git a/src/configure.in b/src/configure.in +index 0eee98ea1..781148e5b 100644 +--- a/src/configure.in ++++ b/src/configure.in +@@ -1767,6 +1767,7 @@ tests="$tests comp_tests" + # check for SDE component to determine if we need -lrt in LDFLAGS + for comp in $components; do + if test "x$comp" = "xsde" ; then ++ CFLAGS="$CFLAGS -DSDE" + LDFLAGS="$LDFLAGS $LRT" + LIBS="$LIBS $LRT" + SDE_ENABLED=1 +diff --git a/src/utils/papi_native_avail.c b/src/utils/papi_native_avail.c +index 902ed7996..7d90c4064 100644 +--- a/src/utils/papi_native_avail.c ++++ b/src/utils/papi_native_avail.c +@@ -51,9 +51,10 @@ + + #include "papi.h" + #include "print_header.h" +-#ifdef SDE_ENABLED ++#if SDE + #include "components/sde/interface/papi_sde_interface.h" + #endif ++ + #define EVT_LINE 80 + #define EVT_LINE_BUF_SIZE 4096 + +@@ -85,7 +86,7 @@ print_help( char **argv ) + printf( "\nGeneral command options:\n" ); + printf( "\t-h, --help print this help message\n" ); + printf( "\t-c, --check attempts to add each event\n"); +-#ifdef SDE_ENABLED ++#if SDE + printf( "\t-sde FILE lists SDEs that are registered by the library or executable in FILE\n" ); + #endif + printf( "\t-e EVENTNAME display detailed information about named native event\n" ); +@@ -371,7 +372,7 @@ parse_event_qualifiers( PAPI_event_info_t * info ) + return ( 1 ); + } + +-#ifdef SDE_ENABLED ++#if SDE + void + invoke_hook_fptr( char *lib_path ) + { +@@ -449,7 +450,7 @@ main( int argc, char **argv ) + return 2; + } + +-#ifdef SDE_ENABLED ++#if SDE + /* + The following code will execute if the user wants to list the SDEs in the + library (or executable) stored in flags.path. This code will not list the +@@ -520,7 +521,7 @@ skip_lib: + if( NULL != cmd ) free(cmd); + } + no_sdes: +-#endif //SDE_ENABLED ++#endif //SDE + + /* Do this code if the event name option was specified on the commandline */ + if ( flags.named ) { diff --git a/SOURCES/papi-lto.patch b/SOURCES/papi-lto.patch new file mode 100644 index 0000000..17bd7cf --- /dev/null +++ b/SOURCES/papi-lto.patch @@ -0,0 +1,124 @@ +commit cbca67dae5722d65590e33b8b885a561ac3fff5d +Author: William Cohen +Date: Tue Jun 15 21:48:15 2021 -0400 + + Use numeric local labels to allow compilation with LTO enabled + + Some assembly snippets in instructions_testcode.c used regular label + names. Unfortunately, when multiple copies of the snippets are + inlined in different places with LTO enabled the multiple copies of a + label by the same name cause the build to fail because of the + redefinition of the label. To avoid this problem all those labels + have been converted to numeric local labels to allow multiple copies + to peacefully coexist in the LTO enabled code. + +diff --git a/src/validation_tests/instructions_testcode.c b/src/validation_tests/instructions_testcode.c +index 3634b1f90..128127c25 100644 +--- a/src/validation_tests/instructions_testcode.c ++++ b/src/validation_tests/instructions_testcode.c +@@ -10,9 +10,9 @@ int instructions_million(void) { + #if defined(__i386__) || (defined __x86_64__) + asm( " xor %%ecx,%%ecx\n" + " mov $499999,%%ecx\n" +- "test_loop:\n" ++ "55:\n" + " dec %%ecx\n" +- " jnz test_loop\n" ++ " jnz 55b\n" + : /* no output registers */ + : /* no inputs */ + : "cc", "%ecx" /* clobbered */ +@@ -47,9 +47,9 @@ int instructions_million(void) { + #elif defined(__sparc__) + asm( " sethi %%hi(333333), %%l0\n" + " or %%l0,%%lo(333333),%%l0\n" +- "test_loop:\n" ++ "55:\n" + " deccc %%l0 ! decrement count\n" +- " bnz test_loop ! repeat until zero\n" ++ " bnz 55b ! repeat until zero\n" + " nop ! branch delay slot\n" + : /* no output registers */ + : /* no inputs */ +@@ -57,13 +57,13 @@ int instructions_million(void) { + ); + return 0; + #elif defined(__arm__) +- asm( " ldr r2,count @ set count\n" +- " b test_loop\n" +- "count: .word 333332\n" +- "test_loop:\n" ++ asm( " ldr r2,42f @ set count\n" ++ " b 55f\n" ++ "42: .word 333332\n" ++ "55:\n" + " add r2,r2,#-1\n" + " cmp r2,#0\n" +- " bne test_loop @ repeat till zero\n" ++ " bne 55b @ repeat till zero\n" + : /* no output registers */ + : /* no inputs */ + : "cc", "r2" /* clobbered */ +@@ -71,10 +71,10 @@ int instructions_million(void) { + return 0; + #elif defined(__aarch64__) + asm( " ldr x2,=333332 // set count\n" +- "test_loop:\n" ++ "55:\n" + " add x2,x2,#-1\n" + " cmp x2,#0\n" +- " bne test_loop // repeat till zero\n" ++ " bne 55b // repeat till zero\n" + : /* no output registers */ + : /* no inputs */ + : "cc", "r2" /* clobbered */ +@@ -97,7 +97,7 @@ int instructions_fldcw(void) { + double three=3.0; + + asm( " mov $100000,%%ecx\n" +- "big_loop:\n" ++ "44:\n" + " fldl %1 # load value onto fp stack\n" + " fnstcw %0 # store control word to mem\n" + " movzwl %0, %%eax # load cw from mem, zero extending\n" +@@ -106,7 +106,7 @@ int instructions_fldcw(void) { + " fldcw %3 # save new rounding mode\n" + " fistpl %2 # save stack value as integer to mem\n" + " fldcw %0 # restore old cw\n" +- " loop big_loop # loop to make the count more obvious\n" ++ " loop 44b # loop to make the count more obvious\n" + : /* no output registers */ + : "m"(saved_cw), "m"(three), "m"(result), "m"(cw) /* inputs */ + : "cc", "%ecx","%eax" /* clobbered */ +@@ -129,13 +129,13 @@ int instructions_rep(void) { + + asm( " mov $1000,%%edx\n" + " cld\n" +- "loadstore: # test 8-bit store\n" ++ "66: # test 8-bit store\n" + " mov $0xd, %%al # set eax to d\n" + " mov $16384, %%ecx\n" + " mov %0, %%edi # set destination\n" + " rep stosb # store d 16384 times, auto-increment\n" + " dec %%edx\n" +- " jnz loadstore\n" ++ " jnz 66b\n" + : /* outputs */ + : "rm" (buffer_out) /* inputs */ + : "cc", "%esi","%edi","%edx","%ecx","%eax","memory" /* clobbered */ +@@ -147,13 +147,13 @@ int instructions_rep(void) { + + asm( " mov $1000,%%edx\n" + " cld\n" +- "loadstore: # test 8-bit store\n" ++ "66: # test 8-bit store\n" + " mov $0xd, %%al # set eax to d\n" + " mov $16384, %%ecx\n" + " mov %0, %%rdi # set destination\n" + " rep stosb # store d 16384 times, auto-increment\n" + " dec %%edx\n" +- " jnz loadstore\n" ++ " jnz 66b\n" + : /* outputs */ + : "rm" (buffer_out) /* inputs */ + : "cc", "%esi","%edi","%edx","%ecx","%eax","memory" /* clobbered */ diff --git a/SOURCES/papi-nostatic.patch b/SOURCES/papi-nostatic.patch new file mode 100644 index 0000000..435aa5e --- /dev/null +++ b/SOURCES/papi-nostatic.patch @@ -0,0 +1,30 @@ +commit cc34c978778adb40df1a200059a31c8d628b10ee +Author: William Cohen +Date: Thu Jan 21 14:48:01 2021 -0500 + + Only check for libpfm.a if static libraries are being used. + + Even when static libraries are not be used papi was checking for + libpfm.a, this would cause a failure if libpfm.a wasn't installed. + Exclude checking for libpfm.a if no static libpfm library is needed. + +diff --git a/src/Rules.pfm4_pe b/src/Rules.pfm4_pe +index 61eedc8a3..65a9635c6 100644 +--- a/src/Rules.pfm4_pe ++++ b/src/Rules.pfm4_pe +@@ -32,6 +32,7 @@ ifeq (yes,$(MIC)) + FORCE_PFM_ARCH="CONFIG_PFMLIB_ARCH_X86=y" + endif + ++ifneq (,$(STATIC)) + ifeq (,$(PFM_OBJS)) + $(PFM_LIB_PATH)/libpfm.a: + ifneq (,${PFM_ROOT}) +@@ -49,6 +50,7 @@ else + endif + $(MAKE) + endif ++endif + + include Makefile.inc + diff --git a/SOURCES/papi-python3.patch b/SOURCES/papi-python3.patch new file mode 100644 index 0000000..c5e7509 --- /dev/null +++ b/SOURCES/papi-python3.patch @@ -0,0 +1,10 @@ +diff --git a/src/high-level/scripts/papi_hl_output_writer.py b/src/high-level/scripts/papi_hl_output_writer.py +index 123d2cd0..34bfbd73 100755 +--- a/src/high-level/scripts/papi_hl_output_writer.py ++++ b/src/high-level/scripts/papi_hl_output_writer.py +@@ -1,4 +1,4 @@ +-#!/usr/bin/python ++#!/usr/bin/python3 + from __future__ import division + from collections import OrderedDict + diff --git a/SOURCES/papi-rhbz1923967.patch b/SOURCES/papi-rhbz1923967.patch new file mode 100644 index 0000000..9770f8e --- /dev/null +++ b/SOURCES/papi-rhbz1923967.patch @@ -0,0 +1,23 @@ +diff -up papi-6.0.0/src/papi_events.csv.rhbz1923967 papi-6.0.0/src/papi_events.csv +--- papi-6.0.0/src/papi_events.csv.rhbz1923967 2022-05-26 11:20:59.138469200 -0400 ++++ papi-6.0.0/src/papi_events.csv 2022-05-26 11:23:30.686302618 -0400 +@@ -1588,8 +1588,8 @@ PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF + #PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1_ALT + PRESET,PAPI_L1_DCA,DERIVED_ADD,PM_LD_REF_L1,PM_ST_CMPL + PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS +-PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS +-PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS ++#PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS ++#PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS + PRESET,PAPI_L2_DCR,NOT_DERIVED,PM_DATA_FROM_L2 + PRESET,PAPI_L2_DCW,NOT_DERIVED,PM_L2_ST_HIT + PRESET,PAPI_L3_DCR,NOT_DERIVED,PM_DATA_FROM_L2MISS +@@ -1598,7 +1598,7 @@ PRESET,PAPI_L3_LDM,DERIVED_ADD,PM_DATA_F + PRESET,PAPI_L1_ICH,NOT_DERIVED,PM_INST_FROM_L1 + PRESET,PAPI_L1_ICM,NOT_DERIVED,PM_L1_ICACHE_MISS + PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_INST_FROM_L2MISS +-PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_L2_INST_MISS ++#PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_L2_INST_MISS + PRESET,PAPI_L2_ICH,NOT_DERIVED,PM_INST_FROM_L2 + PRESET,PAPI_L3_ICA,NOT_DERIVED,PM_INST_FROM_L2MISS + PRESET,PAPI_L3_ICH,NOT_DERIVED,PM_INST_FROM_L3 diff --git a/SOURCES/papi-thread_init.patch b/SOURCES/papi-thread_init.patch new file mode 100644 index 0000000..0e790c6 --- /dev/null +++ b/SOURCES/papi-thread_init.patch @@ -0,0 +1,106 @@ +commit 3625bdbad9fd57d1cdb1e5615854545167d4adcb +Author: Anthony Castaldo +Date: Wed Aug 26 17:18:29 2020 -0400 + + This modifies PAPI_library_init() to initialize components in two classes, + separated by the initialization of the papi thread structure. The first class + is those that need no thread structure, currently everything but perf_event and + perf_event_uncore. Following the init of the threading structure, we init the + second class (perf_event and perf_event_uncore) that DOES need the thread + structure to successfully init_component(). This required a change to + _papi_hwi_init_global(), to add an argument to distinguish which class it + should initialize. + +diff --git a/src/papi.c b/src/papi.c +index 33cc2993..ed75af49 100644 +--- a/src/papi.c ++++ b/src/papi.c +@@ -1151,7 +1151,23 @@ PAPI_library_init( int version ) + papi_return( init_retval ); + } + +- /* Initialize thread globals, including the main threads */ ++ /* Initialize component globals EXCEPT for perf_event, perf_event_uncore. ++ * To avoid race conditions, these components use the thread local storage ++ * construct initialized by _papi_hwi_init_global_threads(), from within ++ * their init_component(). So these must have init_component() run AFTER ++ * _papi_hwi_init_global_threads. Other components demand that init threads ++ * run AFTER init_component(), which sets up globals they need. ++ */ ++ ++ tmp = _papi_hwi_init_global( 0 ); /* Selector 0 to skip perf_event, perf_event_uncore */ ++ if ( tmp ) { ++ init_retval = tmp; ++ _papi_hwi_shutdown_global_internal( ); ++ _in_papi_library_init_cnt--; ++ papi_return( init_retval ); ++ } ++ ++ /* Initialize thread globals, including the main threads */ + + tmp = _papi_hwi_init_global_threads( ); + if ( tmp ) { +@@ -1161,9 +1177,9 @@ PAPI_library_init( int version ) + papi_return( init_retval ); + } + +- /* Initialize component globals */ ++ /* Initialize perf_event, perf_event_uncore components */ + +- tmp = _papi_hwi_init_global( ); ++ tmp = _papi_hwi_init_global( 1 ); /* Selector 1 for only perf_event, perf_event_uncore */ + if ( tmp ) { + init_retval = tmp; + _papi_hwi_shutdown_global_internal( ); +diff --git a/src/papi_internal.c b/src/papi_internal.c +index 5a1ccd43..e6dd319c 100644 +--- a/src/papi_internal.c ++++ b/src/papi_internal.c +@@ -1928,11 +1928,13 @@ int papi_num_components = ( sizeof ( _papi_hwd ) / sizeof ( *_papi_hwd ) ) - 1; + * Routine that initializes all available components. + * A component is available if a pointer to its info vector + * appears in the NULL terminated_papi_hwd table. ++ * Modified to accept an arg: 0=do not init perf_event or ++ * perf_event_uncore. 1=init ONLY perf_event or perf_event_uncore. + */ + int +-_papi_hwi_init_global( void ) ++_papi_hwi_init_global( int PE_OR_PEU ) + { +- int retval, i = 0; ++ int retval, is_pe_peu, i = 0; + + retval = _papi_hwi_innoculate_os_vector( &_papi_os_vector ); + if ( retval != PAPI_OK ) { +@@ -1940,14 +1942,16 @@ _papi_hwi_init_global( void ) + } + + while ( _papi_hwd[i] ) { +- ++ is_pe_peu = 0; ++ if (strcmp(_papi_hwd[i]->cmp_info.name, "perf_event") == 0) is_pe_peu=1; ++ if (strcmp(_papi_hwd[i]->cmp_info.name, "perf_event_uncore") == 0) is_pe_peu=1; + retval = _papi_hwi_innoculate_vector( _papi_hwd[i] ); + if ( retval != PAPI_OK ) { + return retval; + } + + /* We can be disabled by user before init */ +- if (!_papi_hwd[i]->cmp_info.disabled) { ++ if (!_papi_hwd[i]->cmp_info.disabled && (PE_OR_PEU == is_pe_peu)) { + retval = _papi_hwd[i]->init_component( i ); + _papi_hwd[i]->cmp_info.disabled=retval; + +diff --git a/src/papi_internal.h b/src/papi_internal.h +index 6492fea4..e0f5acd7 100644 +--- a/src/papi_internal.h ++++ b/src/papi_internal.h +@@ -467,7 +467,7 @@ int _papi_hwi_read( hwd_context_t * context, EventSetInfo_t * ESI, + long long *values ); + int _papi_hwi_cleanup_eventset( EventSetInfo_t * ESI ); + int _papi_hwi_convert_eventset_to_multiplex( _papi_int_multiplex_t * mpx ); +-int _papi_hwi_init_global( void ); ++int _papi_hwi_init_global( int PE_OR_PEU ); + int _papi_hwi_init_global_internal( void ); + int _papi_hwi_init_os(void); + void _papi_hwi_init_errors(void); diff --git a/SPECS/papi.spec b/SPECS/papi.spec new file mode 100644 index 0000000..80dd15c --- /dev/null +++ b/SPECS/papi.spec @@ -0,0 +1,496 @@ +# Default to no static libraries +%{!?with_static: %global with_static 0} +%bcond_with bundled_libpfm +# rdma is not available +%ifarch %{arm} +%{!?with_rdma: %global with_rdma 0} +%else +%{!?with_rdma: %global with_rdma 1} +%endif +%{!?with_pcp: %global with_pcp 1} +Summary: Performance Application Programming Interface +Name: papi +Version: 6.0.0 +Release: 15%{?dist} +License: BSD +Requires: papi-libs = %{version}-%{release} +URL: http://icl.cs.utk.edu/papi/ +# The upstream papi tar.gz file include iozone source code in it. +# The license for iozone source code is not compatible, so it needs +# to be eliminated from the srpm. +# The iozone code has been removed from the upstream papi git repo +# so when papi is rebased to a newer version it can be used as is. +Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}-noiozone.tar.gz +Patch1: papi-python3.patch +Patch4: papi-config.patch +Patch5: papi-nostatic.patch +Patch6: papi-lto.patch +Patch7: papi-rhbz1923967.patch +Patch21: papi-arm64fastread.patch +Patch31: papi-701eventupdate.patch +Patch40: papi-thread_init.patch +BuildRequires: make +BuildRequires: autoconf +BuildRequires: doxygen +BuildRequires: ncurses-devel +BuildRequires: gcc-gfortran +BuildRequires: kernel-headers >= 2.6.32 +BuildRequires: chrpath +BuildRequires: lm_sensors-devel +%if %{without bundled_libpfm} +BuildRequires: libpfm-devel >= 4.13.0-1 +%if %{with_static} +BuildRequires: libpfm-static >= 4.13.0-1 +%endif +%endif +# Following required for net component +BuildRequires: net-tools +%if %{with_rdma} +# Following required for inifiband component +BuildRequires: rdma-core-devel +BuildRequires: infiniband-diags-devel +%endif +%if %{with_pcp} +BuildRequires: pcp-libs-devel +%endif +BuildRequires: perl-generators +#Right now libpfm does not know anything about s390 and will fail +ExcludeArch: s390 s390x + +%description +PAPI provides a programmer interface to monitor the performance of +running programs. + +%package libs +Summary: Libraries for PAPI clients +%description libs +This package contains the run-time libraries for any application that wishes +to use PAPI. + +%package devel +Summary: Header files for the compiling programs with PAPI +Requires: papi = %{version}-%{release} +Requires: papi-libs = %{version}-%{release} +Requires: pkgconfig +%description devel +PAPI-devel includes the C header files that specify the PAPI user-space +libraries and interfaces. This is required for rebuilding any program +that uses PAPI. + +%package testsuite +Summary: Set of tests for checking PAPI functionality +Requires: papi = %{version}-%{release} +Requires: papi-libs = %{version}-%{release} +%description testsuite +PAPI-testsuite includes compiled versions of papi tests to ensure +that PAPI functions on particular hardware. + +%if %{with_static} +%package static +Summary: Static libraries for the compiling programs with PAPI +Requires: papi = %{version}-%{release} +%description static +PAPI-static includes the static versions of the library files for +the PAPI user-space libraries and interfaces. +%endif + +%prep +%setup -q +%patch1 -p1 -b .python3 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch21 -p1 +%patch31 -p1 +%patch40 -p1 + +%build + +%if %{without bundled_libpfm} +# Build our own copy of libpfm. +%global libpfm_config --with-pfm-incdir=%{_includedir} --with-pfm-libdir=%{_libdir} +%endif + +%if %{with_static} +%global static_lib_config --with-static-lib=yes +%else +%global static_lib_config --with-static-lib=no +%endif + +# set up environment variable for the various components +# cuda +# host_micpower +%if %{with_rdma} + export PAPI_INFINIBAND_UMAD_ROOT=/usr +%endif +# lmsensors +export PAPI_LMSENSORS_ROOT=/usr +#pushd vmware; ./configure; popd +%if %{with_pcp} +%global pcp_enable pcp +export PAPI_PCP_ROOT=/usr +%endif + +cd src +autoconf +%configure --with-perf-events \ +%{?libpfm_config} \ +%{?static_lib_config} \ +--with-shared-lib=yes --with-shlib --with-shlib-tools \ +--with-components="appio coretemp example infiniband lmsensors lustre micpower mx net %{?pcp_enable} rapl stealtime" +# implicit enabled components: perf_event perf_event_uncore +#components currently left out because of build configure/build issues +# --with-components="bgpm coretemp_freebsd cuda host_micpower nvml vmware" + +#DBG workaround to make sure libpfm just uses the normal CFLAGS +DBG="" make %{?_smp_mflags} + +#generate updated versions of the documentation +#DBG workaround to make sure libpfm just uses the normal CFLAGS +pushd ../doc +DBG="" make +DBG="" make install +popd + +%install +rm -rf $RPM_BUILD_ROOT +cd src +make DESTDIR=$RPM_BUILD_ROOT LDCONFIG=/bin/true install-all + +chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* + +%files +%{_bindir}/* +%dir /usr/share/papi +/usr/share/papi/papi_events.csv +%doc INSTALL.txt README.md LICENSE.txt RELEASENOTES.txt +%doc %{_mandir}/man1/* + +%ldconfig_scriptlets libs + +%files libs +%{_libdir}/*.so.* +%doc INSTALL.txt README.md LICENSE.txt RELEASENOTES.txt + +%files devel +%{_includedir}/*.h +%if %{with bundled_libpfm} +%{_includedir}/perfmon/*.h +%endif +%{_libdir}/*.so +%{_libdir}/pkgconfig/papi*.pc +%doc %{_mandir}/man3/* + +%files testsuite +/usr/share/papi/run_tests* +/usr/share/papi/ctests +/usr/share/papi/ftests +/usr/share/papi/validation_tests +/usr/share/papi/components +/usr/share/papi/testlib + +%if %{with_static} +%files static +%{_libdir}/*.a +%endif + +%changelog +* Fri Jun 16 2023 William Cohen - 6.0.0-15 +- Address thread initialization order. (RHBZ#2215582) + +* Thu May 4 2023 William Cohen - 6.0.0-14 +- Update papi event presets (RHBZ#2111923, RHBZ#2111942, RHBZ#2111947) + +* Thu Apr 27 2023 William Cohen - 6.0.0-13 +- Improve aarch64 read speed. (rhbz2186927) + +* Thu May 26 2022 William Cohen - 6.0.0-12 +- Disable problematic IBM Power9 events. (RHBZ#1923967) + +* Thu Nov 04 2021 William Cohen - 6.0.0-11 +- Allow build with LTO enable. rhbz#1986635 + +* Mon Aug 09 2021 Mohan Boddu - 6.0.0-10 +- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags + Related: rhbz#1991688 + +* Wed Jun 16 2021 William Cohen - 6.0.0-9 +- Excise iozone code from papi source tarball. Resolves: rhbz#1972870 + +* Fri Apr 16 2021 Mohan Boddu - 6.0.0-8 +- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937 + +* Thu Jan 28 2021 William Cohen - 6.0.0-7 +- By default disable genaration of static libraries. + +* Tue Jan 26 2021 Fedora Release Engineering - 6.0.0-6 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild + +* Thu Dec 17 2020 William Cohen - 6.0.0-5 +- Remove iozone source code. (#1901077) + +* Mon Nov 09 2020 William Cohen - 6.0.0-4 +- Add Fujitsu A64FX presets. + +* Tue Jul 28 2020 Fedora Release Engineering - 6.0.0-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild + +* Wed Jul 01 2020 Jeff Law - 6.0.0-2 +- Disable LTO + +* Wed Mar 04 2020 William Cohen - 6.0.0-1 +- Rebase to official papi-6.0.0. + +* Wed Jan 29 2020 Fedora Release Engineering - 5.7.0-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild + +* Fri Jul 26 2019 Fedora Release Engineering - 5.7.0-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_31_Mass_Rebuild + +* Mon Mar 04 2019 William Cohen - 5.7.0-2 +- Rebase to official papi-5.7.0. + +* Mon Feb 18 2019 William Cohen - 5.7.0-1 +- Rebase to papi-5.7.0. + +* Fri Feb 01 2019 Fedora Release Engineering - 5.6.0-10 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_30_Mass_Rebuild + +* Mon Jan 7 2019 William Cohen - 5.6.0-9 +- Correct typo in papi-testsuite description. +- Add papi-libs for papi-testsuite and papi-devel. + +* Fri Nov 2 2018 Fedora Release Engineering - 5.6.0-8 +- Pull in patch to avoid division-by-0. + +* Fri Jul 13 2018 Fedora Release Engineering - 5.6.0-7 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild + +* Thu May 17 2018 William Cohen - 5.6.0-6 +- Dynamically link utilities and tests to papi libraries. + +* Mon Apr 30 2018 William Cohen - 5.6.0-5 +- Include various LDFLAGS/CFLAGS. + +* Thu Feb 08 2018 Fedora Release Engineering - 5.6.0-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + +* Wed Jan 31 2018 William Cohen - 5.6.0-3 +- Bump and rebuild. + +* Thu Dec 21 2017 William Cohen - 5.6.0-2 +- Correct infiniband buildrequires. + +* Thu Dec 21 2017 William Cohen - 5.6.0-1 +- Rebase to papi-5.6.0. + +* Mon Aug 28 2017 Honggang LI - 5.5.1-6 +- Disable RDMA support on ARM32 + +* Thu Aug 03 2017 Fedora Release Engineering - 5.5.1-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + +* Thu Jul 27 2017 Fedora Release Engineering - 5.5.1-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + +* Sat Feb 11 2017 Fedora Release Engineering - 5.5.1-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + +* Thu Feb 2 2017 William Cohen - 5.5.1-2 +- Bump version and rebuild due to new libgfortan.so version. + +* Fri Nov 18 2016 Fedora Release Engineering - 5.5.1-1 +- Rebase to papi-5.5.1. + +* Wed Sep 14 2016 Fedora Release Engineering - 5.5.0-1 +- Rebase to papi-5.5.0. + +* Thu Feb 04 2016 Fedora Release Engineering - 5.4.3-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild + +* Tue Jan 26 2016 William Cohen - 5.4.3-1 +- Rebase to papi-5.4.3. + +* Thu Jun 18 2015 Fedora Release Engineering - 5.4.1-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild + +* Fri Mar 6 2015 William Cohen - 5.4.1-2 +- Make sure using libpfm-4.6.0. + +* Tue Mar 3 2015 William Cohen - 5.4.1-1 +- Rebase to papi-5.4.1. + +* Wed Feb 11 2015 William Cohen - 5.4.0-3 +- Bump version and rebuild. + +* Thu Dec 18 2014 William Cohen - 5.4.0-2 +- Split out papi-libs as separate subpackage. (#1172875) + +* Mon Nov 17 2014 William Cohen - 5.4.0-1 +- Rebase to papi-5.4.0. + +* Sun Aug 17 2014 Fedora Release Engineering - 5.3.2-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild + +* Mon Aug 4 2014 William Cohen - 5.3.2-1 +- Rebase to 5.3.2. + +* Fri Jun 06 2014 Fedora Release Engineering - 5.3.0-2.16.ga7f6159 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild + +* Fri Jan 17 2014 Lukas Berk - 5.3.0-1.16.ga7f6159 +- Automated weekly rawhide release + +* Thu Jan 16 2014 William Cohen - 5.3.0-1 +- Rebase to 5.3.0. + +* Tue Jan 14 2014 William Cohen - 5.2.0-5 +- Add presets for Intel Silvermont. + +* Mon Jan 13 2014 William Cohen - 5.2.0-4 +- Add presets for Haswell and Ivy Bridge. + +* Wed Aug 14 2013 William Cohen - 5.2.0-2 +- Enable infiniband and stealtime components. + +* Wed Aug 07 2013 William Cohen - 5.2.0-1 +- Rebase to 5.2.0 + +* Sat Aug 03 2013 Fedora Release Engineering - 5.1.1-8 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild + +* Wed Jul 24 2013 William Cohen - 5.1.1-7 +- rhbz830275 - Add support for POWER8 processor to PAPI + +* Mon Jul 22 2013 William Cohen - 5.1.1-6 +- Add autoconf buildrequires. + +* Mon Jul 22 2013 William Cohen - 5.1.1-5 +- rhbz986673 - /usr/lib64/libpapi.so is unowned +- Package files in /usr/share/papi only once. +- Avoid dependency problem with parallel make of man pages. + +* Fri Jul 19 2013 William Cohen - 5.1.1-4 +- Correct changelog. + +* Fri Jul 5 2013 William Cohen - 5.1.1-3 +- Add man page corrections/updates. + +* Fri Jun 28 2013 William Cohen - 5.1.1-2 +- Add testsuite subpackage. + +* Thu May 30 2013 William Cohen - 5.1.1-1 +- Rebase to 5.1.1 + +* Mon Apr 15 2013 William Cohen - 5.1.0.2-2 +- Fix arm FTBS rhbz 951806. + +* Tue Apr 9 2013 William Cohen - 5.1.0.2-1 +- Rebase to 5.1.0.2 + +* Thu Feb 14 2013 Fedora Release Engineering - 5.0.1-6 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Mon Jan 14 2013 William Cohen - 5.0.1-5 +- Add armv7 cortex a15 presets. + +* Tue Dec 04 2012 William Cohen - 5.0.1-4 +- Disable ldconfig on install. + +* Thu Nov 08 2012 William Cohen - 5.0.1-3 +- Avoid duplicated shared library. + +* Wed Oct 03 2012 William Cohen - 5.0.1-2 +- Make sure using compatible version of libpfm. + +* Thu Sep 20 2012 William Cohen - 5.0.1-1 +- Rebase to 5.0.1. + +* Mon Sep 10 2012 William Cohen - 5.0.0-6 +- Back port fixes for Intel Ivy Bridge event presets. + +* Thu Aug 30 2012 William Cohen - 5.0.0-5 +- Fixes to make papi with unbundled libpfm. + +* Mon Aug 27 2012 William Cohen - 5.0.0-2 +- Keep libpfm unbundled. + +* Fri Aug 24 2012 William Cohen - 5.0.0-1 +- Rebase to 5.0.0. + +* Fri Jul 20 2012 Fedora Release Engineering - 4.4.0-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Mon Jun 11 2012 William Cohen - 4.4.0-4 +- Use siginfo_t rather than struct siginfo. + +* Mon Jun 11 2012 William Cohen - 4.4.0-3 +- Correct build requires. + +* Mon Jun 11 2012 William Cohen - 4.4.0-2 +- Unbundle libpfm4 from papi. +- Correct description spellings. +- Remove unused test section. + +* Fri Apr 20 2012 William Cohen - 4.4.0-1 +- Rebase to 4.4.0. + +* Fri Mar 9 2012 William Cohen - 4.2.1-2 +- Fix overrun in lmsensor component. (rhbz797692) + +* Tue Feb 14 2012 William Cohen - 4.2.1-1 +- Rebase to 4.2.1. + +* Fri Jan 13 2012 Fedora Release Engineering - 4.2.0-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild + +* Wed Nov 02 2011 William Cohen - 4.2.0-3 +- Remove unwanted man1/*.c.1 files. (rhbz749725) + +* Mon Oct 31 2011 William Cohen - 4.2.0-2 +- Include appropirate man pages with papi rpm. (rhbz749725) +- Rebase to papi-4.2.0, fixup for coretemp component. (rhbz746851) + +* Thu Oct 27 2011 William Cohen - 4.2.0-1 +- Rebase to papi-4.2.0. + +* Fri Aug 12 2011 William Cohen - 4.1.3-3 +- Provide papi-static. + +* Thu May 12 2011 William Cohen - 4.1.3-2 +- Use corrected papi-4.1.3. + +* Thu May 12 2011 William Cohen - 4.1.3-1 +- Rebase to papi-4.1.3 + +* Tue Feb 08 2011 Fedora Release Engineering - 4.1.2.1-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + +* Mon Jan 24 2011 William Cohen - 4.1.2.1-1 +- Rebase to papi-4.1.2.1 + +* Fri Oct 1 2010 William Cohen - 4.1.1-1 +- Rebase to papi-4.1.1 + +* Tue Jun 22 2010 William Cohen - 4.1.0-1 +- Rebase to papi-4.1.0 + +* Mon May 17 2010 William Cohen - 4.0.0-5 +- Test run with upstream cvs version. + +* Wed Feb 10 2010 William Cohen - 4.0.0-4 +- Resolves: rhbz562935 Rebase to papi-4.0.0 (correct ExcludeArch). + +* Wed Feb 10 2010 William Cohen - 4.0.0-3 +- Resolves: rhbz562935 Rebase to papi-4.0.0 (bump nvr). + +* Wed Feb 10 2010 William Cohen - 4.0.0-2 +- correct the ctests/shlib test +- have PAPI_set_multiplex() return proper value +- properly handle event unit masks +- correct PAPI_name_to_code() to match events +- Resolves: rhbz562935 Rebase to papi-4.0.0 + +* Wed Jan 13 2010 William Cohen - 4.0.0-1 +- Generate papi.spec file for papi-4.0.0.