From 9418d8056bdb46b0d2d8b2cf24d3dcd16d9b473c Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 01 2017 03:26:27 +0000 Subject: import papi-5.2.0-23.el7 --- diff --git a/SOURCES/papi-avoid_libpfm_enum.patch b/SOURCES/papi-avoid_libpfm_enum.patch new file mode 100644 index 0000000..f109e92 --- /dev/null +++ b/SOURCES/papi-avoid_libpfm_enum.patch @@ -0,0 +1,170 @@ +diff -up papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c.orig papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c +--- papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c.orig 2013-08-06 12:12:20.000000000 -0400 ++++ papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c 2017-06-15 22:41:58.784904523 -0400 +@@ -236,14 +236,15 @@ static int find_next_no_aliases(int code + current_pmu++; + SUBDBG("Incrementing PMU: %#x\n",current_pmu); + ++ memset(&pinfo,0,sizeof(pfm_pmu_info_t)); ++ ret = pfm_get_pmu_info(current_pmu, &pinfo); ++ + /* Off the end, so done iterating */ +- if (current_pmu>PFM_PMU_MAX) { ++ if (ret==PFM_ERR_INVAL) { + return PFM_ERR_NOTFOUND; + } + +- memset(&pinfo,0,sizeof(pfm_pmu_info_t)); +- pfm_get_pmu_info(current_pmu, &pinfo); +- if (pmu_is_present_and_right_type(&pinfo,pmu_type)) break; ++ if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) break; + } + + current_event=pinfo.first_event; +@@ -533,12 +534,21 @@ get_event_first_active(int pmu_type) + + pmu_idx=0; + +- while(pmu_idxdefault_pmu)); + + SUBDBG("Detected pmus:\n"); +- for(i=0;idefault_pmu.num_fixed_cntrs; + + SUBDBG( "num_counters: %d\n", my_vector->cmp_info.num_cntrs ); +- ++ + /* Setup presets, only if Component 0 */ + if (cidx==0) { + retval = _papi_load_preset_table( (char *)event_table->default_pmu.name, +diff -up papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c.orig papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c +--- papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c.orig 2013-08-06 12:12:20.000000000 -0400 ++++ 
papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c 2017-06-15 22:50:08.700238377 -0400 +@@ -238,14 +238,15 @@ static int find_next_no_aliases(int code + current_pmu++; + SUBDBG("Incrementing PMU: %#x\n",current_pmu); + ++ memset(&pinfo,0,sizeof(pfm_pmu_info_t)); ++ ret=pfm_get_pmu_info(current_pmu, &pinfo); ++ + /* Off the end, so done iterating */ +- if (current_pmu>PFM_PMU_MAX) { ++ if (ret==PFM_ERR_INVAL) { + return PFM_ERR_NOTFOUND; + } + +- memset(&pinfo,0,sizeof(pfm_pmu_info_t)); +- pfm_get_pmu_info(current_pmu, &pinfo); +- if (pmu_is_present_and_right_type(&pinfo,pmu_type)) break; ++ if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) break; + } + + current_event=pinfo.first_event; +@@ -531,12 +532,20 @@ get_event_first_active(int pmu_type) + + pmu_idx=0; + +- while(pmu_idxcmp_info.num_cntrs=0; + + SUBDBG("Detected pmus:\n"); +- for(i=0;icmp_info.num_cntrs += pinfo.num_cntrs+ + pinfo.num_fixed_cntrs; + } ++ i++; + } + SUBDBG("%d native events detected on %d pmus\n",ncnt,detected_pmus); + diff --git a/SOURCES/papi-intel_knl.patch b/SOURCES/papi-intel_knl.patch new file mode 100644 index 0000000..2532ceb --- /dev/null +++ b/SOURCES/papi-intel_knl.patch @@ -0,0 +1,401 @@ +commit adbae8cd948234539d3ad63363878011e5a59949 +Author: Heike McCraw +Date: Thu Dec 11 12:07:38 2014 -0500 + + Update presets for Intel Haswell and Haswell-EP + (according to the updates of the libpfm4 event table + for Intel Haswell and Haswell-EP). + These mods have not been tested due to lacking access + to an Intel Haswell system. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index d27a41d..0a17ab3 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -634,7 +634,7 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL + # Intel Haswell events (and most likely also Sandy Bridge) + CPU,hsw + CPU,hsw_ep +-PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK + # Loads and stores +@@ -692,7 +692,7 @@ PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS + PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY + PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD + PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO +-PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD + # TLB + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK + +commit 1dbc7038450d53c7e8724f9e2cb0eb773bdc97bf +Author: Heike McCraw +Date: Thu May 28 13:46:52 2015 -0400 + + Temporary workaround: exclude_guest and exclude_host bits have to be + zero in the attribute structure (via :mg=1:mh=1). + + exclude_guest wasn't introduced until Linux 3.2, and so, running newer + PAPI versions with libpfm4 that allows exclude_guest to be set on older + kernels completely breaks all events unless :mg=1:mh=1 is passed. + + PAPI code passes the attribute block created by libpfm4 to the kernel + without modifying its contents. It would be better if libpfm4 provides + different defaults for these attribute bits. + This commit, however, enforces mg=1 and mh=1 for all Xeon Phi + predefined events. A problem with always enforcing mg=1 (i.e.
+ exclude_guest=0) is that if exclude_guest=1 (mg=0) is needed (e.g., + to use PEBS) users have to fall back to using native events with the + appropriate qualifier settings. + + This issue has been extensively discussed on the mailing list (Subject + "KNC events", discussion started on 11/26/14) where more details can be + found. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 8fe0ae1..74da53c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1761,20 +1761,20 @@ PRESET,PAPI_TLB_TL,DERIVED_POSTFIX,N0|N1|+|N2|+|,PEVT_MMU_TLB_MISS_DIRECT_DERAT, + # Intel MIC / Xeon-Phi / Knights Corner + CPU,knc + # +-PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES +-PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCHES_MISPREDICTED +-PRESET,PAPI_L1_ICM,NOT_DERIVED,CODE_CACHE_MISS +-PRESET,PAPI_TLB_IM,NOT_DERIVED,CODE_PAGE_WALK +-PRESET,PAPI_L1_ICA,NOT_DERIVED,CODE_READ +-PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED +-PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_PAGE_WALK +-PRESET,PAPI_LD_INS,NOT_DERIVED,DATA_READ +-PRESET,PAPI_SR_INS,NOT_DERIVED,DATA_WRITE +-PRESET,PAPI_L1_DCM,NOT_DERIVED,DATA_READ_MISS_OR_WRITE_MISS +-PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_READ_OR_WRITE +-PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_EXECUTED +-PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_READ_MISS +-PRESET,PAPI_VEC_INS,NOT_DERIVED,VPU_INSTRUCTIONS_EXECUTED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES:mg=1:mh=1 ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCHES_MISPREDICTED:mg=1:mh=1 ++PRESET,PAPI_L1_ICM,NOT_DERIVED,CODE_CACHE_MISS:mg=1:mh=1 ++PRESET,PAPI_TLB_IM,NOT_DERIVED,CODE_PAGE_WALK:mg=1:mh=1 ++PRESET,PAPI_L1_ICA,NOT_DERIVED,CODE_READ:mg=1:mh=1 ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:mg=1:mh=1 ++PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_PAGE_WALK:mg=1:mh=1 ++PRESET,PAPI_LD_INS,NOT_DERIVED,DATA_READ:mg=1:mh=1 ++PRESET,PAPI_SR_INS,NOT_DERIVED,DATA_WRITE:mg=1:mh=1 ++PRESET,PAPI_L1_DCM,NOT_DERIVED,DATA_READ_MISS_OR_WRITE_MISS:mg=1:mh=1 ++PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_READ_OR_WRITE:mg=1:mh=1
++PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_EXECUTED:mg=1:mh=1 ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_READ_MISS:mg=1:mh=1 ++PRESET,PAPI_VEC_INS,NOT_DERIVED,VPU_INSTRUCTIONS_EXECUTED:mg=1:mh=1 + + CPU,BGP + # The following PAPI presets are accurate for all application nodes + +commit f42eda64e7c3cc0784b3ce8b8a71f88647a61640 +Author: Heike McCraw +Date: Thu Jun 25 15:05:53 2015 -0400 + + Added definitions to Power8 for PAPI_SP_OPS, PAPI_DP_OPS. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 74da53c..40c562b 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1378,6 +1378,8 @@ PRESET,PAPI_TOT_INS,NOT_DERIVED,PM_INST_CMPL + #n/aPRESET,PAPI_INT_INS,DERIVED_ADD,PM_FXU0_FIN,PM_FXU1_FIN + PRESET,PAPI_FP_OPS,NOT_DERIVED,PM_FLOP + PRESET,PAPI_FP_INS,NOT_DERIVED,PM_FLOP ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|4|*|N1|8|*|N2|16|*|N3|32|*|+|+|+|,PM_VSU0_2FLOP,PM_VSU0_4FLOP,PM_VSU0_8FLOP,PM_VSU0_16FLOP ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|4|*|N1|8|*|N2|16|*|N3|32|*|+|+|+|,PM_VSU0_2FLOP,PM_VSU0_4FLOP,PM_VSU0_8FLOP,PM_VSU0_16FLOP + PRESET,PAPI_TOT_CYC,NOT_DERIVED,PM_RUN_CYC + PRESET,PAPI_HW_INT,NOT_DERIVED,PM_EXT_INT + PRESET,PAPI_STL_ICY,DERIVED_POSTFIX,N0|N1|-|,PM_RUN_CYC,PM_1PLUS_PPC_DISP + +commit 36c5b5b6b9bc90142743e4b62fa6cc8f99b3e46c +Author: Vince Weaver +Date: Thu Jun 25 22:20:17 2015 -0400 + + add broadwell predefined events + + For now they are the same as Haswell, as that's what the Linux kernel + does. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 74da53c..2ffb6f2 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -631,9 +631,11 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL + #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS + # + +-# Intel Haswell events (and most likely also Sandy Bridge) ++# Intel Haswell events ++# Using also for Broadwell events, this is what the Linux kernel does + CPU,hsw + CPU,hsw_ep ++CPU,bdw + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK +@@ -692,7 +694,6 @@ PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS + PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY + PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD + PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO +-PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD + # TLB + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK +@@ -714,6 +715,13 @@ PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL + PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++ ++CPU,hsw ++CPU,hsw_ep ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM ++CPU,bdw ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM ++ + # End of hsw list + # + CPU,Intel Core2 + +commit 71dcdb92c477bf5d2f419c03f94783098a991214 +Merge: 36c5b5b f42eda6 +Author: Vince Weaver +Date: Thu Jun 25 22:21:06 2015 -0400 + + Merge branch 'master' of https://icl.cs.utk.edu/git/papi + +commit 0829a4f51b3de92de72f6c6185b99ece15e20254 +Author: Vince Weaver +Date: Fri Jun 26 11:41:42 2015 -0400 + + Add future broadwell-ep 
support. + + libpfm4 doesn't support it yet, but add it for when it appears. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index ca556c9..c38a892 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -636,6 +636,7 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL + CPU,hsw + CPU,hsw_ep + CPU,bdw ++CPU,bdw_ep + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK +@@ -720,6 +721,7 @@ CPU,hsw + CPU,hsw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + CPU,bdw ++CPU,bdw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM + + # End of hsw list + +commit a10e8331ced0173ead9982c3f78c2e5238b04d66 +Author: Vince Weaver +Date: Wed Oct 21 08:58:20 2015 -0400 + + papi_events: add Intel Skylake presets + + This just shares all of the broadwell events with skylake. + Some quick tests show that this probably works. + Someone with skylake hardware should validate this at some point. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index c38a892..2865560 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -637,6 +637,7 @@ CPU,hsw + CPU,hsw_ep + CPU,bdw + CPU,bdw_ep ++CPU,skl + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK + +commit db9c70f517aae119145ef7ec5cded597b70b0437 +Author: Heike McCraw +Date: Fri Jun 17 18:11:07 2016 -0400 + + Added FP (SP, DP) presets for Skylake. Corrected L1_LDM|STM, L2_DCW|TCW, PRF_DM, STL_ICY presets for Skylake.
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2865560..114149d 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -651,8 +651,6 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_ + PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + # Added by FMB + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT +-PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD +-PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB + PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD + # L2 cache + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES +@@ -667,7 +665,6 @@ PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + #PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS + # Added by FMB + PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS +-PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO + PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + #PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT + PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS +@@ -675,7 +672,6 @@ PRESET,PAPI_L2_STM,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS + PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD + PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES + PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD +-PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO + # L3 cache + #PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE + #PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT +@@ -700,12 +696,9 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD + # TLB + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK + PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK +-# Prefetcher +-PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS + # Stalls + PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB + PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY 
+-PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY + PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1 + PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS + PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4 +@@ -720,13 +713,41 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES + + CPU,hsw + CPU,hsw_ep ++CPU,bdw ++CPU,bdw_ep ++PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD ++PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO ++PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO ++PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS ++PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY ++ ++CPU,hsw ++CPU,hsw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + CPU,bdw + CPU,bdw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM + +-# End of hsw list ++CPU,skl ++# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++ ++PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD ++PRESET,PAPI_L1_STM,NOT_DERIVED,L2_RQSTS:ALL_RFO ++PRESET,PAPI_L2_DCW,DERIVED_ADD,L2_RQSTS:DEMAND_RFO_HIT,L2_RQSTS:RFO_HIT 
++PRESET,PAPI_L2_TCW,DERIVED_ADD,L2_RQSTS:DEMAND_RFO_HIT,L2_RQSTS:RFO_HIT ++PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:PF_MISS ++PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE ++ ++# End of hsw,bdw,skl list + # ++ ++ + CPU,Intel Core2 + CPU,Intel Core + CPU,core + +commit 1c64bfc0d4aa17aa36b8ab542c841203518e6df7 +Author: Heike Jagode +Date: Thu Jun 23 15:26:26 2016 -0400 + + Added FP (SP, DP) presets for Broadwell. NOT TESTED yet due to lack of access to bdw hardware + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 114149d..5c04442 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -725,10 +725,18 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY + CPU,hsw + CPU,hsw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM ++ + CPU,bdw + CPU,bdw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM + ++# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE ++# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE ++ + CPU,skl + # PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE + PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE + +commit e9144b9bda355874a1cefd45285578f6c825cc31 +Author: Heike Jagode +Date: Thu 
Aug 18 16:34:54 2016 -0400 + + Added preset definitions for KNL. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2d2eca0..e3e80a4 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -754,7 +754,42 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE + + # End of hsw,bdw,skl list + # +- ++# ++# Intel MIC / Xeon-Phi / Knights Landing ++# ++CPU,knl ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES ++PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES ++PRESET,PAPI_L1_ICH,NOT_DERIVED,ICACHE:HIT ++# ++PRESET,PAPI_L1_DCA,DERIVED_ADD,MEM_UOPS_RETIRED:ANY_LD,MEM_UOPS_RETIRED:ANY_ST ++PRESET,PAPI_L1_DCM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_DCU_MISS ++PRESET,PAPI_L1_TCM,DERIVED_ADD,MEM_UOPS_RETIRED:LD_DCU_MISS,ICACHE:MISSES ++PRESET,PAPI_L1_LDM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_DCU_MISS ++# ++PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES ++PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES ++PRESET,PAPI_L2_LDM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_L2_MISS ++PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ANY_LD ++PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ANY_ST ++PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ANY_LD,MEM_UOPS_RETIRED:ANY_ST ++# ++PRESET,PAPI_TLB_DM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_UTLB_MISS ++# ++PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BRANCH_INSTRUCTIONS_RETIRED,BR_INST_RETIRED:JCC ++PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:TAKEN_JCC ++PRESET,PAPI_BR_NTK,DERIVED_SUB,BR_INST_RETIRED:JCC,BR_INST_RETIRED:TAKEN_JCC ++# ++PRESET,PAPI_RES_STL,NOT_DERIVED,RS_FULL_STALL:ANY ++PRESET,PAPI_STL_ICY,NOT_DERIVED,NO_ALLOC_CYCLES:ANY ++# + + 
CPU,Intel Core2 + CPU,Intel Core diff --git a/SOURCES/papi-ppc64_cache.patch b/SOURCES/papi-ppc64_cache.patch new file mode 100644 index 0000000..99d0131 --- /dev/null +++ b/SOURCES/papi-ppc64_cache.patch @@ -0,0 +1,35 @@ +commit 45c2935e88d1eaf34c0769f9b514c0dcb0e43c1d +Author: William Cohen +Date: Wed Jun 22 14:08:30 2016 -0400 + + Correct IBM Power7 and Power8 computation of PAPI_L1_DCA + + When reviewing the test results for IBM Power7 and Power8 Michael + Petlan found that the PAPI_L1_DCA preset was incorrectly computed. + The L1 cache misses need to be subtracted rather than added to the + result. + + Signed-off-by: William Cohen + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 5c04442..2d2eca0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1349,7 +1349,7 @@ PRESET,PAPI_L1_LDM,NOT_DERIVED,PM_LD_MISS_L1 + PRESET,PAPI_L1_STM,NOT_DERIVED,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCW,DERIVED_SUB,PM_ST_FIN,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF_L1,PM_LD_MISS_L1 +-PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|+|N2|+|N3|+,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 + PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS + PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS + PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS +@@ -1398,7 +1398,7 @@ PRESET,PAPI_L1_LDM,NOT_DERIVED,PM_LD_MISS_L1 + PRESET,PAPI_L1_STM,NOT_DERIVED,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCW,DERIVED_SUB,PM_ST_FIN,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF_L1,PM_LD_MISS_L1 +-PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|+|N2|+|N3|+,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 + PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS + #n/aPRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS + #n/aPRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS diff --git 
a/SOURCES/papi-rhbz1362591.patch b/SOURCES/papi-rhbz1362591.patch new file mode 100644 index 0000000..389bab3 --- /dev/null +++ b/SOURCES/papi-rhbz1362591.patch @@ -0,0 +1,20 @@ +commit e9347373c8b18b5a2902e63dd5fd9df3e54f3216 +Author: James Ralph +Date: Mon Nov 18 10:39:42 2013 -0500 + + ctests/Makefile: Don't clobber value of LIBRARY + + TODO: write a better message + +diff --git a/src/ctests/Makefile b/src/ctests/Makefile +index 0526555..5dba43b 100644 +--- a/src/ctests/Makefile ++++ b/src/ctests/Makefile +@@ -1,6 +1,6 @@ + # File: ctests/Makefile + INCLUDE = -I.. -I. -I../testlib +-PAPILIB = ../libpapi.a ++PAPILIB=$(LIBRARY) + CC = gcc + CC_R = $(CC) -pthread + CFLAGS = -g -O -Wall diff --git a/SPECS/papi.spec b/SPECS/papi.spec index f31f8b6..5e01795 100644 --- a/SPECS/papi.spec +++ b/SPECS/papi.spec @@ -2,7 +2,7 @@ Summary: Performance Application Programming Interface Name: papi Version: 5.2.0 -Release: 19%{?dist} +Release: 23%{?dist} License: BSD Group: Development/System URL: http://icl.cs.utk.edu/papi/ @@ -24,6 +24,10 @@ Patch1003: papi-bz1313088.patch Patch1004: papi-postfixcalc.patch Patch1005: papi-errmsg.patch Patch1006: papi-schedule.patch +Patch1010: papi-rhbz1362591.patch +Patch1011: papi-ppc64_cache.patch +Patch1012: papi-intel_knl.patch +Patch2000: papi-avoid_libpfm_enum.patch BuildRequires: autoconf BuildRequires: doxygen BuildRequires: ncurses-devel @@ -89,6 +93,10 @@ the PAPI user-space libraries and interfaces.
%patch1004 -p1 %patch1005 -p1 %patch1006 -p1 +%patch1010 -p1 -b .rhbz1362591 +%patch1011 -p1 -b .ppc64cache +%patch1012 -p1 -b .knl +%patch2000 -p1 -b .max %build %if %{without bundled_libpfm} @@ -101,7 +109,7 @@ autoconf %configure --with-perf-events \ %{?libpfm_config} \ --with-static-lib=yes --with-shared-lib=yes --with-shlib \ ---with-components="appio coretemp example infiniband lmsensors lustre micpower mx net rapl stealtime" +--with-components="appio coretemp example lmsensors lustre micpower mx net rapl stealtime" # implicit enabled components: perf_event perf_event_uncore #components currently left out because of build configure/build issues # --with-components="bgpm coretemp_freebsd cuda host_micpower nvml vmware" @@ -109,7 +117,7 @@ autoconf pushd components #pushd cuda; ./configure; popd #pushd host_micpower; ./configure; popd -pushd infiniband; %configure; popd +#pushd infiniband; ./configure; popd pushd lmsensors; \ %configure --with-sensors_incdir=/usr/include/sensors \ --with-sensors_libdir=%{_libdir}; \ @@ -170,6 +178,19 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/*.a %changelog +* Fri Jun 9 2017 William Cohen - 5.2.0-23 +- Avoid using PFM_PMU_MAX. + +* Wed Apr 26 2017 William Cohen - 5.2.0-22 +- Disable infiniband component. rhbz1445777 + +* Wed Mar 22 2017 William Cohen - 5.2.0-21 +- Correct ppc64 events. rhbz1385008 +- Add events for Intel KNL. rhbz1412952 + +* Tue Mar 21 2017 William Cohen - 5.2.0-20 +- Dynamically link the papi ctests. rhbz1362591 + * Tue Aug 2 2016 William Cohen - 5.2.0-19 - Rebuild with libpfm-4.7.0.