diff --git a/SOURCES/papi-intel.patch b/SOURCES/papi-intel.patch new file mode 100644 index 0000000..da2a8a1 --- /dev/null +++ b/SOURCES/papi-intel.patch @@ -0,0 +1,217 @@ +commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e +Author: James Ralph +Date: Mon Aug 26 10:23:52 2013 -0400 + + papi_events.csv: First draft preset events on HSW + + Contributed by Nils Smeds + ------------------------- + Here is a suggestion for addition to Hsw counters. These are not + rigorously tested. It compiles and loads. + I'm rather uncertain on many of the events so I am hoping that adding + events like this will get some useful + feedback from the community so that we can improve. + ------------------------- + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2e0da80..39ec16c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK + #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS + # ++ ++# Intel Haswell events (and most likely also Sandy Bridge) ++CPU,hsw ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P ++PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK ++# Loads and stores ++PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS ++PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES ++PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES ++# L1 cache ++PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS ++PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT ++PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS ++# L2 cache ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES ++PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD ++PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT ++PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES ++PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT ++PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS ++# L3 cache ++PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE ++PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT ++PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS ++# SMP ++PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY ++PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD ++PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM ++PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD ++# TLB ++PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK ++PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK ++# Prefetcher ++PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS ++# Stalls ++PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB ++PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY ++PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY ++PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1 ++PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS ++PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4 ++# Branches ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN ++PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++# End of hsw list ++# + CPU,Intel Core2 + CPU,Intel Core + CPU,core +commit f20568575d3d8023f4f97d3d968a606a51a1e01f +Author: James Ralph +Date: Tue Sep 17 09:06:50 2013 -0400 + + papi_events.csv: Add PAPI_L1_ICM for Haswell + + Thanks to Maurice Marks of Unisys for the contribution + ------------- + I've continued testing on Haswell. By comparison with Vtune and Emon on + Haswell I found that we can use + the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful + measure. + + Attached is my current version of papi_events.csv with Haswell fixes. + ------------- + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 39ec16c..01821a8 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_ + PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS + PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT + PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + # L2 cache + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES + PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT +commit b2d643df6a20a85e24a2f797c6bea164ed099a84 +Author: Vince Weaver +Date: Tue Nov 5 16:09:11 2013 -0500 + + Add floating point events for IvyBridge + + Now that Intel has documented them and libpfm4 supports them, PAPI + can use them. We just use the same events as on sandybridge. + + Tested on an ivybridge system. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 01821a8..42c1da0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL + PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS + PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES + # ++# Counts scalars only; no SSE or AVX is counted; includes speculative ++PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE ++PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE ++# ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE ++# + # Intel SandyBridge only + CPU,snb + CPU,snb_ep +@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL + PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK + # +-# Counts scalars only; no SSE or AVX is counted; includes speculative +-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE +-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE +-# +-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE +-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE +-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE +-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE +-# + # Intel IvyBridge only + CPU,ivb + CPU,ivb_ep +From 035fb0849fb84aa02b262b6abe67bc306c3a8600 Mon Sep 17 00:00:00 2001 +From: Vince Weaver +Date: Fri, 6 Dec 2013 13:03:39 -0500 +Subject: [PATCH 4/4] papi_events.csv : add initial atom silvermont support + +This is based on the manual, as I don't actually have one of these +chips. + +The events available differ a lot from older atoms. They also +support offcore events and some sort of RAPL support. +--- + src/papi_events.csv | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 42c1da0..0e1163e 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -356,6 +356,7 @@ CPU,ix86arch + PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED + PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS + # ++# Intel Atom + CPU,Intel Atom + CPU,atom + # +@@ -412,6 +413,29 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,MUL:AR + PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV:AR + PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED:VECTOR + # ++# Intel Atom Silvermont ++CPU,slm ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES ++PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES ++PRESET,PAPI_L1_ICH,DERIVED_SUB,ICACHE:ACCESSES,ICACHE:MISSES ++PRESET,PAPI_L1_TCM,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES ++PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES ++PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES ++# ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED ++# ++PRESET,PAPI_RES_STL,NOT_DERIVED,UOPS_RETIRED:STALLS ++# ++#PRESET,PAPI_FP_INS,NOT_DERIVED,UOPS_RETIRED:X87 ++PRESET,PAPI_FML_INS,NOT_DERIVED,UOPS_RETIRED:MUL ++PRESET,PAPI_FDV_INS,NOT_DERIVED,UOPS_RETIRED:DIV ++# + CPU,Intel Nehalem + CPU,Intel Westmere + CPU,nhm +-- +1.8.3.1 + diff --git a/SPECS/papi.spec b/SPECS/papi.spec index efbddfe..4ab8ad0 100644 --- a/SPECS/papi.spec +++ b/SPECS/papi.spec @@ -2,12 +2,13 @@ Summary: Performance Application Programming Interface Name: papi Version: 5.2.0 -Release: 2%{?dist} +Release: 5%{?dist} License: BSD Group: Development/System URL: http://icl.cs.utk.edu/papi/ Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root +Patch100: papi-intel.patch BuildRequires: autoconf BuildRequires: doxygen BuildRequires: ncurses-devel @@ -16,8 +17,8 @@ BuildRequires: kernel-headers >= 2.6.32 BuildRequires: chrpath BuildRequires: lm_sensors-devel %if %{without bundled_libpfm} -BuildRequires: libpfm-devel >= 4.3.0 -BuildRequires: libpfm-static >= 4.3.0 +BuildRequires: libpfm-devel >= 4.4.0-5 +BuildRequires: libpfm-static >= 4.4.0-5 %endif # Following required for net component BuildRequires: net-tools @@ -57,6 +58,7 @@ the PAPI user-space libraries and interfaces. %prep %setup -q +%patch100 -p1 %build %if %{without bundled_libpfm} @@ -138,6 +140,15 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/*.a %changelog +* Tue Jan 14 2014 William Cohen - 5.2.0-5 +- Add presets for Intel Silvermont. + +* Mon Jan 13 2014 William Cohen - 5.2.0-4 +- Add presets for Haswell and Ivy Bridge. + +* Fri Dec 27 2013 Daniel Mach - 5.2.0-3 +- Mass rebuild 2013-12-27 + * Wed Aug 14 2013 William Cohen - 5.2.0-2 - Enable infiniband and stealtime components.