Blame SOURCES/papi-intel.patch

0350d3
commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e
0350d3
Author: James Ralph <ralph@icl.utk.edu>
0350d3
Date:   Mon Aug 26 10:23:52 2013 -0400
0350d3
0350d3
    papi_events.csv: First draft preset events on HSW
0350d3
    
0350d3
    Contributed by Nils Smeds
0350d3
    -------------------------
0350d3
    Here is a suggestion for addition to Hsw counters. These are not
0350d3
    rigorously tested. It compiles and loads.
0350d3
    I'm rather uncertain on many of the events so I am hoping that adding
0350d3
    events like this will get some useful
0350d3
    feedback from the community so that we can improve.
0350d3
    -------------------------
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 2e0da80..39ec16c 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
0350d3
 PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
0350d3
 #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS
0350d3
 #
0350d3
+
0350d3
+# Intel Haswell events (and most likely also Sandy Bridge)
0350d3
+CPU,hsw
0350d3
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
0350d3
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
0350d3
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK
0350d3
+# Loads and stores
0350d3
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
0350d3
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
0350d3
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
0350d3
+# L1 cache
0350d3
+PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
0350d3
+PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
0350d3
+PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
0350d3
+# L2 cache
0350d3
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
0350d3
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
0350d3
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
0350d3
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
0350d3
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
0350d3
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
0350d3
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
0350d3
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
0350d3
+PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
0350d3
+PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
0350d3
+# L3 cache
0350d3
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
0350d3
+PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
0350d3
+PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
0350d3
+# SMP
0350d3
+PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
0350d3
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
0350d3
+PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO
0350d3
+PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM
0350d3
+PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD
0350d3
+# TLB
0350d3
+PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK
0350d3
+PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK
0350d3
+# Prefetcher
0350d3
+PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS
0350d3
+# Stalls
0350d3
+PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB
0350d3
+PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY
0350d3
+PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY
0350d3
+PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1
0350d3
+PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS
0350d3
+PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4
0350d3
+# Branches
0350d3
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN
0350d3
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN
0350d3
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
0350d3
+# End of hsw list
0350d3
+#
0350d3
 CPU,Intel Core2
0350d3
 CPU,Intel Core
0350d3
 CPU,core
0350d3
commit f20568575d3d8023f4f97d3d968a606a51a1e01f
0350d3
Author: James Ralph <ralph@icl.utk.edu>
0350d3
Date:   Tue Sep 17 09:06:50 2013 -0400
0350d3
0350d3
    papi_events.csv: Add PAPI_L1_ICM for Haswell
0350d3
    
0350d3
    Thanks to Maurice Marks of Unisys for the contribution
0350d3
    -------------
0350d3
    I've continued testing on Haswell. By comparison with Vtune and Emon on
0350d3
    Haswell I found that we can use
0350d3
    the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful
0350d3
    measure.
0350d3
    
0350d3
    Attached is my current version of papi_events.csv with Haswell fixes.
0350d3
    -------------
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 39ec16c..01821a8 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_
0350d3
 PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
0350d3
 PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
0350d3
 PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
0350d3
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
0350d3
 # L2 cache
0350d3
 PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
0350d3
 PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
0350d3
commit b2d643df6a20a85e24a2f797c6bea164ed099a84
0350d3
Author: Vince Weaver <vincent.weaver@maine.edu>
0350d3
Date:   Tue Nov 5 16:09:11 2013 -0500
0350d3
0350d3
    Add floating point events for IvyBridge
0350d3
    
0350d3
    Now that Intel has documented them and libpfm4 supports them, PAPI
0350d3
    can use them.  We just use the same events as on sandybridge.
0350d3
    
0350d3
    Tested on an ivybridge system.
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 01821a8..42c1da0 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL
0350d3
 PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
0350d3
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
0350d3
 #
0350d3
+# Counts scalars only; no SSE or AVX is counted; includes speculative
0350d3
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
+#
0350d3
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
+#
0350d3
 # Intel SandyBridge only
0350d3
 CPU,snb
0350d3
 CPU,snb_ep
0350d3
@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
0350d3
 PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
0350d3
 PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
0350d3
 #
0350d3
-# Counts scalars only; no SSE or AVX is counted; includes speculative
0350d3
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
-#
0350d3
-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
-#
0350d3
 # Intel IvyBridge only
0350d3
 CPU,ivb
0350d3
 CPU,ivb_ep
0350d3
From 035fb0849fb84aa02b262b6abe67bc306c3a8600 Mon Sep 17 00:00:00 2001
0350d3
From: Vince Weaver <vincent.weaver@maine.edu>
0350d3
Date: Fri, 6 Dec 2013 13:03:39 -0500
0350d3
Subject: [PATCH 4/4] papi_events.csv : add initial atom silvermont support
0350d3
0350d3
This is based on the manual, as I don't actually have one of these
0350d3
chips.
0350d3
0350d3
The events available differ a lot from older atoms.  They also
0350d3
support offcore events and some sort of RAPL support.
0350d3
---
0350d3
 src/papi_events.csv | 24 ++++++++++++++++++++++++
0350d3
 1 file changed, 24 insertions(+)
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 42c1da0..0e1163e 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -356,6 +356,7 @@ CPU,ix86arch
0350d3
 PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
0350d3
 PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS
0350d3
 #
0350d3
+# Intel Atom
0350d3
 CPU,Intel Atom
0350d3
 CPU,atom
0350d3
 #
0350d3
@@ -412,6 +413,29 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,MUL:AR
0350d3
 PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV:AR
0350d3
 PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED:VECTOR
0350d3
 #
0350d3
+# Intel Atom Silvermont
0350d3
+CPU,slm
0350d3
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED
0350d3
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES
0350d3
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
0350d3
+PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES
0350d3
+PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES
0350d3
+PRESET,PAPI_L1_ICH,DERIVED_SUB,ICACHE:ACCESSES,ICACHE:MISSES
0350d3
+PRESET,PAPI_L1_TCM,NOT_DERIVED,LLC_REFERENCES
0350d3
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES
0350d3
+PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES
0350d3
+PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES
0350d3
+#
0350d3
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC
0350d3
+PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
0350d3
+PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED
0350d3
+#
0350d3
+PRESET,PAPI_RES_STL,NOT_DERIVED,UOPS_RETIRED:STALLS
0350d3
+#
0350d3
+#PRESET,PAPI_FP_INS,NOT_DERIVED,UOPS_RETIRED:X87
0350d3
+PRESET,PAPI_FML_INS,NOT_DERIVED,UOPS_RETIRED:MUL
0350d3
+PRESET,PAPI_FDV_INS,NOT_DERIVED,UOPS_RETIRED:DIV
0350d3
+#
0350d3
 CPU,Intel Nehalem
0350d3
 CPU,Intel Westmere
0350d3
 CPU,nhm
0350d3
-- 
0350d3
1.8.3.1
0350d3