commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e
Author: James Ralph <ralph@icl.utk.edu>
Date: Mon Aug 26 10:23:52 2013 -0400
papi_events.csv: First draft preset events on HSW
Contributed by Nils Smeds
-------------------------
Here is a suggestion for addition to Hsw counters. These are not
rigorously tested. It compiles and loads.
I'm rather uncertain on many of the events so I am hoping that adding
events like this will get some useful
feedback from the community so that we can improve.
-------------------------
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 2e0da80..39ec16c 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
#PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS
#
+
+# Intel Haswell events (and most likely also Sandy Bridge)
+CPU,hsw
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK
+# Loads and stores
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
+# L1 cache
+PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
+PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
+PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
+# L2 cache
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
+PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
+PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
+# L3 cache
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
+PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
+PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
+# SMP
+PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
+PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO
+PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM
+PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD
+# TLB
+PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK
+PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK
+# Prefetcher
+PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS
+# Stalls
+PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB
+PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY
+PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY
+PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1
+PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS
+PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4
+# Branches
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
+# End of hsw list
+#
CPU,Intel Core2
CPU,Intel Core
CPU,core
commit f20568575d3d8023f4f97d3d968a606a51a1e01f
Author: James Ralph <ralph@icl.utk.edu>
Date: Tue Sep 17 09:06:50 2013 -0400
papi_events.csv: Add PAPI_L1_ICM for Haswell
Thanks to Maurice Marks of Unisys for the contribution
-------------
I've continued testing on Haswell. By comparison with Vtune and Emon on
Haswell I found that we can use
the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful
measure.
Attached is my current version of papi_events.csv with Haswell fixes.
-------------
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 39ec16c..01821a8 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_
PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
# L2 cache
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
commit b2d643df6a20a85e24a2f797c6bea164ed099a84
Author: Vince Weaver <vincent.weaver@maine.edu>
Date: Tue Nov 5 16:09:11 2013 -0500
Add floating point events for IvyBridge
Now that Intel has documented them and libpfm4 supports them, PAPI
can use them. We just use the same events as on sandybridge.
Tested on an ivybridge system.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 01821a8..42c1da0 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL
PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
#
+# Counts scalars only; no SSE or AVX is counted; includes speculative
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
+#
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
+#
# Intel SandyBridge only
CPU,snb
CPU,snb_ep
@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
#
-# Counts scalars only; no SSE or AVX is counted; includes speculative
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
-#
-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
-#
# Intel IvyBridge only
CPU,ivb
CPU,ivb_ep
From 035fb0849fb84aa02b262b6abe67bc306c3a8600 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Fri, 6 Dec 2013 13:03:39 -0500
Subject: [PATCH 4/4] papi_events.csv : add initial atom silvermont support
This is based on the manual, as I don't actually have one of these
chips.
The events available differ a lot from older atoms. They also
support offcore events and some sort of RAPL support.
---
src/papi_events.csv | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 42c1da0..0e1163e 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -356,6 +356,7 @@ CPU,ix86arch
PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS
#
+# Intel Atom
CPU,Intel Atom
CPU,atom
#
@@ -412,6 +413,29 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,MUL:AR
PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV:AR
PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED:VECTOR
#
+# Intel Atom Silvermont
+CPU,slm
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
+PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES
+PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES
+PRESET,PAPI_L1_ICH,DERIVED_SUB,ICACHE:ACCESSES,ICACHE:MISSES
+PRESET,PAPI_L1_TCM,NOT_DERIVED,LLC_REFERENCES
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES
+PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES
+PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES
+#
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC
+PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
+PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED
+#
+PRESET,PAPI_RES_STL,NOT_DERIVED,UOPS_RETIRED:STALLS
+#
+#PRESET,PAPI_FP_INS,NOT_DERIVED,UOPS_RETIRED:X87
+PRESET,PAPI_FML_INS,NOT_DERIVED,UOPS_RETIRED:MUL
+PRESET,PAPI_FDV_INS,NOT_DERIVED,UOPS_RETIRED:DIV
+#
CPU,Intel Nehalem
CPU,Intel Westmere
CPU,nhm
--
1.8.3.1
commit c50e0dfed7e0624061d81059bbf6157ae6873e11
Author: Vince Weaver <vincent.weaver@maine.edu>
Date: Wed Mar 26 16:41:34 2014 -0400
remove Haswell PAPI_L1_TCA predefined event
It was making the tests complain a lot, and as far as I can
tell there's no way to make the event.
It had been set to
MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
but you cannot have multiple umasks on MEM_LOAD_UOPS_RETIRED
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 22a82ad..e449529 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -641,7 +641,6 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
# L1 cache
-PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
commit a870eef277ea782e15f91582ce87c46652932e77
Author: James Ralph <ralph@icl.utk.edu>
Date: Wed Apr 9 16:18:11 2014 -0400
Add x87 counts to FP_INS and FP_OPS on [S|I]VB
In Sandy/Ivy Bridge processors it is safe to assume 3 general counters
and the definition of FP_OPS/INS was only using 2. This commit changes
the definition of PAPI_FP_INS/OPS to include FP_COMP_OPS_EXE:X87
The effect appears minimal and improves counts with naively compiled
LAPACK. (gfortran version 4.6 on an IvyBridge with the default build
parameters for LAPACK produced no SSE/AVX ins; it did all its work
with X87 ins.)
If issues arise, this is safe to revert.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index e449529..441844e 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -601,8 +601,8 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
#
# Counts scalars only; no SSE or AVX is counted; includes speculative
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87
#
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458
Author: James Ralph <ralph@icl.utk.edu>
Date: Fri Jun 27 14:06:17 2014 -0400
Update preset mappings for Intel Haswell
Patch due to Michel Brown @ Bull, many thanks.
----------------------------------------------
As I did for some earlier CPUs, I have made an update to the Haswell Preset
Cache Events to provide a more accurate and a more complete set of preset
cache events.
I have validated with a test program all the events except the I-cache events.
The Haswell CPU used for the test was an “Intel(R) Xeon(R) CPU E5-2683 v3 @
2.00GHz” model 63.
I defined a couple of events that are not currently accepted by the preset
mechanism: PAPI_L2_LDH (Level 2 Cache Load Hits) and PAPI_L3_LDH (Level 3 Cache
Load Hits). I have validated the native events used for these presets.
I will leave it to you to decide whether these presets should be included.
They are in the file; but are commented out.
The preset file for Haswell is already organized with the L1, L2 and L3 events
grouped together. For the preset definitions I felt it necessary to replace,
I commented them out. For the ones I added I put in a section following the
current group beginning with a “# Added by FMB” comment.
----------------------------------------------
diff --git a/src/papi_events.csv b/src/papi_events.csv
index dbbc8d8..97fd2ca 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -621,6 +621,11 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:
#
# Intel IvyBridge only
CPU,ivb
+# Added by FMB
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
+PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
+PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
CPU,ivb_ep
#
PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO
@@ -646,19 +651,42 @@ PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
# L2 cache
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
+#PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
+#PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
-PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
-PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
+#PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
+# Added by FMB
+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO
+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO
# L3 cache
-PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
-PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
-PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
+#PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
+#PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
+#PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
+# Added by FMB
+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD
+PRESET,PAPI_L3_DCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
+PRESET,PAPI_L3_TCR,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:DEMAND_RFO_MISS
+PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
# SMP
PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
commit bf55b6b72f3ad6df59050739c248bc94ad9c6722
Author: James Ralph <ralph@icl.utk.edu>
Date: Thu Jul 24 11:02:36 2014 -0400
Update HSW presets
Thanks to Gary Mohr
-------------------
Previously we sent updates to the PAPI preset event definitions to improve the
preset cache events on Haswell processors. In checking the latest source, it
looks like the L1 cache events changes did not get applied quite right. Here
is a patch to the latest source that will make it the way we had intended.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 97fd2ca..aea3b04 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -646,9 +646,14 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
# L1 cache
-PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
-PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
+#PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
+#PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
+# Added by FMB
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
+PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
+PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
# L2 cache
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
#PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
From bb8143e44aa9c249c79c3fd820e55678b01b19fa Mon Sep 17 00:00:00 2001
From: William Cohen <wcohen@redhat.com>
Date: Sun, 28 Sep 2014 11:32:43 -0400
Subject: [PATCH] Remove stray Intel Haswell events from Intel Ivy Bridge
presets
Commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458 added some events
meant for Intel Haswell to the Intel Ivy bridge presets. This patch
removes those stray events. Without this patch, users on Intel Ivy Bridge
machines would see messages like the following:
PAPI Error: papi_preset: Error finding event L2_TRANS:DEMAND_DATA_RD.
PAPI Error: papi_preset: Error finding event L2_RQSTS:ALL_DEMAND_REFERENCES.
---
src/papi_events.csv | 5 -----
1 file changed, 5 deletions(-)
diff --git a/src/papi_events.csv b/src/papi_events.csv
index aea3b04..d6566c0 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -621,11 +621,6 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:
#
# Intel IvyBridge only
CPU,ivb
-# Added by FMB
-PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
-PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
-PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
-PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
CPU,ivb_ep
#
PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO
--
1.9.3