0350d3
commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e
0350d3
Author: James Ralph <ralph@icl.utk.edu>
0350d3
Date:   Mon Aug 26 10:23:52 2013 -0400
0350d3
0350d3
    papi_events.csv: First draft preset events on HSW
0350d3
    
0350d3
    Contributed by Nils Smeds
0350d3
    -------------------------
0350d3
    Here is a suggestion for addition to Hsw counters. These are not
0350d3
    rigorously tested. It compiles and loads.
0350d3
    I'm rather uncertain on many of the events so I am hoping that adding
0350d3
    events like this will get some useful
0350d3
    feedback from the community so that we can improve.
0350d3
    -------------------------
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 2e0da80..39ec16c 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
0350d3
 PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
0350d3
 #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS
0350d3
 #
0350d3
+
0350d3
+# Intel Haswell events (and most likely also Sandy Bridge)
0350d3
+CPU,hsw
0350d3
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
0350d3
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
0350d3
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK
0350d3
+# Loads and stores
0350d3
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
0350d3
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
0350d3
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
0350d3
+# L1 cache
0350d3
+PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
0350d3
+PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
0350d3
+PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
0350d3
+# L2 cache
0350d3
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
0350d3
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
0350d3
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
0350d3
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
0350d3
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
0350d3
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
0350d3
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
0350d3
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
0350d3
+PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
0350d3
+PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
0350d3
+# L3 cache
0350d3
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
0350d3
+PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
0350d3
+PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
0350d3
+# SMP
0350d3
+PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
0350d3
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
0350d3
+PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO
0350d3
+PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM
0350d3
+PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD
0350d3
+# TLB
0350d3
+PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK
0350d3
+PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK
0350d3
+# Prefetcher
0350d3
+PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS
0350d3
+# Stalls
0350d3
+PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB
0350d3
+PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY
0350d3
+PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY
0350d3
+PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1
0350d3
+PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS
0350d3
+PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4
0350d3
+# Branches
0350d3
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN
0350d3
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN
0350d3
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL
0350d3
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
0350d3
+# End of hsw list
0350d3
+#
0350d3
 CPU,Intel Core2
0350d3
 CPU,Intel Core
0350d3
 CPU,core
0350d3
commit f20568575d3d8023f4f97d3d968a606a51a1e01f
0350d3
Author: James Ralph <ralph@icl.utk.edu>
0350d3
Date:   Tue Sep 17 09:06:50 2013 -0400
0350d3
0350d3
    papi_events.csv: Add PAPI_L1_ICM for Haswell
0350d3
    
0350d3
    Thanks to Maurice Marks of Unisys for the contribution
0350d3
    -------------
0350d3
    I've continued testing on Haswell. By comparison with Vtune and Emon on
0350d3
    Haswell I found that we can use
0350d3
    the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful
0350d3
    measure.
0350d3
    
0350d3
    Attached is my current version of papi_events.csv with Haswell fixes.
0350d3
    -------------
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 39ec16c..01821a8 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_
0350d3
 PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
0350d3
 PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
0350d3
 PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
0350d3
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
0350d3
 # L2 cache
0350d3
 PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
0350d3
 PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
0350d3
commit b2d643df6a20a85e24a2f797c6bea164ed099a84
0350d3
Author: Vince Weaver <vincent.weaver@maine.edu>
0350d3
Date:   Tue Nov 5 16:09:11 2013 -0500
0350d3
0350d3
    Add floating point events for IvyBridge
0350d3
    
0350d3
    Now that Intel has documented them and libpfm4 supports them, PAPI
0350d3
    can use them.  We just use the same events as on sandybridge.
0350d3
    
0350d3
    Tested on an ivybridge system.
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 01821a8..42c1da0 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL
0350d3
 PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
0350d3
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
0350d3
 #
0350d3
+# Counts scalars only; no SSE or AVX is counted; includes speculative
0350d3
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
+#
0350d3
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
+#
0350d3
 # Intel SandyBridge only
0350d3
 CPU,snb
0350d3
 CPU,snb_ep
0350d3
@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
0350d3
 PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
0350d3
 PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
0350d3
 #
0350d3
-# Counts scalars only; no SSE or AVX is counted; includes speculative
0350d3
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
0350d3
-#
0350d3
-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
0350d3
-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
0350d3
-#
0350d3
 # Intel IvyBridge only
0350d3
 CPU,ivb
0350d3
 CPU,ivb_ep
0350d3
From 035fb0849fb84aa02b262b6abe67bc306c3a8600 Mon Sep 17 00:00:00 2001
0350d3
From: Vince Weaver <vincent.weaver@maine.edu>
0350d3
Date: Fri, 6 Dec 2013 13:03:39 -0500
0350d3
Subject: [PATCH 4/4] papi_events.csv : add initial atom silvermont support
0350d3
0350d3
This is based on the manual, as I don't actually have one of these
0350d3
chips.
0350d3
0350d3
The events available differ a lot from older atoms.  They also
0350d3
support offcore events and some sort of RAPL support.
0350d3
---
0350d3
 src/papi_events.csv | 24 ++++++++++++++++++++++++
0350d3
 1 file changed, 24 insertions(+)
0350d3
0350d3
diff --git a/src/papi_events.csv b/src/papi_events.csv
0350d3
index 42c1da0..0e1163e 100644
0350d3
--- a/src/papi_events.csv
0350d3
+++ b/src/papi_events.csv
0350d3
@@ -356,6 +356,7 @@ CPU,ix86arch
0350d3
 PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
0350d3
 PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS
0350d3
 #
0350d3
+# Intel Atom
0350d3
 CPU,Intel Atom
0350d3
 CPU,atom
0350d3
 #
0350d3
@@ -412,6 +413,29 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,MUL:AR
0350d3
 PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV:AR
0350d3
 PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED:VECTOR
0350d3
 #
0350d3
+# Intel Atom Silvermont
0350d3
+CPU,slm
0350d3
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED
0350d3
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES
0350d3
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
0350d3
+PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES
0350d3
+PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES
0350d3
+PRESET,PAPI_L1_ICH,DERIVED_SUB,ICACHE:ACCESSES,ICACHE:MISSES
0350d3
+PRESET,PAPI_L1_TCM,NOT_DERIVED,LLC_REFERENCES
0350d3
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES
0350d3
+PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES
0350d3
+PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES
0350d3
+#
0350d3
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC
0350d3
+PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
0350d3
+PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED
0350d3
+#
0350d3
+PRESET,PAPI_RES_STL,NOT_DERIVED,UOPS_RETIRED:STALLS
0350d3
+#
0350d3
+#PRESET,PAPI_FP_INS,NOT_DERIVED,UOPS_RETIRED:X87
0350d3
+PRESET,PAPI_FML_INS,NOT_DERIVED,UOPS_RETIRED:MUL
0350d3
+PRESET,PAPI_FDV_INS,NOT_DERIVED,UOPS_RETIRED:DIV
0350d3
+#
0350d3
 CPU,Intel Nehalem
0350d3
 CPU,Intel Westmere
0350d3
 CPU,nhm
0350d3
-- 
0350d3
1.8.3.1
0350d3
da2d47
commit c50e0dfed7e0624061d81059bbf6157ae6873e11
da2d47
Author: Vince Weaver <vincent.weaver@maine.edu>
da2d47
Date:   Wed Mar 26 16:41:34 2014 -0400
da2d47
da2d47
    remove Haswell PAPI_L1_TCA predefined event
da2d47
    
da2d47
    It was making the tests complain a lot, and as far as I can
da2d47
    tell there's no way to make the event.
da2d47
    
da2d47
    It had been set to
da2d47
    
da2d47
       MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
da2d47
    
da2d47
    but you cannot have multiple umasks on MEM_LOAD_UOPS_RETIRED
da2d47
da2d47
diff --git a/src/papi_events.csv b/src/papi_events.csv
da2d47
index 22a82ad..e449529 100644
da2d47
--- a/src/papi_events.csv
da2d47
+++ b/src/papi_events.csv
da2d47
@@ -641,7 +641,6 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
da2d47
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
da2d47
 PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
da2d47
 # L1 cache
da2d47
-PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
da2d47
 PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
da2d47
 PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
da2d47
 PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
da2d47
commit a870eef277ea782e15f91582ce87c46652932e77
da2d47
Author: James Ralph <ralph@icl.utk.edu>
da2d47
Date:   Wed Apr 9 16:18:11 2014 -0400
da2d47
da2d47
    Add x87 counts to FP_INS and FP_OPS on [S|I]VB
da2d47
    
da2d47
    In Sandy/Ivy Bridge processors it is safe to assume 3 general counters
da2d47
    and the definition of FP_OPS/INS was only using 2. This commit changes
da2d47
     the definition of PAPI_FP_INS/OPS to include FP_COMP_OPS_EXE:X87
da2d47
    
da2d47
    The effect appears minimal and improves counts with naively compiled
da2d47
    LAPACK. ( gfortran version 4.6 on an IvyBridge with the default build
da2d47
    parameters for LAPACK produced no SSE/AVX ins, it did all its work
da2d47
    with X87 ins)
da2d47
    
da2d47
    If issues arise, this is safe to revert.
da2d47
da2d47
diff --git a/src/papi_events.csv b/src/papi_events.csv
da2d47
index e449529..441844e 100644
da2d47
--- a/src/papi_events.csv
da2d47
+++ b/src/papi_events.csv
da2d47
@@ -601,8 +601,8 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
da2d47
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
da2d47
 #
da2d47
 # Counts scalars only; no SSE or AVX is counted; includes speculative
da2d47
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
da2d47
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
da2d47
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87
da2d47
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87
da2d47
 #
da2d47
 PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
da2d47
 PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
da2d47
commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458
da2d47
Author: James Ralph <ralph@icl.utk.edu>
da2d47
Date:   Fri Jun 27 14:06:17 2014 -0400
da2d47
da2d47
    Update preset mappings for Intel Haswell
da2d47
    
da2d47
    Patch due to Michel Brown @ Bull, many thanks.
da2d47
    ----------------------------------------------
da2d47
    As I did for some earlier CPUs, I have made an update to the Haswell Preset
da2d47
    Cache Events to provide a more accurate and a more complete set of preset
da2d47
    cache events.
da2d47
    
da2d47
    I have validated with a test program all the events except the I-cache events.
da2d47
    The Haswell CPU used for the test was an “Intel(R) Xeon(R) CPU E5-2683 v3 @
da2d47
    2.00GHz” model 63.
da2d47
    
da2d47
    I defined a couple of events that are not currently accepted by the preset
da2d47
    mechanism: PAPI_L2_LDH (Level 2 Cache Load Hits) and PAPI_L3_LDH (Level 3 Cache
da2d47
    Load Hits).  I have validated the native events used for these presets.
da2d47
    
da2d47
    I will leave it to you to decide whether these presets should be included.
da2d47
    They are in the file; but are commented out.
da2d47
    
da2d47
    The preset file for Haswell is already organized with the L1, L2 and L3 events
da2d47
    grouped together.  For the preset definitions I felt it necessary to replace,
da2d47
    I commented them out.  For the ones I added I put in a section following the
da2d47
    current group beginning with a “# Added by FMB” comment.
da2d47
    ----------------------------------------------
da2d47
da2d47
diff --git a/src/papi_events.csv b/src/papi_events.csv
da2d47
index dbbc8d8..97fd2ca 100644
da2d47
--- a/src/papi_events.csv
da2d47
+++ b/src/papi_events.csv
da2d47
@@ -621,6 +621,11 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:
da2d47
 #
da2d47
 # Intel IvyBridge only
da2d47
 CPU,ivb
da2d47
+# Added by FMB
da2d47
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
da2d47
+PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
da2d47
+PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
da2d47
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
da2d47
 CPU,ivb_ep
da2d47
 #
da2d47
 PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO
da2d47
@@ -646,19 +651,42 @@ PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
da2d47
 PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
da2d47
 # L2 cache
da2d47
 PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
da2d47
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
da2d47
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
da2d47
+#PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
da2d47
+#PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
da2d47
 PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
da2d47
 PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
da2d47
 PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
da2d47
 PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
da2d47
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
da2d47
-PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
da2d47
-PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
da2d47
+#PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
da2d47
+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
da2d47
+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
da2d47
+# Added by FMB
da2d47
+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
da2d47
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO
da2d47
+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
da2d47
+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
da2d47
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
da2d47
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
da2d47
+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
da2d47
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
da2d47
+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
da2d47
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO
da2d47
 # L3 cache
da2d47
-PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
da2d47
-PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
da2d47
-PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
da2d47
+#PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
da2d47
+#PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
da2d47
+#PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
da2d47
+# Added by FMB
da2d47
+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
da2d47
+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD
da2d47
+PRESET,PAPI_L3_DCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
da2d47
+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
da2d47
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
da2d47
+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
da2d47
+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
da2d47
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
da2d47
+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
da2d47
+PRESET,PAPI_L3_TCR,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:DEMAND_RFO_MISS
da2d47
+PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
da2d47
 # SMP
da2d47
 PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
da2d47
 PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
da2d47
commit bf55b6b72f3ad6df59050739c248bc94ad9c6722
da2d47
Author: James Ralph <ralph@icl.utk.edu>
da2d47
Date:   Thu Jul 24 11:02:36 2014 -0400
da2d47
da2d47
    Update HSW presets
da2d47
    
da2d47
    Thanks to Gary Mohr
da2d47
    -------------------
da2d47
    Previously we sent updates to the PAPI preset event definitions to improve the
da2d47
    preset cache events on Haswell processors.  In checking the latest source, it
da2d47
    looks like the L1 cache events changes did not get applied quite right.  Here
da2d47
    is a patch to the latest source that will make it the way we had intended.
da2d47
da2d47
diff --git a/src/papi_events.csv b/src/papi_events.csv
da2d47
index 97fd2ca..aea3b04 100644
da2d47
--- a/src/papi_events.csv
da2d47
+++ b/src/papi_events.csv
da2d47
@@ -646,9 +646,14 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
da2d47
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
da2d47
 PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
da2d47
 # L1 cache
da2d47
-PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
da2d47
-PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
da2d47
+#PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
da2d47
+#PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
da2d47
 PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
da2d47
+# Added by FMB
da2d47
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
da2d47
+PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
da2d47
+PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
da2d47
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
da2d47
 # L2 cache
da2d47
 PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
da2d47
 #PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
da2d47
From bb8143e44aa9c249c79c3fd820e55678b01b19fa Mon Sep 17 00:00:00 2001
da2d47
From: William Cohen <wcohen@redhat.com>
da2d47
Date: Sun, 28 Sep 2014 11:32:43 -0400
da2d47
Subject: [PATCH] Remove stray Intel Haswell events from Intel Ivy Bridge
da2d47
 presets
da2d47
da2d47
Commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458 added some events
da2d47
meant for Intel Haswell to the Intel Ivy bridge presets.  This patch
da2d47
removes those stray events.  Without this patch, users on Intel Ivy Bridge
da2d47
machines would see messages like the following:
da2d47
da2d47
PAPI Error: papi_preset: Error finding event L2_TRANS:DEMAND_DATA_RD.
da2d47
PAPI Error: papi_preset: Error finding event L2_RQSTS:ALL_DEMAND_REFERENCES.
da2d47
---
da2d47
 src/papi_events.csv | 5 -----
da2d47
 1 file changed, 5 deletions(-)
da2d47
da2d47
diff --git a/src/papi_events.csv b/src/papi_events.csv
da2d47
index aea3b04..d6566c0 100644
da2d47
--- a/src/papi_events.csv
da2d47
+++ b/src/papi_events.csv
da2d47
@@ -621,11 +621,6 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:
da2d47
 #
da2d47
 # Intel IvyBridge only
da2d47
 CPU,ivb
da2d47
-# Added by FMB
da2d47
-PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
da2d47
-PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
da2d47
-PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
da2d47
-PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
da2d47
 CPU,ivb_ep
da2d47
 #
da2d47
 PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO
da2d47
-- 
da2d47
1.9.3
da2d47