Blame SOURCES/papi-intel.patch

c7ea89
commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e
c7ea89
Author: James Ralph <ralph@icl.utk.edu>
c7ea89
Date:   Mon Aug 26 10:23:52 2013 -0400
c7ea89
c7ea89
    papi_events.csv: First draft preset events on HSW
c7ea89
    
c7ea89
    Contributed by Nils Smeds
c7ea89
    -------------------------
c7ea89
    Here is a suggestion for addition to Hsw counters. These are not
c7ea89
    rigorously tested. It compiles and loads.
c7ea89
    I'm rather uncertain on many of the events so I am hoping that adding
c7ea89
    events like this will get some useful
c7ea89
    feedback from the community so that we can improve.
c7ea89
    -------------------------
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index 2e0da80..39ec16c 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
c7ea89
 PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
c7ea89
 #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS
c7ea89
 #
c7ea89
+
c7ea89
+# Intel Haswell events (and most likely also Sandy Bridge)
c7ea89
+CPU,hsw
c7ea89
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
c7ea89
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
c7ea89
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK
c7ea89
+# Loads and stores
c7ea89
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
c7ea89
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
c7ea89
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
c7ea89
+# L1 cache
c7ea89
+PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
c7ea89
+PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
c7ea89
+PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
c7ea89
+# L2 cache
c7ea89
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
c7ea89
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
c7ea89
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
c7ea89
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
c7ea89
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
c7ea89
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
c7ea89
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
c7ea89
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
c7ea89
+PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
c7ea89
+PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
c7ea89
+# L3 cache
c7ea89
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
c7ea89
+PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
c7ea89
+PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
c7ea89
+# SMP
c7ea89
+PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
c7ea89
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
c7ea89
+PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO
c7ea89
+PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM
c7ea89
+PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD
c7ea89
+# TLB
c7ea89
+PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK
c7ea89
+PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK
c7ea89
+# Prefetcher
c7ea89
+PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS
c7ea89
+# Stalls
c7ea89
+PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB
c7ea89
+PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY
c7ea89
+PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY
c7ea89
+PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1
c7ea89
+PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS
c7ea89
+PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4
c7ea89
+# Branches
c7ea89
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
c7ea89
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
c7ea89
+PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN
c7ea89
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN
c7ea89
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL
c7ea89
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL
c7ea89
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
c7ea89
+# End of hsw list
c7ea89
+#
c7ea89
 CPU,Intel Core2
c7ea89
 CPU,Intel Core
c7ea89
 CPU,core
c7ea89
commit f20568575d3d8023f4f97d3d968a606a51a1e01f
c7ea89
Author: James Ralph <ralph@icl.utk.edu>
c7ea89
Date:   Tue Sep 17 09:06:50 2013 -0400
c7ea89
c7ea89
    papi_events.csv: Add PAPI_L1_ICM for Haswell
c7ea89
    
c7ea89
    Thanks to Maurice Marks of Unisys for the contribution
c7ea89
    -------------
c7ea89
    I've continued testing on Haswell. By comparison with Vtune and Emon on
c7ea89
    Haswell I found that we can use
c7ea89
    the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful
c7ea89
    measure.
c7ea89
    
c7ea89
    Attached is my current version of papi_events.csv with Haswell fixes.
c7ea89
    -------------
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index 39ec16c..01821a8 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_
c7ea89
 PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
c7ea89
 PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
c7ea89
 PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
c7ea89
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
c7ea89
 # L2 cache
c7ea89
 PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
c7ea89
 PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
c7ea89
commit b2d643df6a20a85e24a2f797c6bea164ed099a84
c7ea89
Author: Vince Weaver <vincent.weaver@maine.edu>
c7ea89
Date:   Tue Nov 5 16:09:11 2013 -0500
c7ea89
c7ea89
    Add floating point events for IvyBridge
c7ea89
    
c7ea89
    Now that Intel has documented them and libpfm4 supports them, PAPI
c7ea89
    can use them.  We just use the same events as on sandybridge.
c7ea89
    
c7ea89
    Tested on an ivybridge system.
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index 01821a8..42c1da0 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL
c7ea89
 PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
c7ea89
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
c7ea89
 #
c7ea89
+# Counts scalars only; no SSE or AVX is counted; includes speculative
c7ea89
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
c7ea89
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
c7ea89
+#
c7ea89
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
c7ea89
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
c7ea89
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
c7ea89
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
c7ea89
+#
c7ea89
 # Intel SandyBridge only
c7ea89
 CPU,snb
c7ea89
 CPU,snb_ep
c7ea89
@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
c7ea89
 PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
c7ea89
 PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
c7ea89
 #
c7ea89
-# Counts scalars only; no SSE or AVX is counted; includes speculative
c7ea89
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
c7ea89
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
c7ea89
-#
c7ea89
-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
c7ea89
-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
c7ea89
-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
c7ea89
-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
c7ea89
-#
c7ea89
 # Intel IvyBridge only
c7ea89
 CPU,ivb
c7ea89
 CPU,ivb_ep
c7ea89
From 035fb0849fb84aa02b262b6abe67bc306c3a8600 Mon Sep 17 00:00:00 2001
c7ea89
From: Vince Weaver <vincent.weaver@maine.edu>
c7ea89
Date: Fri, 6 Dec 2013 13:03:39 -0500
c7ea89
Subject: [PATCH 4/4] papi_events.csv : add initial atom silvermont support
c7ea89
c7ea89
This is based on the manual, as I don't actually have one of these
c7ea89
chips.
c7ea89
c7ea89
The events available differ a lot from older atoms.  They also
c7ea89
support offcore events and some sort of RAPL support.
c7ea89
---
c7ea89
 src/papi_events.csv | 24 ++++++++++++++++++++++++
c7ea89
 1 file changed, 24 insertions(+)
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index 42c1da0..0e1163e 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -356,6 +356,7 @@ CPU,ix86arch
c7ea89
 PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
c7ea89
 PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS
c7ea89
 #
c7ea89
+# Intel Atom
c7ea89
 CPU,Intel Atom
c7ea89
 CPU,atom
c7ea89
 #
c7ea89
@@ -412,6 +413,29 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,MUL:AR
c7ea89
 PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV:AR
c7ea89
 PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED:VECTOR
c7ea89
 #
c7ea89
+# Intel Atom Silvermont
c7ea89
+CPU,slm
c7ea89
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED
c7ea89
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES
c7ea89
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
c7ea89
+PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES
c7ea89
+PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES
c7ea89
+PRESET,PAPI_L1_ICH,DERIVED_SUB,ICACHE:ACCESSES,ICACHE:MISSES
c7ea89
+PRESET,PAPI_L1_TCM,NOT_DERIVED,LLC_REFERENCES
c7ea89
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES
c7ea89
+PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES
c7ea89
+PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES
c7ea89
+#
c7ea89
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC
c7ea89
+PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED
c7ea89
+PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED
c7ea89
+#
c7ea89
+PRESET,PAPI_RES_STL,NOT_DERIVED,UOPS_RETIRED:STALLS
c7ea89
+#
c7ea89
+#PRESET,PAPI_FP_INS,NOT_DERIVED,UOPS_RETIRED:X87
c7ea89
+PRESET,PAPI_FML_INS,NOT_DERIVED,UOPS_RETIRED:MUL
c7ea89
+PRESET,PAPI_FDV_INS,NOT_DERIVED,UOPS_RETIRED:DIV
c7ea89
+#
c7ea89
 CPU,Intel Nehalem
c7ea89
 CPU,Intel Westmere
c7ea89
 CPU,nhm
c7ea89
-- 
c7ea89
1.8.3.1
c7ea89
c7ea89
commit c50e0dfed7e0624061d81059bbf6157ae6873e11
c7ea89
Author: Vince Weaver <vincent.weaver@maine.edu>
c7ea89
Date:   Wed Mar 26 16:41:34 2014 -0400
c7ea89
c7ea89
    remove Haswell PAPI_L1_TCA predefined event
c7ea89
    
c7ea89
    It was making the tests complain a lot, and as far as I can
c7ea89
    tell there's no way to make the event.
c7ea89
    
c7ea89
    It had been set to
c7ea89
    
c7ea89
       MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
c7ea89
    
c7ea89
    but you cannot have multiple umasks on MEM_LOAD_UOPS_RETIRED
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index 22a82ad..e449529 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -641,7 +641,6 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
c7ea89
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
c7ea89
 PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
c7ea89
 # L1 cache
c7ea89
-PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
c7ea89
 PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
c7ea89
 PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
c7ea89
 PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
c7ea89
commit a870eef277ea782e15f91582ce87c46652932e77
c7ea89
Author: James Ralph <ralph@icl.utk.edu>
c7ea89
Date:   Wed Apr 9 16:18:11 2014 -0400
c7ea89
c7ea89
    Add x87 counts to FP_INS and FP_OPS on [S|I]VB
c7ea89
    
c7ea89
    In Sandy/Ivy Bridge processors it is safe to assume 3 general counters
c7ea89
    and the definition of FP_OPS/INS was only using 2. This commit changes
c7ea89
    the definition of PAPI_FP_INS/OPS to include FP_COMP_OPS_EXE:X87
c7ea89
    
c7ea89
    The effect appears minimal and improves counts with naively compiled
c7ea89
    LAPACK. ( gfortran version 4.6 on an IvyBridge with the default build
c7ea89
    parameters for LAPACK produced no SSE/AVX ins, it did all its work
c7ea89
    with X87 ins)
c7ea89
    
c7ea89
    If issues arise, this is safe to revert.
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index e449529..441844e 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -601,8 +601,8 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
c7ea89
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
c7ea89
 #
c7ea89
 # Counts scalars only; no SSE or AVX is counted; includes speculative
c7ea89
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
c7ea89
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
c7ea89
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87
c7ea89
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87
c7ea89
 #
c7ea89
 PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
c7ea89
 PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
c7ea89
commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458
c7ea89
Author: James Ralph <ralph@icl.utk.edu>
c7ea89
Date:   Fri Jun 27 14:06:17 2014 -0400
c7ea89
c7ea89
    Update preset mappings for Intel Haswell
c7ea89
    
c7ea89
    Patch due to Michel Brown @ Bull, many thanks.
c7ea89
    ----------------------------------------------
c7ea89
    As I did for some earlier CPUs, I have made an update to the Haswell Preset
c7ea89
    Cache Events to provide a more accurate and a more complete set of preset
c7ea89
    cache events.
c7ea89
    
c7ea89
    I have validated with a test program all the events except the I-cache events.
c7ea89
    The Haswell CPU used for the test was an “Intel(R) Xeon(R) CPU E5-2683 v3 @
c7ea89
    2.00GHz” model 63.
c7ea89
    
c7ea89
    I defined a couple of events that are not currently accepted by the preset
c7ea89
    mechanism: PAPI_L2_LDH (Level 2 Cache Load Hits) and PAPI_L3_LDH (Level 3 Cache
c7ea89
    Load Hits).  I have validated the native events used for these presets.
c7ea89
    
c7ea89
    I will leave it to you to decide whether these presets should be included.
c7ea89
    They are in the file; but are commented out.
c7ea89
    
c7ea89
    The preset file for Haswell is already organized with the L1, L2 and L3 events
c7ea89
    grouped together.  For the preset definitions I felt it necessary to replace,
c7ea89
    I commented them out.  For the ones I added I put in a section following the
c7ea89
    current group beginning with a “# Added by FMB” comment.
c7ea89
    ----------------------------------------------
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index dbbc8d8..97fd2ca 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -621,6 +621,11 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:
c7ea89
 #
c7ea89
 # Intel IvyBridge only
c7ea89
 CPU,ivb
c7ea89
+# Added by FMB
c7ea89
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
c7ea89
+PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
c7ea89
+PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
c7ea89
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
c7ea89
 CPU,ivb_ep
c7ea89
 #
c7ea89
 PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO
c7ea89
@@ -646,19 +651,42 @@ PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
c7ea89
 PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
c7ea89
 # L2 cache
c7ea89
 PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
c7ea89
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
c7ea89
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
c7ea89
+#PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
c7ea89
+#PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
c7ea89
 PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
c7ea89
 PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
c7ea89
 PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
c7ea89
 PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
c7ea89
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
c7ea89
-PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
c7ea89
-PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
c7ea89
+#PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
c7ea89
+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
c7ea89
+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
c7ea89
+# Added by FMB
c7ea89
+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
c7ea89
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO
c7ea89
+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
c7ea89
+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
c7ea89
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
c7ea89
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
c7ea89
+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
c7ea89
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
c7ea89
+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
c7ea89
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO
c7ea89
 # L3 cache
c7ea89
-PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
c7ea89
-PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
c7ea89
-PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
c7ea89
+#PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
c7ea89
+#PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
c7ea89
+#PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
c7ea89
+# Added by FMB
c7ea89
+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
c7ea89
+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD
c7ea89
+PRESET,PAPI_L3_DCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
c7ea89
+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
c7ea89
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
c7ea89
+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
c7ea89
+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
c7ea89
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
c7ea89
+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
c7ea89
+PRESET,PAPI_L3_TCR,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:DEMAND_RFO_MISS
c7ea89
+PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS
c7ea89
 # SMP
c7ea89
 PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
c7ea89
 PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
c7ea89
commit bf55b6b72f3ad6df59050739c248bc94ad9c6722
c7ea89
Author: James Ralph <ralph@icl.utk.edu>
c7ea89
Date:   Thu Jul 24 11:02:36 2014 -0400
c7ea89
c7ea89
    Update HSW presets
c7ea89
    
c7ea89
    Thanks to Gary Mohr
c7ea89
    -------------------
c7ea89
    Previously we sent updates to the PAPI preset event definitions to improve the
c7ea89
    preset cache events on Haswell processors.  In checking the latest source, it
c7ea89
    looks like the L1 cache events changes did not get applied quite right.  Here
c7ea89
    is a patch to the latest source that will make it the way we had intended.
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index 97fd2ca..aea3b04 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -646,9 +646,14 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
c7ea89
 PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
c7ea89
 PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
c7ea89
 # L1 cache
c7ea89
-PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
c7ea89
-PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
c7ea89
+#PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
c7ea89
+#PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
c7ea89
 PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
c7ea89
+# Added by FMB
c7ea89
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
c7ea89
+PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
c7ea89
+PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
c7ea89
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
c7ea89
 # L2 cache
c7ea89
 PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
c7ea89
 #PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
c7ea89
From bb8143e44aa9c249c79c3fd820e55678b01b19fa Mon Sep 17 00:00:00 2001
c7ea89
From: William Cohen <wcohen@redhat.com>
c7ea89
Date: Sun, 28 Sep 2014 11:32:43 -0400
c7ea89
Subject: [PATCH] Remove stray Intel Haswell events from Intel Ivy Bridge
c7ea89
 presets
c7ea89
c7ea89
Commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458 added some events
c7ea89
meant for Intel Haswell to the Intel Ivy bridge presets.  This patch
c7ea89
removes those stray events.  Without this patch, users of Intel Ivy Bridge
c7ea89
machines would see messages like the following:
c7ea89
c7ea89
PAPI Error: papi_preset: Error finding event L2_TRANS:DEMAND_DATA_RD.
c7ea89
PAPI Error: papi_preset: Error finding event L2_RQSTS:ALL_DEMAND_REFERENCES.
c7ea89
---
c7ea89
 src/papi_events.csv | 5 -----
c7ea89
 1 file changed, 5 deletions(-)
c7ea89
c7ea89
diff --git a/src/papi_events.csv b/src/papi_events.csv
c7ea89
index aea3b04..d6566c0 100644
c7ea89
--- a/src/papi_events.csv
c7ea89
+++ b/src/papi_events.csv
c7ea89
@@ -621,11 +621,6 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:
c7ea89
 #
c7ea89
 # Intel IvyBridge only
c7ea89
 CPU,ivb
c7ea89
-# Added by FMB
c7ea89
-PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
c7ea89
-PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD
c7ea89
-PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB
c7ea89
-PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
c7ea89
 CPU,ivb_ep
c7ea89
 #
c7ea89
 PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO
c7ea89
-- 
c7ea89
1.9.3
c7ea89