commit adbae8cd948234539d3ad63363878011e5a59949 Author: Heike McCraw Date: Thu Dec 11 12:07:38 2014 -0500 Update presets for Intel Haswell and Haswell-EP (according to the updates of the libpfm4 event table for Intel Haswell and Haswell-EP). These mods have not been tested due to lacking access to an Intel Haswell system. diff --git a/src/papi_events.csv b/src/papi_events.csv index d27a41d..0a17ab3 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -634,7 +634,7 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL # Intel Haswell events (and most likely also Sandy Bridge) CPU,hsw CPU,hsw_ep -PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK # Loads and stores @@ -692,7 +692,7 @@ PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO -PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM +PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD # TLB PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK commit 1dbc7038450d53c7e8724f9e2cb0eb773bdc97bf Author: Heike McCraw Date: Thu May 28 13:46:52 2015 -0400 Temporary workaround: exclude_guest and exclude_host bits have to be zero in the attribute structure (via :mg=1:mh=1). exclude_guest wasn't introduced until Linux 3.2, and so, running newer PAPI versions with libpfm4 that allows exclude_guest to be set on older kernels completely breaks all events unless :mg=1:mh=1 is passed. PAPI code passes the attribute block created by libpfm4 to the kernel without modifying its contents. 
It would be better if libpfm4 provides different defaults for these attribute bits. This commit, however, enforces mg=1 and mh=1 for all Xeon Phi predefined events. A problem with always enforcing mg=1 (i.e. exclude_guest=0) is that if exclude_guest=1 (mg=0) is needed (e.g., to use PEBS) users have to fall back to using native events with the appropriate qualifier settings. This issue has been extensively discussed on the mailing list (Subject "KNC events", discussion started on 11/26/14) where more details can be found. diff --git a/src/papi_events.csv b/src/papi_events.csv index 8fe0ae1..74da53c 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1761,20 +1761,20 @@ PRESET,PAPI_TLB_TL,DERIVED_POSTFIX,N0|N1|+|N2|+|,PEVT_MMU_TLB_MISS_DIRECT_DERAT, # Intel MIC / Xeon-Phi / Knights Corner CPU,knc # -PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES -PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCHES_MISPREDICTED -PRESET,PAPI_L1_ICM,NOT_DERIVED,CODE_CACHE_MISS -PRESET,PAPI_TLB_IM,NOT_DERIVED,CODE_PAGE_WALK -PRESET,PAPI_L1_ICA,NOT_DERIVED,CODE_READ -PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED -PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_PAGE_WALK -PRESET,PAPI_LD_INS,NOT_DERIVED,DATA_READ -PRESET,PAPI_SR_INS,NOT_DERIVED,DATA_WRITE -PRESET,PAPI_L1_DCM,NOT_DERIVED,DATA_READ_MISS_OR_WRITE_MISS -PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_READ_OR_WRITE -PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_EXECUTED -PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_READ_MISS -PRESET,PAPI_VEC_INS,NOT_DERIVED,VPU_INSTRUCTIONS_EXECUTED +PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES:mg=1:mh=1 +PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCHES_MISPREDICTED:mg=1:mh=1 +PRESET,PAPI_L1_ICM,NOT_DERIVED,CODE_CACHE_MISS:mg=1:mh=1 +PRESET,PAPI_TLB_IM,NOT_DERIVED,CODE_PAGE_WALK:mg=1:mh=1 +PRESET,PAPI_L1_ICA,NOT_DERIVED,CODE_READ:mg=1:mh=1 +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:mg=1:mh=1 +PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_PAGE_WALK:mg=1:mh=1 +PRESET,PAPI_LD_INS,NOT_DERIVED,DATA_READ:mg=1:mh=1 
+PRESET,PAPI_SR_INS,NOT_DERIVED,DATA_WRITE:mg=1:mh=1 +PRESET,PAPI_L1_DCM,NOT_DERIVED,DATA_READ_MISS_OR_WRITE_MISS:mg=1:mh=1 +PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_READ_OR_WRITE:mg=1:mh=1 +PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_EXECUTED:mg=1:mh=1 +PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_READ_MISS:mg=1:mh=1 +PRESET,PAPI_VEC_INS,NOT_DERIVED,VPU_INSTRUCTIONS_EXECUTED:mg=1:mh=1 CPU,BGP # The following PAPI presets are accurate for all application nodes commit f42eda64e7c3cc0784b3ce8b8a71f88647a61640 Author: Heike McCraw Date: Thu Jun 25 15:05:53 2015 -0400 Added definitions to Power8 for PAPI_SP_OPS, PAPI_DP_OPS. diff --git a/src/papi_events.csv b/src/papi_events.csv index 74da53c..40c562b 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1378,6 +1378,8 @@ PRESET,PAPI_TOT_INS,NOT_DERIVED,PM_INST_CMPL #n/aPRESET,PAPI_INT_INS,DERIVED_ADD,PM_FXU0_FIN,PM_FXU1_FIN PRESET,PAPI_FP_OPS,NOT_DERIVED,PM_FLOP PRESET,PAPI_FP_INS,NOT_DERIVED,PM_FLOP +PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|4|*|N1|8|*|N2|16|*|N3|32|*|+|+|+|,PM_VSU0_2FLOP,PM_VSU0_4FLOP,PM_VSU0_8FLOP,PM_VSU0_16FLOP +PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|4|*|N1|8|*|N2|16|*|N3|32|*|+|+|+|,PM_VSU0_2FLOP,PM_VSU0_4FLOP,PM_VSU0_8FLOP,PM_VSU0_16FLOP PRESET,PAPI_TOT_CYC,NOT_DERIVED,PM_RUN_CYC PRESET,PAPI_HW_INT,NOT_DERIVED,PM_EXT_INT PRESET,PAPI_STL_ICY,DERIVED_POSTFIX,N0|N1|-|,PM_RUN_CYC,PM_1PLUS_PPC_DISP commit 36c5b5b6b9bc90142743e4b62fa6cc8f99b3e46c Author: Vince Weaver Date: Thu Jun 25 22:20:17 2015 -0400 add broadwell predefined events For now they are the same as Haswell, as that's what the Linux kernel does. 
diff --git a/src/papi_events.csv b/src/papi_events.csv index 74da53c..2ffb6f2 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -631,9 +631,11 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS # -# Intel Haswell events (and most likely also Sandy Bridge) +# Intel Haswell events +# Using also for Broadwell events, this is what the Linux kernel does CPU,hsw CPU,hsw_ep +CPU,bdw PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK @@ -692,7 +694,6 @@ PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO -PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD # TLB PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK @@ -714,6 +715,13 @@ PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES + +CPU,hsw +CPU,hsw_ep +PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM +CPU,bdw +PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM + # End of hsw list # CPU,Intel Core2 commit 71dcdb92c477bf5d2f419c03f94783098a991214 Merge: 36c5b5b f42eda6 Author: Vince Weaver Date: Thu Jun 25 22:21:06 2015 -0400 Merge branch 'master' of https://icl.cs.utk.edu/git/papi commit 0829a4f51b3de92de72f6c6185b99ece15e20254 Author: Vince Weaver Date: Fri Jun 26 11:41:42 2015 -0400 Add future broadwell-ep support. libpfm4 doesn't support it yet, but add it for when it appears. 
diff --git a/src/papi_events.csv b/src/papi_events.csv index ca556c9..c38a892 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -636,6 +636,7 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL CPU,hsw CPU,hsw_ep CPU,bdw +CPU,bdw_ep PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK @@ -720,6 +721,7 @@ CPU,hsw CPU,hsw_ep PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM CPU,bdw +CPU,bdw_ep PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM # End of hsw list commit a10e8331ced0173ead9982c3f78c2e5238b04d66 Author: Vince Weaver Date: Wed Oct 21 08:58:20 2015 -0400 papi_events: add Intel Skylake presets This just shares all of the broadwell events with skylake. Some quick tests show that this probably works. Someone with skylake hardware should validate this at some point. diff --git a/src/papi_events.csv b/src/papi_events.csv index c38a892..2865560 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -637,6 +637,7 @@ CPU,hsw CPU,hsw_ep CPU,bdw CPU,bdw_ep +CPU,skl PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK commit db9c70f517aae119145ef7ec5cded597b70b0437 Author: Heike McCraw Date: Fri Jun 17 18:11:07 2016 -0400 Added FP (SP, DP) presets for Skylake. Corrected L1_LDM|STM, L2_DCW|TCW, PRF_DM, STL_ICY presets for Skylake. 
diff --git a/src/papi_events.csv b/src/papi_events.csv index 2865560..114149d 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -651,8 +651,6 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_ PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD # Added by FMB PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT -PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD -PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD # L2 cache PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES @@ -667,7 +665,6 @@ PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD #PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS # Added by FMB PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS -PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD #PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS @@ -675,7 +672,6 @@ PRESET,PAPI_L2_STM,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD -PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO # L3 cache #PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE #PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT @@ -700,12 +696,9 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD # TLB PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK -# Prefetcher -PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS # Stalls PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY -PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY 
PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1 PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4 @@ -720,13 +713,41 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES CPU,hsw CPU,hsw_ep +CPU,bdw +CPU,bdw_ep +PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD +PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB +PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO +PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO +PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS +PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY + +CPU,hsw +CPU,hsw_ep PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM CPU,bdw CPU,bdw_ep PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM -# End of hsw list +CPU,skl +# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE +PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE +# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE +PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE +PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE +PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE + +PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD +PRESET,PAPI_L1_STM,NOT_DERIVED,L2_RQSTS:ALL_RFO +PRESET,PAPI_L2_DCW,DERIVED_ADD,L2_RQSTS:DEMAND_RFO_HIT,L2_RQSTS:RFO_HIT +PRESET,PAPI_L2_TCW,DERIVED_ADD,L2_RQSTS:DEMAND_RFO_HIT,L2_RQSTS:RFO_HIT 
+PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:PF_MISS +PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE + +# End of hsw,bdw,skl list # + + CPU,Intel Core2 CPU,Intel Core CPU,core commit 1c64bfc0d4aa17aa36b8ab542c841203518e6df7 Author: Heike Jagode Date: Thu Jun 23 15:26:26 2016 -0400 Added FP (SP, DP) presets for Broadwell. NOT TESTED yet due to lack of access to bdw hardware diff --git a/src/papi_events.csv b/src/papi_events.csv index 114149d..5c04442 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -725,10 +725,18 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY CPU,hsw CPU,hsw_ep PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + CPU,bdw CPU,bdw_ep PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM +# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE +PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE +# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE +PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE +PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE +PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE + CPU,skl # PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE commit e9144b9bda355874a1cefd45285578f6c825cc31 Author: Heike Jagode Date: Thu Aug 18 16:34:54 2016 -0400 Added preset definitions for KNL. 
diff --git a/src/papi_events.csv b/src/papi_events.csv index 2d2eca0..e3e80a4 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -754,7 +754,42 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE # End of hsw,bdw,skl list # - +# +# Intel MIC / Xeon-Phi / Knights Landing +# +CPU,knl +PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED +PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES +PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES +PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES +PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES +PRESET,PAPI_L1_ICH,NOT_DERIVED,ICACHE:HIT +# +PRESET,PAPI_L1_DCA,DERIVED_ADD,MEM_UOPS_RETIRED:ANY_LD,MEM_UOPS_RETIRED:ANY_ST +PRESET,PAPI_L1_DCM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_DCU_MISS +PRESET,PAPI_L1_TCM,DERIVED_ADD,MEM_UOPS_RETIRED:LD_DCU_MISS,ICACHE:MISSES +PRESET,PAPI_L1_LDM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_DCU_MISS +# +PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES +PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES +PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES +PRESET,PAPI_L2_LDM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_L2_MISS +PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ANY_LD +PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ANY_ST +PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ANY_LD,MEM_UOPS_RETIRED:ANY_ST +# +PRESET,PAPI_TLB_DM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_UTLB_MISS +# +PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED +PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED +PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC +PRESET,PAPI_BR_UCN,DERIVED_SUB,BRANCH_INSTRUCTIONS_RETIRED,BR_INST_RETIRED:JCC +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:TAKEN_JCC +PRESET,PAPI_BR_NTK,DERIVED_SUB,BR_INST_RETIRED:JCC,BR_INST_RETIRED:TAKEN_JCC +# +PRESET,PAPI_RES_STL,NOT_DERIVED,RS_FULL_STALL:ANY +PRESET,PAPI_STL_ICY,NOT_DERIVED,NO_ALLOC_CYCLES:ANY +# CPU,Intel Core2 CPU,Intel Core