From 7c630d9efb5105e3e665fe2d156044fa36904de7 Mon Sep 17 00:00:00 2001 From: CentOS Buildsys Date: Jan 26 2014 12:52:22 +0000 Subject: import libpfm-4.4.0-6.el7.src.rpm --- diff --git a/SOURCES/libpfm-events.patch b/SOURCES/libpfm-events.patch new file mode 100644 index 0000000..e1e5cce --- /dev/null +++ b/SOURCES/libpfm-events.patch @@ -0,0 +1,4957 @@ +From 1f169c82d7e788f3a7096b212fa33d26c8155a85 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Wed, 19 Jun 2013 08:42:20 +0200 +Subject: [PATCH 01/14] fix event name typo for CPU_IO_REQUESTS_TO_MEMORY_IO + +Reported by Steve Kaufmann. + +Signed-off-by: Stephane Eranian +--- + lib/events/amd64_events_fam15h.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h +index 7195f13..5738e4c 100644 +--- a/lib/events/amd64_events_fam15h.h ++++ b/lib/events/amd64_events_fam15h.h +@@ -2277,7 +2277,7 @@ static const amd64_entry_t amd64_fam15h_pe[]={ + .ngrp = 1, + .umasks = amd64_fam15h_thermal_status, + }, +-{ .name = "CPU_0O_REQUESTS_TO_MEMORY_IO", ++{ .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", + .desc = "CPU/IO Requests to Memory/IO", + .code = 0xe9, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_io_requests_to_memory_io), +-- +1.8.3.1 + +From 0f7cd3b77060def8a91218819493effe276350c8 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 1 Jul 2013 08:02:09 +0200 +Subject: [PATCH 02/14] fix event code for Intel Haswell LSD event + +Was 0xa0 instead of 0xa8 + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_hsw_events.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index bc0549e..e00291e 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -2228,7 +2228,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ + }, + { .name = "LSD", + .desc = "Loop stream detector", +- .code = 0xa0, ++ .code = 0xa8, + .cntmsk = 0xff, + .ngrp = 1, + .modmsk = INTEL_V4_ATTRS, +-- +1.8.3.1 + +From 7d74c8db594447b7235daf3a54154b9a9f17da0d Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 1 Jul 2013 08:32:37 +0200 +Subject: [PATCH 03/14] remove unsupported umask combo for HSW BR_MISP_EXEC and + BR_INST_EXEC + +Some umask combinations were not supported + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_hsw_events.h | 19 ++----------------- + 1 file changed, 2 insertions(+), 17 deletions(-) + +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index e00291e..edcc6bc 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -63,11 +63,6 @@ static const intel_x86_umask_t hsw_br_inst_exec[]={ + .ucode = 0x9000, + .uflags = INTEL_X86_NCOMBO, + }, +- { .uname = "TAKEN_INDIRECT_NEAR_CALL", +- .udesc = "All taken indirect calls, including both register and memory indirect", +- .ucode = 0xa000, +- .uflags = INTEL_X86_NCOMBO, +- }, + { .uname = "ALL_CONDITIONAL", + .udesc = "Speculative and retired macro-conditional branches", + .ucode = 0xc100, +@@ -93,9 +88,9 @@ static const intel_x86_umask_t hsw_br_inst_exec[]={ + .ucode = 0xd000, + .uflags = INTEL_X86_NCOMBO, + }, +- { .uname = "ANY_INDIRECT_NEAR_CALL", ++ { .uname = "TAKEN_INDIRECT_NEAR_CALL", + .udesc = "All indirect calls, including both register and memory indirect", +- .ucode = 0xe000, ++ .ucode = 0xa000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ALL_BRANCHES", +@@ -174,16 +169,6 @@ static const intel_x86_umask_t hsw_br_misp_exec[]={ + .ucode = 0xc400, + .uflags = INTEL_X86_NCOMBO, + }, +- { .uname = "ANY_RETURN_NEAR", +- .udesc = "Speculative and retired mispredicted indirect branches with return mnemonic", +- .ucode = 0xc800, +- .uflags = INTEL_X86_NCOMBO, +- }, +- { .uname = "ANY_INDIRECT_NEAR_CALL", +- .udesc = "All mispredicted indirect calls, including both register and memory indirect", +- .ucode = 0xe000, +- .uflags = INTEL_X86_NCOMBO, +- }, + { .uname = "ALL_BRANCHES", + .udesc = "Speculative and retired mispredicted macro conditional branches", + .ucode = 0xff00, +-- +1.8.3.1 + +From b52f161160dc0ddb9dfcdd51e61b4a9171a293ce Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 1 Jul 2013 09:34:12 +0200 +Subject: [PATCH 04/14] add Haswell UOPS_EXECUTED:STALL_CYCLES + +Handy alias to UOPS_EXECUTED:CORE:c=1:i + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_hsw_events.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index edcc6bc..d479862 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -1101,6 +1101,13 @@ static const intel_x86_umask_t hsw_uops_executed[]={ + .ucode = 0x200, + .uflags = INTEL_X86_DFL, + }, ++ { .uname = "STALL_CYCLES", ++ .udesc = "Number of cycles with no uops executed", ++ .ucode = 0x200 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ ++ .uequiv = "CORE:c=1:i=1", ++ .uflags = INTEL_X86_NCOMBO, ++ .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, ++ }, + }; + + static const intel_x86_umask_t hsw_uops_executed_port[]={ +-- +1.8.3.1 + +From 138ec47914922851256e1275e508d94d3ecf7956 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Wed, 31 Jul 2013 16:01:05 +0200 +Subject: [PATCH 05/14] fix modmsk for Intel Haswell CYCLE_ACTIVITY event + +Was marked as V3 when it is V4 + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_hsw_events.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index d479862..ccd4a2b 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -1855,7 +1855,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ + .code = 0xa3, + .cntmsk = 0xf, + .ngrp = 1, +- .modmsk = INTEL_V3_ATTRS & ~_INTEL_X86_ATTR_C, ++ .modmsk = INTEL_V4_ATTRS & ~_INTEL_X86_ATTR_C, + .numasks = LIBPFM_ARRAY_SIZE(hsw_cycle_activity), + .umasks = hsw_cycle_activity + }, +-- +1.8.3.1 + +From a3e9c3ec4d87c0a82e5622c6421133493e7cc0a4 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sat, 10 Aug 2013 15:35:25 +0200 +Subject: [PATCH 06/14] drop umask from snbep_unc_pcu:COREx_TRANSITION_CYCLES + +Because they do not use .occ_sel bitfield. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_snbep_unc_pcu_events.h | 24 ------------------------ + 1 file changed, 24 deletions(-) + +diff --git a/lib/events/intel_snbep_unc_pcu_events.h b/lib/events/intel_snbep_unc_pcu_events.h +index dd4aa3e..10dc6b3 100644 +--- a/lib/events/intel_snbep_unc_pcu_events.h ++++ b/lib/events/intel_snbep_unc_pcu_events.h +@@ -72,73 +72,49 @@ static const intel_x86_entry_t intel_snbep_unc_p_pe[]={ + .desc = "Core C State Transition Cycles", + .code = 0x3 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_CORE1_TRANSITION_CYCLES", + .desc = "Core C State Transition Cycles", + .code = 0x4 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_CORE2_TRANSITION_CYCLES", + .desc = "Core C State Transition Cycles", + .code = 0x5 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_CORE3_TRANSITION_CYCLES", + .desc = "Core C State Transition Cycles", + .code = 0x6 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_CORE4_TRANSITION_CYCLES", + .desc = "Core C State Transition Cycles", + .code = 0x7 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_CORE5_TRANSITION_CYCLES", + .desc = "Core C State Transition Cycles", + .code = 0x8 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_CORE6_TRANSITION_CYCLES", + .desc = "Core C State Transition Cycles", + .code = 0x9 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_CORE7_TRANSITION_CYCLES", + .desc = "Core C State Transition Cycles", + .code = 0xa | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, +- .ngrp = 1, + .modmsk = SNBEP_UNC_PCU_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), +- .umasks = snbep_unc_p_occupancy_counters + }, + { .name = "UNC_P_DEMOTIONS_CORE0", + .desc = "Core C State Demotions", +-- +1.8.3.1 + +From 7af6bc46302812d29cfbc23d24430d31f09049da Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 4 Nov 2013 19:07:23 +0100 +Subject: [PATCH 07/14] add missing events/umasks for Intel Ivy Bridge + +ivb::FP_COMP_OPS_EXE:X87 +ivb::FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE +ivb::FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE +ivb::FP_COMP_OPS_EXE:SSE_PACKED_SINGLE +ivb::SIMD_FP_256:PACKED_SINGLE +ivb::SIMD_FP_256:PACKED_DOUBLE +ivb::LSD:UOPS +ivb::UOPS_EXECUTED:THREAD +ivb::ICACHE:IFETCH_STALLS +ivb::LD_BLOCKS:NO_SR +ivb::OTHER_ASSISTS:WB + +Added aliases: +ivb::DTLB_LOAD_ACCESS -> TLB_ACCESS +ivb::LONGEST_LAT_CACHE -> L3_LAT_CACHE + +Thanks to Vince Weaver for spotting those new updates in +the Sep 2013 SDM Vol3b edition. + +Also added a few more IVB validation tests. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_ivb_events.h | 132 ++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 127 insertions(+), 5 deletions(-) + +diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h +index e473756..3c5583e 100644 +--- a/lib/events/intel_ivb_events.h ++++ b/lib/events/intel_ivb_events.h +@@ -406,7 +406,12 @@ static const intel_x86_umask_t ivb_icache[]={ + { .uname = "MISSES", + .udesc = "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes UC accesses", + .ucode = 0x200, +- .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IFETCH_STALL", ++ .udesc = "Number of cycles wher a code-fetch stalled due to L1 instruction cache miss or iTLB miss", ++ .ucode = 0x400, ++ .uflags= INTEL_X86_NCOMBO, + }, + }; + +@@ -853,7 +858,12 @@ static const intel_x86_umask_t ivb_ld_blocks[]={ + { .uname = "STORE_FORWARD", + .udesc = "Loads blocked by overlapping with store buffer that cannot be forwarded", + .ucode = 0x200, +- .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "NO_SR", ++ .udesc = "Number of times that split load operations are temporarily blocked because all resources for handlding the split accesses are in use", ++ .ucode = 0x800, ++ .uflags= INTEL_X86_NCOMBO, + }, + }; + +@@ -1167,6 +1177,11 @@ static const intel_x86_umask_t ivb_other_assists[]={ + .ucode = 0x0800, + .uflags= INTEL_X86_NCOMBO, + }, ++ { .uname = "WB", ++ .udesc = "Number of times the microcode assist is invoked by hardware upon uop writeback", ++ .ucode = 0x8000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, + }; + + static const intel_x86_umask_t ivb_resource_stalls[]={ +@@ -1206,11 +1221,17 @@ static const intel_x86_umask_t ivb_rs_events[]={ + }; + + static const intel_x86_umask_t ivb_tlb_access[]={ +- { .uname = "LOAD_STLB_HIT", +- .udesc = "Number of load operations that missed L1TLN but hit L2TLB", ++ { .uname = "STLB_HIT", ++ .udesc = "Number of load operations that missed L1TLB but hit L2TLB", + .ucode = 0x400, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, ++ { .uname = "LOAD_STLB_HIT", ++ .udesc = "Number of load operations that missed L1TLB but hit L2TLB", ++ .ucode = 0x400, ++ .uequiv= "STLB_HIT", ++ .uflags= INTEL_X86_NCOMBO, ++ }, + }; + + static const intel_x86_umask_t ivb_tlb_flush[]={ +@@ -1230,7 +1251,12 @@ static const intel_x86_umask_t ivb_uops_executed[]={ + { .uname = "CORE", + .udesc = "Counts total number of uops executed from any thread per cycle", + .ucode = 0x200, +- .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "THREAD", ++ .udesc = "Counts total number of uops executed per thread each cycle", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO, + }, + }; + +@@ -1577,6 +1603,55 @@ static const intel_x86_umask_t ivb_cycle_activity[]={ + }, + }; + ++static const intel_x86_umask_t ivb_fp_comp_ops_exe[]={ ++ { .uname = "X87", ++ .udesc = "Number of X87 uops executed", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "SSE_FP_PACKED_DOUBLE", ++ .udesc = "Number of SSE or AVX-128 double precision FP packed uops executed", ++ .ucode = 0x1000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "SSE_FP_SCALAR_SINGLE", ++ .udesc = "Number of SSE or AVX-128 single precision FP scalar uops executed", ++ .ucode = 0x2000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "SSE_PACKED_SINGLE", ++ .udesc = "Number of SSE or AVX-128 single precision FP packed uops executed", ++ .ucode = 0x4000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "SSE_SCALAR_DOUBLE", ++ .udesc = "Number of SSE or AVX-128 double precision FP scalar uops executed", ++ .ucode = 0x8000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t ivb_simd_fp_256[]={ ++ { .uname = "PACKED_SINGLE", ++ .udesc = "Counts 256-bit packed single-precision", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PACKED_DOUBLE", ++ .udesc = "Counts 256-bit packed double-precision", ++ .ucode = 0x200, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t ivb_lsd[]={ ++ { .uname = "UOPS", ++ .udesc = "Number of uops delivered by the Loop Stream Detector (LSD)", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ + static const intel_x86_entry_t intel_ivb_pe[]={ + { .name = "ARITH", + .desc = "Counts arithmetic multiply operations", +@@ -1942,6 +2017,16 @@ static const intel_x86_entry_t intel_ivb_pe[]={ + .ngrp = 1, + .umasks = ivb_l3_lat_cache, + }, ++{ .name = "LONGEST_LAT_CACHE", ++ .desc = "Core-originated cacheable demand requests to L3", ++ .modmsk = INTEL_V3_ATTRS, ++ .cntmsk = 0xff, ++ .code = 0x2e, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_l3_lat_cache), ++ .ngrp = 1, ++ .equiv = "L3_LAT_CACHE", ++ .umasks = ivb_l3_lat_cache, ++}, + { .name = "MACHINE_CLEARS", + .desc = "Machine clear asserted", + .modmsk = INTEL_V3_ATTRS, +@@ -2107,6 +2192,15 @@ static const intel_x86_entry_t intel_ivb_pe[]={ + .ngrp = 1, + .umasks = ivb_rs_events, + }, ++{ .name = "DTLB_LOAD_ACCESS", ++ .desc = "TLB access", ++ .modmsk = INTEL_V3_ATTRS, ++ .cntmsk = 0xff, ++ .code = 0x5f, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_tlb_access), ++ .ngrp = 1, ++ .umasks = ivb_tlb_access, ++}, + { .name = "TLB_ACCESS", + .desc = "TLB access", + .modmsk = INTEL_V3_ATTRS, +@@ -2114,6 +2208,7 @@ static const intel_x86_entry_t intel_ivb_pe[]={ + .code = 0x5f, + .numasks = LIBPFM_ARRAY_SIZE(ivb_tlb_access), + .ngrp = 1, ++ .equiv = "DTLB_LOAD_ACCESS", + .umasks = ivb_tlb_access, + }, + { .name = "TLB_FLUSH", +@@ -2175,6 +2270,33 @@ static const intel_x86_entry_t intel_ivb_pe[]={ + .ngrp = 1, + .umasks = ivb_uops_retired, + }, ++{ .name = "FP_COMP_OPS_EXE", ++ .desc = "Counts number of floating point events", ++ .modmsk = INTEL_V3_ATTRS, ++ .cntmsk = 0xff, ++ .code = 0x10, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_fp_comp_ops_exe), ++ .ngrp = 1, ++ .umasks = ivb_fp_comp_ops_exe, ++}, ++{ .name = "SIMD_FP_256", ++ .desc = "Counts 256-bit packed floating point instructions", ++ .modmsk = INTEL_V3_ATTRS, ++ .cntmsk = 0xff, ++ .code = 0x11, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_simd_fp_256), ++ .ngrp = 1, ++ .umasks = ivb_simd_fp_256, ++}, ++{ .name = "LSD", ++ .desc = "Loop stream detector", ++ .modmsk = INTEL_V3_ATTRS, ++ .cntmsk = 0xff, ++ .code = 0xa8, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_lsd), ++ .ngrp = 1, ++ .umasks = ivb_lsd, ++}, + { .name = "OFFCORE_RESPONSE_0", + .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", + .modmsk = INTEL_V3_ATTRS, +-- +1.8.3.1 + +From fe3e6e865e98cd7f1743a26896e777873ae8b682 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 11 Nov 2013 17:58:19 +0100 +Subject: [PATCH 08/14] add missing Core select umasks to AMD Fam15h event + table + +As documented in BKDG Fam15 rev 3.08. + +As per encoding, the core_select umasks cannot be combined. +User has to select either ANY_CORE or one specific core, e.g., +CORE_1. Default is ANY_CORE for all relevant events. + +Patch adds corresponding tests to validation test suite. + +Signed-off-by: Stephane Eranian +--- + lib/events/amd64_events_fam15h.h | 84 ++++++++++++++++++++++++++++++++++------ + 1 file changed, 72 insertions(+), 12 deletions(-) + +diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h +index 5738e4c..ac2b111 100644 +--- a/lib/events/amd64_events_fam15h.h ++++ b/lib/events/amd64_events_fam15h.h +@@ -40,6 +40,62 @@ + * Processors, Rev 0.90, May 18, 2010 + */ + ++#define CORE_SELECT(b) \ ++ { .uname = "CORE_0",\ ++ .udesc = "Measure on Core0",\ ++ .ucode = 0 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_1",\ ++ .udesc = "Measure on Core1",\ ++ .ucode = 1 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_2",\ ++ .udesc = "Measure on Core2",\ ++ .ucode = 2 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_3",\ ++ .udesc = "Measure on Core3",\ ++ .ucode = 3 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_4",\ ++ .udesc = "Measure on Core4",\ ++ .ucode = 4 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_5",\ ++ .udesc = "Measure on Core5",\ ++ .ucode = 5 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_6",\ ++ .udesc = "Measure on Core6",\ ++ .ucode = 6 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_7",\ ++ .udesc = "Measure on Core7",\ ++ .ucode = 7 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "ANY_CORE",\ ++ .udesc = "Measure on any core",\ ++ .ucode = 0xf << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL,\ ++ } ++ + static const amd64_umask_t amd64_fam15h_dispatched_fpu_ops[]={ + { .uname = "OPS_PIPE0", + .udesc = "Total number uops assigned to Pipe 0", +@@ -1639,20 +1695,30 @@ static const amd64_umask_t amd64_fam15h_read_request_to_l3_cache[]={ + { .uname = "READ_BLOCK_EXCLUSIVE", + .udesc = "Read Block Exclusive (Data cache read)", + .ucode = 0x1, ++ .grpid = 0, + }, + { .uname = "READ_BLOCK_SHARED", + .udesc = "Read Block Shared (Instruction cache read)", + .ucode = 0x2, ++ .grpid = 0, + }, + { .uname = "READ_BLOCK_MODIFY", + .udesc = "Read Block Modify", + .ucode = 0x4, ++ .grpid = 0, + }, +- { .uname = "ALL", +- .udesc = "All sub-events selected", ++ { .uname = "PREFETCH", ++ .udesc = "Count prefetches honly", ++ .ucode = 0x8, ++ .grpid = 0, ++ }, ++ { .uname = "READ_BLOCK_ANY", ++ .udesc = "Count any read request", + .ucode = 0x7, +- .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ .grpid = 0, ++ .uflags= AMD64_FL_DFL | AMD64_FL_NCOMBO, + }, ++ CORE_SELECT(1), + }; + + static const amd64_umask_t amd64_fam15h_l3_fills_caused_by_l2_evictions[]={ +@@ -1682,13 +1748,7 @@ static const amd64_umask_t amd64_fam15h_l3_fills_caused_by_l2_evictions[]={ + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + .grpid = 0, + }, +- { .uname = "ALL_CORES", +- .udesc = "All core", +- .ucode = 0xf0, +- .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, +- .grpid = 1, +- }, +- ++ CORE_SELECT(1), + }; + + static const amd64_umask_t amd64_fam15h_l3_evictions[]={ +@@ -2421,7 +2481,7 @@ static const amd64_entry_t amd64_fam15h_pe[]={ + .desc = "Read Request to L3 Cache", + .code = 0x4e0, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), +- .ngrp = 1, ++ .ngrp = 2, + .umasks = amd64_fam15h_read_request_to_l3_cache, + }, + { .name = "L3_CACHE_MISSES", +@@ -2449,7 +2509,7 @@ static const amd64_entry_t amd64_fam15h_pe[]={ + .desc = "Non-canceled L3 Read Requests", + .code = 0x4ed, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), +- .ngrp = 1, ++ .ngrp = 2, + .umasks = amd64_fam15h_read_request_to_l3_cache, + }, + { .name = "L3_LATENCY", +-- +1.8.3.1 + +From 59bc6b3c586561137d9c1172a34deac3f8887999 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Thu, 28 Nov 2013 10:39:45 +0100 +Subject: [PATCH 09/14] fix typos in event description for Cortex A9 and P4 + +Event description typos fixes. + +Signed-off-by: Andreas Beckmann +--- + lib/events/arm_cortex_a8_events.h | 2 +- + lib/events/arm_cortex_a9_events.h | 2 +- + lib/events/intel_netburst_events.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/events/arm_cortex_a8_events.h b/lib/events/arm_cortex_a8_events.h +index 467813e..2b61dda 100644 +--- a/lib/events/arm_cortex_a8_events.h ++++ b/lib/events/arm_cortex_a8_events.h +@@ -82,7 +82,7 @@ static const arm_entry_t arm_cortex_a8_pe []={ + }, + {.name = "PC_WRITE", + .code = 0x0c, +- .desc = "Software change of PC. Equivelant to branches" ++ .desc = "Software change of PC. Equivalent to branches" + }, + {.name = "PC_IMM_BRANCH", + .code = 0x0d, +diff --git a/lib/events/arm_cortex_a9_events.h b/lib/events/arm_cortex_a9_events.h +index 4f56fac..c034bd3 100644 +--- a/lib/events/arm_cortex_a9_events.h ++++ b/lib/events/arm_cortex_a9_events.h +@@ -82,7 +82,7 @@ static const arm_entry_t arm_cortex_a9_pe []={ + }, + {.name = "PC_WRITE", + .code = 0x0c, +- .desc = "Software change of PC. Equivelant to branches" ++ .desc = "Software change of PC. Equivalent to branches" + }, + {.name = "PC_IMM_BRANCH", + .code = 0x0d, +diff --git a/lib/events/intel_netburst_events.h b/lib/events/intel_netburst_events.h +index 73f58da..e24f22f 100644 +--- a/lib/events/intel_netburst_events.h ++++ b/lib/events/intel_netburst_events.h +@@ -396,7 +396,7 @@ static const netburst_entry_t netburst_events[] = { + .desc = "Number of entries (clipped at 15) in the IOQ that are " + "active. An allocated entry can be a sector (64 bytes) " + "or a chunk of 8 bytes. This event must be programmed in " +- "conjuction with IOQ_allocation. All 'TYPE_BIT*' event-masks " ++ "conjunction with IOQ_allocation. All 'TYPE_BIT*' event-masks " + "together are treated as a single 5-bit value", + .event_select = 0x1A, + .escr_select = 0x6, +-- +1.8.3.1 + +From e4ced34f7558780ebb204f4fede45f9eeebfacb7 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sun, 22 Sep 2013 22:16:12 +0200 +Subject: [PATCH 10/14] add Intel Silvermont core PMU support + +Add support for Intel Silvermont Atom processors. + +Note that there is not support for the Average Latency measurement +in this patch. + +Based on the Software Optimization Guide, Chapter 15. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_slm_events.h | 896 ++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 896 insertions(+) + create mode 100644 lib/events/intel_slm_events.h + +diff --git a/lib/events/intel_slm_events.h b/lib/events/intel_slm_events.h +new file mode 100644 +index 0000000..c540e64 +--- /dev/null ++++ b/lib/events/intel_slm_events.h +@@ -0,0 +1,896 @@ ++/* ++ * Copyright (c) 2013 Google, Inc ++ * Contributed by Stephane Eranian ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * This file is part of libpfm, a performance monitoring support library for ++ * applications on Linux. ++ * ++ * PMU: slm (Intel Silvermont) ++ */ ++ ++static const intel_x86_umask_t slm_icache[]={ ++ { .uname = "ACCESSES", ++ .udesc = "Instruction fetches, including uncacheacble fetches", ++ .ucode = 0x300, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "MISSES", ++ .udesc = "Count all instructions fetches that miss tha icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", ++ .ucode = 0x200, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "HIT", ++ .udesc = "Count all instructions fetches from the instruction cache", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_uops_retired[]={ ++ { .uname = "ANY", ++ .udesc = "Micro-ops retired", ++ .ucode = 0x1000, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "MS", ++ .udesc = "Micro-ops retired that were supplied fro MSROM", ++ .ucode = 0x0100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "X87", ++ .udesc = "Micro-ops retired that used X87 hardware", ++ .ucode = 0x0200, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "MUL", ++ .udesc = "Micro-ops retired that used MUL hardware", ++ .ucode = 0x0400, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "DIV", ++ .udesc = "Micro-ops retired that used DIV hardware", ++ .ucode = 0x0800, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "STALLED_CYCLES", ++ .udesc = "Cycles no micro-ops retired", ++ .ucode = 0x1000 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "STALLS", ++ .udesc = "Periods no micro-ops retired", ++ .ucode = 0x1000 | INTEL_X86_MOD_EDGE | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_inst_retired[]={ ++ { .uname = "ANY_P", ++ .udesc = "Instructions retired using generic counter (precise event)", ++ .ucode = 0x0, ++ .uflags= INTEL_X86_PEBS | INTEL_X86_DFL, ++ }, ++ { .uname = "ANY", ++ .udesc = "Instructions retired using generic counter (precise event)", ++ .uequiv = "ANY_P", ++ .ucode = 0x0, ++ .uflags= INTEL_X86_PEBS, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_l2_reject_xq[]={ ++ { .uname = "ALL", ++ .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indictes back pressure from the IDI link. The XQ may reject transactions fro mthe L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", ++ .ucode = 0x000, ++ .uflags= INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_machine_clears[]={ ++ { .uname = "SMC", ++ .udesc = "Self-Modifying Code detected", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_DFL, ++ }, ++ { .uname = "MEMORY_ORDERING", ++ .udesc = "Number of stalled cycles due to memory ordering", ++ .ucode = 0x200, ++ }, ++ { .uname = "FP_ASSIST", ++ .udesc = "Number of stalled cycle due to FPU assist", ++ .ucode = 0x400, ++ }, ++ { .uname = "ALL", ++ .udesc = "Count any the machine clears", ++ .ucode = 0x800, ++ }, ++ { .uname = "ANY", ++ .udesc = "Count any the machine clears", ++ .uequiv = "ALL", ++ .ucode = 0x800, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_br_inst_retired[]={ ++ { .uname = "ANY", ++ .udesc = "Any retired branch instruction (Precise Event)", ++ .ucode = 0x0, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_PEBS, ++ }, ++ { .uname = "ALL_BRANCHES", ++ .udesc = "Any Retired branch instruction (Precise Event)", ++ .uequiv = "ANY", ++ .ucode = 0x0, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "JCC", ++ .udesc = "JCC instructions retired (Precise Event)", ++ .ucode = 0x7e00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "TAKEN_JCC", ++ .udesc = "Taken JCC instructions retired (Precise Event)", ++ .ucode = 0xfe00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "CALL", ++ .udesc = "Near call instructions retired (Precise Event)", ++ .ucode = 0xf900, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "REL_CALL", ++ .udesc = "Near relative call instructions retired (Precise Event)", ++ .ucode = 0xfd00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "IND_CALL", ++ .udesc = "Near indirect call instructions retired (Precise Event)", ++ .ucode = 0xfb00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Near ret instructions retired (Precise Event)", ++ .ucode = 0xc00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "NON_RETURN_IND", ++ .udesc = "Number of near indirect jmp and near indirect call instructions retired (Precise Event)", ++ .ucode = 0xeb00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "FAR_BRANCH", ++ .udesc = "Far branch instructions retired (Precise Event)", ++ .uequiv = "FAR", ++ .ucode = 0xbf00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "FAR", ++ .udesc = "Far branch instructions retired (Precise Event)", ++ .ucode = 0xbf00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_baclears[]={ ++ { .uname = "ANY", ++ .udesc = "BACLEARS asserted", ++ .uequiv = "ALL", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ALL", ++ .udesc = "BACLEARS asserted", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_DFL | INTEL_X86_NCOMBO, ++ }, ++ { .uname = "INDIRECT", ++ .udesc = "Number of baclears for indirect branches", ++ .ucode = 0x200, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "UNCOND", ++ .udesc = "Number of baclears for unconditional branches", ++ .ucode = 0x400, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Number of baclears for return branches", ++ .ucode = 0x800, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "COND", ++ .udesc = "Number of baclears for conditional branches", ++ .ucode = 0x1000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_cpu_clk_unhalted[]={ ++ { .uname = "CORE_P", ++ .udesc = "Core cycles when core is not halted", ++ .ucode = 0x0, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "BUS", ++ .udesc = "Bus cycles when core is not halted. This event can give a measurement of the elapsed time. This events has a constant ratio with CPU_CLK_UNHALTED:REF event, which is the maximum bus to processor frequency ratio", ++ .uequiv = "REF_P", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "REF_P", ++ .udesc = "Number of reference cycles that the core is not in a halted state. The core enters the halted state when it is running the HLT instruction. In mobile systems, the core frequency may change from time to time. This event is not affected by core frequency changes but counts as if the core is running a the same maximum frequency all the time", ++ .ucode = 0x200, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_mem_uop_retired[]={ ++ { .uname = "LD_DCU_MISS", ++ .udesc = "Number of load uops retired that miss in L1 data cache. Note that prefetch misses will not be counted", ++ .ucode = 0x100, ++ }, ++ { .uname = "LD_L2_HIT", ++ .udesc = "Number of load uops retired that hit L2 (Precise Event)", ++ .ucode = 0x200, ++ .uflags= INTEL_X86_PEBS, ++ }, ++ { .uname = "LD_L2_MISS", ++ .udesc = "Number of load uops retired that missed L2 (Precise Event)", ++ .ucode = 0x400, ++ .uflags= INTEL_X86_PEBS, ++ }, ++ { .uname = "LD_DTLB_MISS", ++ .udesc = "Number of load uops retired that had a DTLB miss (Precise Event)", ++ .ucode = 0x800, ++ .uflags= INTEL_X86_PEBS, ++ }, ++ { .uname = "LD_UTLB_MISS", ++ .udesc = "Number of load uops retired that had a UTLB miss", ++ .ucode = 0x1000, ++ }, ++ { .uname = "HITM", ++ .udesc = "Number of load uops retired that got data from the other core or from the other module and the line was modified (Precise Event)", ++ .ucode = 0x2000, ++ .uflags= INTEL_X86_PEBS, ++ }, ++ { .uname = "ANY_LD", ++ .udesc = "Number of load uops retired", ++ .ucode = 0x4000, ++ }, ++ { .uname = "ANY_ST", ++ .udesc = "Number of store uops retired", ++ .ucode = 0x8000, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_page_walks[]={ ++ { .uname = "D_SIDE_CYCLES", ++ .udesc = "Number of cycles when a D-side page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks", ++ .ucode = 0x100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "D_SIDE_WALKS", ++ .udesc = "Number of D-side page walks", ++ .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), ++ .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "I_SIDE_CYCLES", ++ .udesc = "Number of cycles when a I-side page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks", ++ .ucode = 0x200, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "I_SIDE_WALKS", ++ .udesc = "Number of I-side page walks", ++ .ucode = 0x200 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), ++ .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_llc_rqsts[]={ ++ { .uname = "MISS", ++ .udesc = "Number of L2 cache misses", ++ .ucode = 0x4100, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY", ++ .udesc = "Number of L2 cache references", ++ .ucode = 0x4f00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_rehabq[]={ ++ { .uname = "LD_BLOCK_ST_FORWARD", ++ .udesc = "Number of retired loads that were prohibited from receiving forwarded data from the store because of address mismatch (Precise Event)", ++ .ucode = 0x0100, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "LD_BLOCK_STD_NOTREADY", ++ .udesc = "Number of times forward was technically possible but did not occur because the store data was not available at the right time", ++ .ucode = 0x0200, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ST_SPLITS", ++ .udesc = "Number of retired stores that experienced cache line boundary splits", ++ .ucode = 0x0400, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "LD_SPLITS", ++ .udesc = "Number of retired loads that experienced cache line boundary splits (Precise Event)", ++ .ucode = 0x0800, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "LOCK", ++ .udesc = "Number of retired memory operations with lock semantics. These are either implicit locked instructions such as XCHG or instructions with an explicit LOCK prefix", ++ .ucode = 0x1000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "STA_FULL", ++ .udesc = "Number of retired stores that are delayed becuase there is not a store address buffer available", ++ .ucode = 0x2000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY_LD", ++ .udesc = "Number of load uops reissued from RehabQ", ++ .ucode = 0x4000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY_ST", ++ .udesc = "Number of store uops reissued from RehabQ", ++ .ucode = 0x8000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_offcore_response[]={ ++ { .uname = "DMND_DATA_RD", ++ .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", ++ .ucode = 1ULL << (0 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "DMND_RFO", ++ .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", ++ .ucode = 1ULL << (1 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "DMND_IFETCH", ++ .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. Does not count L2 code read prefetches", ++ .ucode = 1ULL << (2 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "WB", ++ .udesc = "Request: number of writebacks (modified to exclusive) transactions", ++ .ucode = 1ULL << (3 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_L2_DATA_RD", ++ .udesc = "Request: number of data cacheline reads generated by L2 prefetchers", ++ .ucode = 1ULL << (4 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_RFO", ++ .udesc = "Request: number of RFO requests generated by L2 prefetchers", ++ .ucode = 1ULL << (5 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_IFETCH", ++ .udesc = "Request: number of code reads generated by L2 prefetchers", ++ .ucode = 1ULL << (6 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_READ", ++ .udesc = "Request: number of demand reads of partial cachelines (including UC, WC)", ++ .ucode = 1ULL << (7 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_WRITE", ++ .udesc = "Request: number of demand RFO requests to write to partial cache lines (includes UC, WT, WP)", ++ .ucode = 1ULL << (8 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "UC_IFETCH", ++ .udesc = "Request: number of UC instruction fetches", ++ .ucode = 1ULL << (9 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "BUS_LOCKS", ++ .udesc = "Request: number bus lock and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "STRM_ST", ++ .udesc = "Request: number of streaming store requests", ++ .ucode = 1ULL << (11 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "SW_PREFETCH", ++ .udesc = "Request: number of software prefetch requests", ++ .ucode = 1ULL << (12 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_L1_DATA_RD", ++ .udesc = "Request: number of data cacheline reads generated by L1 prefetchers", ++ .ucode = 1ULL << (13 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_STRM_ST", ++ .udesc = "Request: number of partial streaming store requests", ++ .ucode = 1ULL << (14 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "OTHER", ++ .udesc = "Request: counts one any other request that crosses IDI, including I/O", ++ .ucode = 1ULL << (15+8), ++ .grpid = 0, ++ }, ++ { .uname = "ANY_IFETCH", ++ .udesc = "Request: combination of PF_IFETCH | DMND_IFETCH | UC_IFETCH", ++ .uequiv = "PF_IFETCH:DMND_IFETCH:UC_IFETCH", ++ .ucode = (1ULL << 6 | 1ULL << 2 | 1ULL << 9) << 8, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_REQUEST", ++ .udesc = "Request: combination of all request umasks", ++ .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_IFETCH:WB:PF_L2_DATA_RD:PF_RFO:PF_IFETCH:PARTIAL_READ:PARTIAL_WRITE:UC_IFETCH:BUS_LOCKS:STRM_ST:SW_PREFETCH:PF_L1_DATA_RD:PARTIAL_STRM_ST:OTHER", ++ .ucode = 0xffff00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_DATA", ++ .udesc = "Request: combination of DMND_DATA | PF_L1_DATA_RD | PF_L2_DATA_RD", ++ .uequiv = "DMND_DATA_RD:PF_L1_DATA_RD:PF_L2_DATA_RD", ++ .ucode = (1ULL << 0 | 1ULL << 4 | 1ULL << 13) << 8, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_RFO", ++ .udesc = "Request: combination of DMND_RFO | PF_RFO", ++ .uequiv = "DMND_RFO:PF_RFO", ++ .ucode = (1ULL << 1 | 1ULL << 5) << 8, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_RESPONSE", ++ .udesc = "Response: count any response type", ++ .ucode = 1ULL << (16+8), ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT", ++ .udesc = "Supplier: counts L2 hits in M/E/S states", ++ .ucode = 1ULL << (18+8), ++ .grpid = 1, ++ }, ++ { .uname = "SNP_NONE", ++ .udesc = "Snoop: counts number of times no snoop-related information is available", ++ .ucode = 1ULL << (31+8), ++ .grpid = 2, ++ }, ++ { .uname = "SNP_MISS", ++ .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", ++ .ucode = 1ULL << (33+8), ++ .grpid = 2, ++ }, ++ { .uname = "SNP_HIT", ++ .udesc = "Snoop: counts number of times a snoop hits in the other module where no modified copies were found in the L1 cache of the other core", ++ .ucode = 1ULL << (34+8), ++ .grpid = 2, ++ }, ++ { .uname = "SNP_HITM", ++ .udesc = "Snoop: counts number of times a snoop hits in the other module where modified copies were found in the L1 cache of the other core", ++ .ucode = 1ULL << (36+8), ++ .grpid = 2, ++ }, ++ { .uname = "NON_DRAM", ++ .udesc = "Snoop: counts number of times target was a non-DRAM system address. This includes MMIO transactions", ++ .ucode = 1ULL << (37+8), ++ .grpid = 2, ++ }, ++ { .uname = "SNP_ANY", ++ .udesc = "Snoop: any snoop reason", ++ .ucode = 0x7dULL << (31+8), ++ .uequiv = "SNP_NONE:SNP_MISS:SNP_HIT:SNP_HITM:NON_DRAM", ++ .uflags= INTEL_X86_DFL, ++ .grpid = 2, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_br_misp_inst_retired[]={ ++ { .uname = "ALL_BRANCHES", ++ .udesc = "All mispredicted branches (Precise Event)", ++ .uequiv = "ANY", ++ .ucode = 0x0000, /* architectural encoding */ ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "ANY", ++ .udesc = "All mispredicted branches (Precise Event)", ++ .ucode = 0x0000, /* architectural encoding */ ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, ++ }, ++ { .uname = "JCC", ++ .udesc = "Number of mispredicted conditional branch instructions retired (Precise Event)", ++ .ucode = 0x7e00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "NON_RETURN_IND", ++ .udesc = "Number of mispredicted non-return branch instructions retired (Precise Event)", ++ .ucode = 0xeb00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "FAR", ++ .udesc = "Number of mispredicted far branch instructions retired (Precise Event)", ++ .ucode = 0xbf00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Number of mispredicted return branch instructions retired (Precise Event)", ++ .ucode = 0xf700, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "CALL", ++ .udesc = "Number of mispredicted call branch instructions retired (Precise Event)", ++ .ucode = 0xf900, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "IND_CALL", ++ .udesc = "Number of mispredicted indirect call branch instructions retired (Precise Event)", ++ .ucode = 0xfb00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "REL_CALL", ++ .udesc = "Number of mispredicted relative call branch instructions retired (Precise Event)", ++ .ucode = 0xfd00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "TAKEN_JCC", ++ .udesc = "Number of mispredicted taken conditional branch instructions retired (Precise Event)", ++ .ucode = 0xfe00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_no_alloc_cycles[]={ ++ { .uname = "ANY", ++ .udesc = "Number of cycles when the front-end does not provide any instructions to be allocated for any reason", ++ .ucode = 0x3f00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "NOT_DELIVERED", ++ .udesc = "Number of cycles when the front-end does not provide any instructions to be allocated but the back-end is not stalled", ++ .ucode = 0x5000, ++ .uflags= INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_rs_full_stall[]={ ++ { .uname = "MEC", ++ .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for the MEC cluster is full", ++ .ucode = 0x0100, ++ }, ++ { .uname = "IEC_PORT0", ++ .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 0 integer cluster is full", ++ .ucode = 0x0200, ++ }, ++ { .uname = "IEC_PORT1", ++ .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 1 integer cluster is full", ++ .ucode = 0x0400, ++ }, ++ { .uname = "FPC_PORT0", ++ .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 0 floating-pointer cluster is full", ++ .ucode = 0x0800, ++ }, ++ { .uname = "FPC_PORT1", ++ .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 1 floating-pointer cluster is full", ++ .ucode = 0x1000, ++ }, ++ { .uname = "ANY", ++ .udesc = "Number of cycles when the allocation pipeline is stalled due any one of the RS being full", ++ .ucode = 0x1f00, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_cycles_div_busy[]={ ++ { .uname = "ANY", ++ .udesc = "Number of cycles the divider is busy", ++ .ucode = 0x0100, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_ms_decoded[]={ ++ { .uname = "ENTRY", ++ .udesc = "Number of times the MSROM starts a flow of uops", ++ .ucode = 0x0100, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t slm_decode_restriction[]={ ++ { .uname = "PREDECODE_WRONG", ++ .udesc = "Number of times the prediction (from the predecode cache) for intruction length is incorrect", ++ .ucode = 0x0100, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_entry_t intel_slm_pe[]={ ++{ .name = "UNHALTED_CORE_CYCLES", ++ .desc = "Unhalted core cycles", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x200000003ull, ++ .code = 0x3c, ++}, ++{ .name = "UNHALTED_REFERENCE_CYCLES", ++ .desc = "Unhalted reference cycle", ++ .modmsk = INTEL_FIXED3_ATTRS, ++ .cntmsk = 0x400000000ull, ++ .code = 0x0300, /* pseudo encoding */ ++ .flags = INTEL_X86_FIXED, ++}, ++{ .name = "INSTRUCTION_RETIRED", ++ .desc = "Instructions retired", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x100000003ull, ++ .code = 0xc0, ++}, ++{ .name = "INSTRUCTIONS_RETIRED", ++ .desc = "This is an alias for INSTRUCTION_RETIRED", ++ .modmsk = INTEL_V2_ATTRS, ++ .equiv = "INSTRUCTION_RETIRED", ++ .cntmsk = 0x10003, ++ .code = 0xc0, ++}, ++{ .name = "LLC_REFERENCES", ++ .desc = "Last level of cache references", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x4f2e, ++}, ++{ .name = "LAST_LEVEL_CACHE_REFERENCES", ++ .desc = "This is an alias for LLC_REFERENCES", ++ .modmsk = INTEL_V2_ATTRS, ++ .equiv = "LLC_REFERENCES", ++ .cntmsk = 0x3, ++ .code = 0x4f2e, ++}, ++{ .name = "LLC_MISSES", ++ .desc = "Last level of cache misses", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x412e, ++}, ++{ .name = "LAST_LEVEL_CACHE_MISSES", ++ .desc = "This is an alias for LLC_MISSES", ++ .modmsk = INTEL_V2_ATTRS, ++ .equiv = "LLC_MISSES", ++ .cntmsk = 0x3, ++ .code = 0x412e, ++}, ++{ .name = "BRANCH_INSTRUCTIONS_RETIRED", ++ .desc = "Branch instructions retired", ++ .modmsk = INTEL_V2_ATTRS, ++ .equiv = "BR_INST_RETIRED:ANY", ++ .cntmsk = 0x3, ++ .code = 0xc4, ++}, ++{ .name = "MISPREDICTED_BRANCH_RETIRED", ++ .desc = "Mispredicted branch instruction retired", ++ .equiv = "BR_MISP_INST_RETIRED", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc5, ++ .flags= INTEL_X86_PEBS, ++}, ++/* begin model specific events */ ++{ .name = "DECODE_RESTRICTION", ++ .desc = "Instruction length prediction delay", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xe9, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_decode_restriction), ++ .umasks = slm_decode_restriction, ++}, ++{ .name = "L2_REJECT_XQ", ++ .desc = "Rejected L2 requests to XQ", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x30, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_l2_reject_xq), ++ .ngrp = 1, ++ .umasks = slm_l2_reject_xq, ++}, ++{ .name = "ICACHE", ++ .desc = "Instruction fetches", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x80, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_icache), ++ .ngrp = 1, ++ .umasks = slm_icache, ++}, ++{ .name = "UOPS_RETIRED", ++ .desc = "Micro-ops retired", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc2, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_uops_retired), ++ .ngrp = 1, ++ .umasks = slm_uops_retired, ++}, ++{ .name = "INST_RETIRED", ++ .desc = "Instructions retired", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc0, ++ .flags= INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_inst_retired), ++ .ngrp = 1, ++ .umasks = slm_inst_retired, ++}, ++{ .name = "CYCLES_DIV_BUSY", ++ .desc = "Cycles the divider is busy", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xcd, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_cycles_div_busy), ++ .ngrp = 1, ++ .umasks = slm_cycles_div_busy, ++}, ++{ .name = "RS_FULL_STALL", ++ .desc = "RS full", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xcb, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_rs_full_stall), ++ .ngrp = 1, ++ .umasks = slm_rs_full_stall, ++}, ++{ .name = "LLC_RQSTS", ++ .desc = "L2 cache requests", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x2e, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_llc_rqsts), ++ .ngrp = 1, ++ .umasks = slm_llc_rqsts, ++}, ++{ .name = "MACHINE_CLEARS", ++ .desc = "Self-Modifying Code detected", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc3, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_machine_clears), ++ .ngrp = 1, ++ .umasks = slm_machine_clears, ++}, ++{ .name = "BR_INST_RETIRED", ++ .desc = "Retired branch instructions", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc4, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_br_inst_retired), ++ .flags= INTEL_X86_PEBS, ++ .ngrp = 1, ++ .umasks = slm_br_inst_retired, ++}, ++{ .name = "BR_MISP_INST_RETIRED", ++ .desc = "Mispredicted retired branch instructions", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc5, ++ .flags= INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_br_misp_inst_retired), ++ .ngrp = 1, ++ .umasks = slm_br_misp_inst_retired, ++}, ++{ .name = "MS_DECODED", ++ .desc = "MS decoder", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xe7, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_ms_decoded), ++ .ngrp = 1, ++ .umasks = slm_ms_decoded, ++}, ++{ .name = "BACLEARS", ++ .desc = "Branch address calculator", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xe6, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_baclears), ++ .ngrp = 1, ++ .umasks = slm_baclears, ++}, ++{ .name = "NO_ALLOC_CYCLES", ++ .desc = "Front-end allocation", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xca, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_no_alloc_cycles), ++ .ngrp = 1, ++ .umasks = slm_no_alloc_cycles, ++}, ++{ .name = "CPU_CLK_UNHALTED", ++ .desc = "Core cycles when core is not halted", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x3c, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_cpu_clk_unhalted), ++ .ngrp = 1, ++ .umasks = slm_cpu_clk_unhalted, ++}, ++{ .name = "MEM_UOP_RETIRED", ++ .desc = "Retired loads micro-ops", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x4, ++ .flags= INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_mem_uop_retired), ++ .ngrp = 1, ++ .umasks = slm_mem_uop_retired, ++}, ++{ .name = "PAGE_WALKS", ++ .desc = "Number of page-walks executed", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x5, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_page_walks), ++ .ngrp = 1, ++ .umasks = slm_page_walks, ++}, ++{ .name = "CORE_REJECT", ++ .desc = "Demand and L1 prefetcher requests rejected by L2", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x31, ++}, ++{ .name = "REHABQ", ++ .desc = "Memory reference queue", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x03, ++ .flags= INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_rehabq), ++ .ngrp = 1, ++ .umasks = slm_rehabq, ++}, ++{ .name = "OFFCORE_RESPONSE_0", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x01b7, ++ .flags= INTEL_X86_NHM_OFFCORE, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_offcore_response), ++ .ngrp = 3, ++ .umasks = slm_offcore_response, ++}, ++{ .name = "OFFCORE_RESPONSE_1", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x02b7, ++ .flags= INTEL_X86_NHM_OFFCORE, ++ .numasks = LIBPFM_ARRAY_SIZE(slm_offcore_response), ++ .ngrp = 3, ++ .umasks = slm_offcore_response, /* identical to actual umasks list for this event */ ++}, ++}; +-- +1.8.3.1 + +From 93f4b19b49ee849cd3f822e87be12238305c40ba Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sun, 1 Dec 2013 10:27:53 +0100 +Subject: [PATCH 11/14] add AMD Fam15h Norhtbridge PMU support + +This patch adds proper support for AMD Fam15h uncore PMU +(Northbridge). + +The Northbridge (NB) events were in the core PMU event +list before. But on Fam15h, the NB PMU is completely +distinct and thus must be implemented in a separate +table. + +Furthermore, the NB perf_event support is also independent +from core PMU and uses dynamic PMU registration. This patch +handles this correctly too. + +The test suite is updated to take those changes into consideration. + +Signed-off-by: Stephane Eranian +--- + lib/events/amd64_events_fam15h.h | 224 ---- + lib/events/amd64_events_fam15h_nb.h | 2022 +++++++++++++++++++++++++++++++++++ + 2 files changed, 2022 insertions(+), 224 deletions(-) + create mode 100644 lib/events/amd64_events_fam15h_nb.h + +diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h +index ac2b111..7872468 100644 +--- a/lib/events/amd64_events_fam15h.h ++++ b/lib/events/amd64_events_fam15h.h +@@ -2295,228 +2295,4 @@ static const amd64_entry_t amd64_fam15h_pe[]={ + .modmsk = AMD64_FAM15H_ATTRS, + .code = 0x1d8, + }, +-{ .name = "DRAM_ACCESSES", +- .desc = "DRAM Accesses", +- .code = 0xe0, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_dram_accesses), +- .ngrp = 1, +- .umasks = amd64_fam15h_dram_accesses, +-}, +-{ .name = "DRAM_CONTROLLER_PAGE_TABLE_OVERFLOWS", +- .desc = "DRAM Controller Page Table Overflows", +- .code = 0xe1, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_dram_controller_page_table_overflows), +- .ngrp = 1, +- .umasks = amd64_fam15h_dram_controller_page_table_overflows, +-}, +-{ .name = "MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED", +- .desc = "Memory Controller DRAM Command Slots Missed", +- .code = 0xe2, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_dram_command_slots_missed), +- .ngrp = 1, +- .umasks = amd64_fam15h_memory_controller_dram_command_slots_missed, +-}, +-{ .name = "MEMORY_CONTROLLER_TURNAROUNDS", +- .desc = "Memory Controller Turnarounds", +- .code = 0xe3, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_turnarounds), +- .ngrp = 1, +- .umasks = amd64_fam15h_memory_controller_turnarounds, +-}, +-{ .name = "MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION", +- .desc = "Memory Controller Bypass Counter Saturation", +- .code = 0xe4, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_bypass_counter_saturation), +- .ngrp = 1, +- .umasks = amd64_fam15h_memory_controller_bypass_counter_saturation, +-}, +-{ .name = "THERMAL_STATUS", +- .desc = "Thermal Status", +- .code = 0xe8, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_thermal_status), +- .ngrp = 1, +- .umasks = amd64_fam15h_thermal_status, +-}, +-{ .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", +- .desc = "CPU/IO Requests to Memory/IO", +- .code = 0xe9, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_io_requests_to_memory_io), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_io_requests_to_memory_io, +-}, +-{ .name = "CACHE_BLOCK_COMMANDS", +- .desc = "Cache Block Commands", +- .code = 0xea, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cache_block_commands), +- .ngrp = 1, +- .umasks = amd64_fam15h_cache_block_commands, +-}, +-{ .name = "SIZED_COMMANDS", +- .desc = "Sized Commands", +- .code = 0xeb, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_sized_commands), +- .ngrp = 1, +- .umasks = amd64_fam15h_sized_commands, +-}, +-{ .name = "PROBE_RESPONSES_AND_UPSTREAM_REQUESTS", +- .desc = "Probe Responses and Upstream Requests", +- .code = 0xec, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_probe_responses_and_upstream_requests), +- .ngrp = 1, +- .umasks = amd64_fam15h_probe_responses_and_upstream_requests, +-}, +-{ .name = "GART_EVENTS", +- .desc = "GART Events", +- .code = 0xee, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_gart_events), +- .ngrp = 1, +- .umasks = amd64_fam15h_gart_events, +-}, +-{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_0", +- .desc = "Link Transmit Bandwidth Link 0", +- .code = 0xf6, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), +- .ngrp = 2, +- .umasks = amd64_fam15h_link_transmit_bandwidth, +-}, +-{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_1", +- .desc = "Link Transmit Bandwidth Link 1", +- .code = 0xf7, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), +- .ngrp = 2, +- .umasks = amd64_fam15h_link_transmit_bandwidth, +-}, +-{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_2", +- .desc = "Link Transmit Bandwidth Link 2", +- .code = 0xf8, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), +- .ngrp = 2, +- .umasks = amd64_fam15h_link_transmit_bandwidth, +-}, +-{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_3", +- .desc = "Link Transmit Bandwidth Link 3", +- .code = 0x1f9, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), +- .ngrp = 2, +- .umasks = amd64_fam15h_link_transmit_bandwidth, +-}, +-{ .name = "CPU_TO_DRAM_REQUESTS_TO_TARGET_NODE", +- .desc = "CPU to DRAM Requests to Target Node", +- .code = 0x1e0, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_to_dram_requests_to_target_node), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_to_dram_requests_to_target_node, +-}, +-{ .name = "IO_TO_DRAM_REQUESTS_TO_TARGET_NODE", +- .desc = "IO to DRAM Requests to Target Node", +- .code = 0x1e1, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_io_to_dram_requests_to_target_node), +- .ngrp = 1, +- .umasks = amd64_fam15h_io_to_dram_requests_to_target_node, +-}, +-{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_0_3", +- .desc = "CPU Read Command Latency to Target Node 0-3", +- .code = 0x1e2, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_0_3), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_0_3, +-}, +-{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_0_3", +- .desc = "CPU Read Command Requests to Target Node 0-3", +- .code = 0x1e3, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_0_3), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_0_3, +-}, +-{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_4_7", +- .desc = "CPU Read Command Latency to Target Node 4-7", +- .code = 0x1e4, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_4_7), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_4_7, +-}, +-{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_4_7", +- .desc = "CPU Read Command Requests to Target Node 4-7", +- .code = 0x1e5, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_4_7), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_4_7, +-}, +-{ .name = "CPU_COMMAND_LATENCY_TO_TARGET_NODE", +- .desc = "CPU Command Latency to Target Node", +- .code = 0x1e6, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_command_requests_to_target_node), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_command_requests_to_target_node, +-}, +-{ .name = "CPU_REQUESTS_TO_TARGET_NODE", +- .desc = "CPU Requests to Target Node", +- .code = 0x1e7, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_command_requests_to_target_node), +- .ngrp = 1, +- .umasks = amd64_fam15h_cpu_command_requests_to_target_node, +-}, +-{ .name = "REQUEST_CACHE_STATUS_0", +- .desc = "Request Cache Status 0", +- .code = 0x1ea, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_request_cache_status_0), +- .ngrp = 1, +- .umasks = amd64_fam15h_request_cache_status_0, +-}, +-{ .name = "REQUEST_CACHE_STATUS_1", +- .desc = "Request Cache Status 1", +- .code = 0x1eb, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_request_cache_status_1), +- .ngrp = 1, +- .umasks = amd64_fam15h_request_cache_status_1, +-}, +-{ .name = "MEMORY_CONTROLLER_REQUESTS", +- .desc = "Memory Controller Requests", +- .code = 0x1f0, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_requests), +- .ngrp = 1, +- .umasks = amd64_fam15h_memory_controller_requests, +-}, +-{ .name = "READ_REQUEST_TO_L3_CACHE", +- .desc = "Read Request to L3 Cache", +- .code = 0x4e0, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), +- .ngrp = 2, +- .umasks = amd64_fam15h_read_request_to_l3_cache, +-}, +-{ .name = "L3_CACHE_MISSES", +- .desc = "L3 Cache Misses", +- .code = 0x4e1, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), +- .ngrp = 2, +- .umasks = amd64_fam15h_read_request_to_l3_cache, +-}, +-{ .name = "L3_FILLS_CAUSED_BY_L2_EVICTIONS", +- .desc = "L3 Fills caused by L2 Evictions", +- .code = 0x4e2, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l3_fills_caused_by_l2_evictions), +- .ngrp = 2, +- .umasks = amd64_fam15h_l3_fills_caused_by_l2_evictions, +-}, +-{ .name = "L3_EVICTIONS", +- .desc = "L3 Evictions", +- .code = 0x4e3, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l3_evictions), +- .ngrp = 1, +- .umasks = amd64_fam15h_l3_evictions, +-}, +-{ .name = "NON_CANCELED_L3_READ_REQUESTS", +- .desc = "Non-canceled L3 Read Requests", +- .code = 0x4ed, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), +- .ngrp = 2, +- .umasks = amd64_fam15h_read_request_to_l3_cache, +-}, +-{ .name = "L3_LATENCY", +- .desc = "L3 Latency", +- .code = 0x4ef, +- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l3_latency), +- .ngrp = 1, +- .umasks = amd64_fam15h_l3_latency, +-}, + }; +diff --git a/lib/events/amd64_events_fam15h_nb.h b/lib/events/amd64_events_fam15h_nb.h +new file mode 100644 +index 0000000..5969eb6 +--- /dev/null ++++ b/lib/events/amd64_events_fam15h_nb.h +@@ -0,0 +1,2022 @@ ++/* ++ * Copyright (c) 2013 Google, Inc ++ * Contributed by Stephane Eranian ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * This file is part of libpfm, a performance monitoring support library for ++ * applications on Linux. ++ * ++ * This file has been automatically generated. ++ * ++ * PMU: amd64_fam15h_nb_nb (AMD64 Fam15h Interlagos NorthBridge) ++ * ++ * Based on libpfm patch by Robert Richter : ++ * Family 15h Microarchitecture performance monitor events ++ * ++ * History: ++ * ++ * Nov 30 2013 -- Stephane Eranian , eranian@gmail.com: ++ * Split core and Northbridge events as PMU is distinct ++ * ++ * Apr 29 2011 -- Robert Richter, robert.richter@amd.com: ++ * Source: BKDG for AMD Family 15h Models 00h-0Fh Processors, ++ * 42301, Rev 1.15, April 18, 2011 ++ * ++ * Dec 09 2010 -- Robert Richter, robert.richter@amd.com: ++ * Source: BIOS and Kernel Developer's Guide for the AMD Family 15h ++ * Processors, Rev 0.90, May 18, 2010 ++ */ ++ ++#define CORE_SELECT(b) \ ++ { .uname = "CORE_0",\ ++ .udesc = "Measure on Core0",\ ++ .ucode = 0 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_1",\ ++ .udesc = "Measure on Core1",\ ++ .ucode = 1 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_2",\ ++ .udesc = "Measure on Core2",\ ++ .ucode = 2 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_3",\ ++ .udesc = "Measure on Core3",\ ++ .ucode = 3 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_4",\ ++ .udesc = "Measure on Core4",\ ++ .ucode = 4 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_5",\ ++ .udesc = "Measure on Core5",\ ++ .ucode = 5 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_6",\ ++ .udesc = "Measure on Core6",\ ++ .ucode = 6 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "CORE_7",\ ++ .udesc = "Measure on Core7",\ ++ .ucode = 7 << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO,\ ++ },\ ++ { .uname = "ANY_CORE",\ ++ .udesc = "Measure on any core",\ ++ .ucode = 0xf << 4,\ ++ .grpid = b,\ ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL,\ ++ } ++ ++static const amd64_umask_t amd64_fam15h_nb_dispatched_fpu_ops[]={ ++ { .uname = "OPS_PIPE0", ++ .udesc = "Total number uops assigned to Pipe 0", ++ .ucode = 0x1, ++ }, ++ { .uname = "OPS_PIPE1", ++ .udesc = "Total number uops assigned to Pipe 1", ++ .ucode = 0x2, ++ }, ++ { .uname = "OPS_PIPE2", ++ .udesc = "Total number uops assigned to Pipe 2", ++ .ucode = 0x4, ++ }, ++ { .uname = "OPS_PIPE3", ++ .udesc = "Total number uops assigned to Pipe 3", ++ .ucode = 0x8, ++ }, ++ { .uname = "OPS_DUAL_PIPE0", ++ .udesc = "Total number dual-pipe uops assigned to Pipe 0", ++ .ucode = 0x10, ++ }, ++ { .uname = "OPS_DUAL_PIPE1", ++ .udesc = "Total number dual-pipe uops assigned to Pipe 1", ++ .ucode = 0x20, ++ }, ++ { .uname = "OPS_DUAL_PIPE2", ++ .udesc = "Total number dual-pipe uops assigned to Pipe 2", ++ .ucode = 0x40, ++ }, ++ { .uname = "OPS_DUAL_PIPE3", ++ .udesc = "Total number dual-pipe uops assigned to Pipe 3", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_retired_sse_ops[]={ ++ { .uname = "SINGLE_ADD_SUB_OPS", ++ .udesc = "Single-precision add/subtract FLOPS", ++ .ucode = 0x1, ++ }, ++ { .uname = "SINGLE_MUL_OPS", ++ .udesc = "Single-precision multiply FLOPS", ++ .ucode = 0x2, ++ }, ++ { .uname = "SINGLE_DIV_OPS", ++ .udesc = "Single-precision divide/square root FLOPS", ++ .ucode = 0x4, ++ }, ++ { .uname = "SINGLE_MUL_ADD_OPS", ++ .udesc = "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", ++ .ucode = 0x8, ++ }, ++ { .uname = "DOUBLE_ADD_SUB_OPS", ++ .udesc = "Double precision add/subtract FLOPS", ++ .ucode = 0x10, ++ }, ++ { .uname = "DOUBLE_MUL_OPS", ++ .udesc = "Double precision multiply FLOPS", ++ .ucode = 0x20, ++ }, ++ { .uname = "DOUBLE_DIV_OPS", ++ .udesc = "Double precision divide/square root FLOPS", ++ .ucode = 0x40, ++ }, ++ { .uname = "DOUBLE_MUL_ADD_OPS", ++ .udesc = "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_move_scalar_optimization[]={ ++ { .uname = "SSE_MOVE_OPS", ++ .udesc = "Number of SSE Move Ops", ++ .ucode = 0x1, ++ }, ++ { .uname = "SSE_MOVE_OPS_ELIM", ++ .udesc = "Number of SSE Move Ops eliminated", ++ .ucode = 0x2, ++ }, ++ { .uname = "OPT_CAND", ++ .udesc = "Number of Ops that are candidates for optimization (Z-bit set or pass)", ++ .ucode = 0x4, ++ }, ++ { .uname = "SCALAR_OPS_OPTIMIZED", ++ .udesc = "Number of Scalar ops optimized", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xf, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_retired_serializing_ops[]={ ++ { .uname = "SSE_RETIRED", ++ .udesc = "SSE bottom-executing uops retired", ++ .ucode = 0x1, ++ }, ++ { .uname = "SSE_MISPREDICTED", ++ .udesc = "SSE control word mispredict traps due to mispredictions", ++ .ucode = 0x2, ++ }, ++ { .uname = "X87_RETIRED", ++ .udesc = "X87 bottom-executing uops retired", ++ .ucode = 0x4, ++ }, ++ { .uname = "X87_MISPREDICTED", ++ .udesc = "X87 control word mispredict traps due to mispredictions", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xf, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_segment_register_loads[]={ ++ { .uname = "ES", ++ .udesc = "ES", ++ .ucode = 0x1, ++ }, ++ { .uname = "CS", ++ .udesc = "CS", ++ .ucode = 0x2, ++ }, ++ { .uname = "SS", ++ .udesc = "SS", ++ .ucode = 0x4, ++ }, ++ { .uname = "DS", ++ .udesc = "DS", ++ .ucode = 0x8, ++ }, ++ { .uname = "FS", ++ .udesc = "FS", ++ .ucode = 0x10, ++ }, ++ { .uname = "GS", ++ .udesc = "GS", ++ .ucode = 0x20, ++ }, ++ { .uname = "HS", ++ .udesc = "HS", ++ .ucode = 0x40, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_load_q_store_q_full[]={ ++ { .uname = "LOAD_QUEUE", ++ .udesc = "The number of cycles that the load buffer is full", ++ .ucode = 0x1, ++ }, ++ { .uname = "STORE_QUEUE", ++ .udesc = "The number of cycles that the store buffer is full", ++ .ucode = 0x2, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_locked_ops[]={ ++ { .uname = "EXECUTED", ++ .udesc = "Number of locked instructions executed", ++ .ucode = 0x1, ++ }, ++ { .uname = "CYCLES_NON_SPECULATIVE_PHASE", ++ .udesc = "Number of cycles spent in non-speculative phase, excluding cache miss penalty", ++ .ucode = 0x4, ++ }, ++ { .uname = "CYCLES_WAITING", ++ .udesc = "Number of cycles spent in non-speculative phase, including the cache miss penalty", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xd, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_cancelled_store_to_load[]={ ++ { .uname = "SIZE_ADDRESS_MISMATCHES", ++ .udesc = "Store is smaller than load or different starting byte but partial overlap", ++ .ucode = 0x1, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x1, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_data_cache_misses[]={ ++ { .uname = "DC_MISS_STREAMING_STORE", ++ .udesc = "First data cache miss or streaming store to a 64B cache line", ++ .ucode = 0x1, ++ }, ++ { .uname = "STREAMING_STORE", ++ .udesc = "First streaming store to a 64B cache line", ++ .ucode = 0x2, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_data_cache_refills_from_l2_or_northbridge[]={ ++ { .uname = "GOOD", ++ .udesc = "Fill with good data. (Final valid status is valid)", ++ .ucode = 0x1, ++ }, ++ { .uname = "INVALID", ++ .udesc = "Early valid status turned out to be invalid", ++ .ucode = 0x2, ++ }, ++ { .uname = "POISON", ++ .udesc = "Fill with poison data", ++ .ucode = 0x4, ++ }, ++ { .uname = "READ_ERROR", ++ .udesc = "Fill with read data error", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xf, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_unified_tlb_hit[]={ ++ { .uname = "4K_DATA", ++ .udesc = "4 KB unified TLB hit for data", ++ .ucode = 0x1, ++ }, ++ { .uname = "2M_DATA", ++ .udesc = "2 MB unified TLB hit for data", ++ .ucode = 0x2, ++ }, ++ { .uname = "1G_DATA", ++ .udesc = "1 GB unified TLB hit for data", ++ .ucode = 0x4, ++ }, ++ { .uname = "4K_INST", ++ .udesc = "4 KB unified TLB hit for instruction", ++ .ucode = 0x10, ++ }, ++ { .uname = "2M_INST", ++ .udesc = "2 MB unified TLB hit for instruction", ++ .ucode = 0x20, ++ }, ++ { .uname = "1G_INST", ++ .udesc = "1 GB unified TLB hit for instruction", ++ .ucode = 0x40, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x77, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_unified_tlb_miss[]={ ++ { .uname = "4K_DATA", ++ .udesc = "4 KB unified TLB miss for data", ++ .ucode = 0x1, ++ }, ++ { .uname = "2M_DATA", ++ .udesc = "2 MB unified TLB miss for data", ++ .ucode = 0x2, ++ }, ++ { .uname = "1GB_DATA", ++ .udesc = "1 GB unified TLB miss for data", ++ .ucode = 0x4, ++ }, ++ { .uname = "4K_INST", ++ .udesc = "4 KB unified TLB miss for instruction", ++ .ucode = 0x10, ++ }, ++ { .uname = "2M_INST", ++ .udesc = "2 MB unified TLB miss for instruction", ++ .ucode = 0x20, ++ }, ++ { .uname = "1G_INST", ++ .udesc = "1 GB unified TLB miss for instruction", ++ .ucode = 0x40, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x77, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_prefetch_instructions_dispatched[]={ ++ { .uname = "LOAD", ++ .udesc = "Load (Prefetch, PrefetchT0/T1/T2)", ++ .ucode = 0x1, ++ }, ++ { .uname = "STORE", ++ .udesc = "Store (PrefetchW)", ++ .ucode = 0x2, ++ }, ++ { .uname = "NTA", ++ .udesc = "NTA (PrefetchNTA)", ++ .ucode = 0x4, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_ineffective_sw_prefetches[]={ ++ { .uname = "SW_PREFETCH_HIT_IN_L1", ++ .udesc = "Software prefetch hit in the L1", ++ .ucode = 0x1, ++ }, ++ { .uname = "SW_PREFETCH_HIT_IN_L2", ++ .udesc = "Software prefetch hit in the L2", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x9, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_memory_requests[]={ ++ { .uname = "NON_CACHEABLE", ++ .udesc = "Requests to non-cacheable (UC) memory", ++ .ucode = 0x1, ++ }, ++ { .uname = "WRITE_COMBINING", ++ .udesc = "Requests to non-cacheable (WC, but not WC+/SS) memory", ++ .ucode = 0x2, ++ }, ++ { .uname = "STREAMING_STORE", ++ .udesc = "Requests to non-cacheable (WC+/SS, but not WC) memory", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x83, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_data_prefetcher[]={ ++ { .uname = "ATTEMPTED", ++ .udesc = "Prefetch attempts", ++ .ucode = 0x2, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x2, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_mab_reqs[]={ ++ { .uname = "BUFFER_BIT_0", ++ .udesc = "Buffer entry index bit 0", ++ .ucode = 0x1, ++ }, ++ { .uname = "BUFFER_BIT_1", ++ .udesc = "Buffer entry index bit 1", ++ .ucode = 0x2, ++ }, ++ { .uname = "BUFFER_BIT_2", ++ .udesc = "Buffer entry index bit 2", ++ .ucode = 0x4, ++ }, ++ { .uname = "BUFFER_BIT_3", ++ .udesc = "Buffer entry index bit 3", ++ .ucode = 0x8, ++ }, ++ { .uname = "BUFFER_BIT_4", ++ .udesc = "Buffer entry index bit 4", ++ .ucode = 0x10, ++ }, ++ { .uname = "BUFFER_BIT_5", ++ .udesc = "Buffer entry index bit 5", ++ .ucode = 0x20, ++ }, ++ { .uname = "BUFFER_BIT_6", ++ .udesc = "Buffer entry index bit 6", ++ .ucode = 0x40, ++ }, ++ { .uname = "BUFFER_BIT_7", ++ .udesc = "Buffer entry index bit 7", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_system_read_responses[]={ ++ { .uname = "EXCLUSIVE", ++ .udesc = "Exclusive", ++ .ucode = 0x1, ++ }, ++ { .uname = "MODIFIED", ++ .udesc = "Modified (D18F0x68[ATMModeEn]==0), Modified written (D18F0x68[ATMModeEn]==1)", ++ .ucode = 0x2, ++ }, ++ { .uname = "SHARED", ++ .udesc = "Shared", ++ .ucode = 0x4, ++ }, ++ { .uname = "OWNED", ++ .udesc = "Owned", ++ .ucode = 0x8, ++ }, ++ { .uname = "DATA_ERROR", ++ .udesc = "Data Error", ++ .ucode = 0x10, ++ }, ++ { .uname = "MODIFIED_UNWRITTEN", ++ .udesc = "Modified unwritten", ++ .ucode = 0x20, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_octword_write_transfers[]={ ++ { .uname = "OCTWORD_WRITE_TRANSFER", ++ .udesc = "OW write transfer", ++ .ucode = 0x1, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x1, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_requests_to_l2[]={ ++ { .uname = "INSTRUCTIONS", ++ .udesc = "IC fill", ++ .ucode = 0x1, ++ }, ++ { .uname = "DATA", ++ .udesc = "DC fill", ++ .ucode = 0x2, ++ }, ++ { .uname = "TLB_WALK", ++ .udesc = "TLB fill (page table walks)", ++ .ucode = 0x4, ++ }, ++ { .uname = "SNOOP", ++ .udesc = "NB probe request", ++ .ucode = 0x8, ++ }, ++ { .uname = "CANCELLED", ++ .udesc = "Canceled request", ++ .ucode = 0x10, ++ }, ++ { .uname = "PREFETCHER", ++ .udesc = "L2 cache prefetcher request", ++ .ucode = 0x40, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x5f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_l2_cache_miss[]={ ++ { .uname = "INSTRUCTIONS", ++ .udesc = "IC fill", ++ .ucode = 0x1, ++ }, ++ { .uname = "DATA", ++ .udesc = "DC fill (includes possible replays, whereas PMCx041 does not)", ++ .ucode = 0x2, ++ }, ++ { .uname = "TLB_WALK", ++ .udesc = "TLB page table walk", ++ .ucode = 0x4, ++ }, ++ { .uname = "PREFETCHER", ++ .udesc = "L2 Cache Prefetcher request", ++ .ucode = 0x10, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x17, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_l2_cache_fill_writeback[]={ ++ { .uname = "L2_FILLS", ++ .udesc = "L2 fills from system", ++ .ucode = 0x1, ++ }, ++ { .uname = "L2_WRITEBACKS", ++ .udesc = "L2 Writebacks to system (Clean and Dirty)", ++ .ucode = 0x2, ++ }, ++ { .uname = "L2_WRITEBACKS_CLEAN", ++ .udesc = "L2 Clean Writebacks to system", ++ .ucode = 0x4, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_page_splintering[]={ ++ { .uname = "GUEST_LARGER", ++ .udesc = "Guest page size is larger than host page size when nested paging is enabled", ++ .ucode = 0x1, ++ }, ++ { .uname = "MTRR_MISMATCH", ++ .udesc = "Splintering due to MTRRs, IORRs, APIC, TOMs or other special address region", ++ .ucode = 0x2, ++ }, ++ { .uname = "HOST_LARGER", ++ .udesc = "Host page size is larger than the guest page size", ++ .ucode = 0x4, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_l1_itlb_miss_and_l2_itlb_miss[]={ ++ { .uname = "4K_PAGE_FETCHES", ++ .udesc = "Instruction fetches to a 4 KB page", ++ .ucode = 0x1, ++ }, ++ { .uname = "2M_PAGE_FETCHES", ++ .udesc = "Instruction fetches to a 2 MB page", ++ .ucode = 0x2, ++ }, ++ { .uname = "1G_PAGE_FETCHES", ++ .udesc = "Instruction fetches to a 1 GB page", ++ .ucode = 0x4, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_instruction_cache_invalidated[]={ ++ { .uname = "NON_SMC_PROBE_MISS", ++ .udesc = "Non-SMC invalidating probe that missed on in-flight instructions", ++ .ucode = 0x1, ++ }, ++ { .uname = "NON_SMC_PROBE_HIT", ++ .udesc = "Non-SMC invalidating probe that hit on in-flight instructions", ++ .ucode = 0x2, ++ }, ++ { .uname = "SMC_PROBE_MISS", ++ .udesc = "SMC invalidating probe that missed on in-flight instructions", ++ .ucode = 0x4, ++ }, ++ { .uname = "SMC_PROBE_HIT", ++ .udesc = "SMC invalidating probe that hit on in-flight instructions", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xf, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_retired_mmx_fp_instructions[]={ ++ { .uname = "X87", ++ .udesc = "X87 instructions", ++ .ucode = 0x1, ++ }, ++ { .uname = "MMX", ++ .udesc = "MMX(tm) instructions", ++ .ucode = 0x2, ++ }, ++ { .uname = "SSE", ++ .udesc = "SSE instructions (SSE,SSE2,SSE3,SSSE3,SSE4A,SSE4.1,SSE4.2,AVX,XOP,FMA4)", ++ .ucode = 0x4, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_fpu_exceptions[]={ ++ { .uname = "TOTAL_FAULTS", ++ .udesc = "Total microfaults", ++ .ucode = 0x1, ++ }, ++ { .uname = "TOTAL_TRAPS", ++ .udesc = "Total microtraps", ++ .ucode = 0x2, ++ }, ++ { .uname = "INT2EXT_FAULTS", ++ .udesc = "Int2Ext faults", ++ .ucode = 0x4, ++ }, ++ { .uname = "EXT2INT_FAULTS", ++ .udesc = "Ext2Int faults", ++ .ucode = 0x8, ++ }, ++ { .uname = "BYPASS_FAULTS", ++ .udesc = "Bypass faults", ++ .ucode = 0x10, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x1f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_ibs_ops_tagged[]={ ++ { .uname = "TAGGED", ++ .udesc = "Number of ops tagged by IBS", ++ .ucode = 0x1, ++ }, ++ { .uname = "RETIRED", ++ .udesc = "Number of ops tagged by IBS that retired", ++ .ucode = 0x2, ++ }, ++ { .uname = "IGNORED", ++ .udesc = "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired", ++ .ucode = 0x4, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_ls_dispatch[]={ ++ { .uname = "LOADS", ++ .udesc = "Loads", ++ .ucode = 0x1, ++ }, ++ { .uname = "STORES", ++ .udesc = "Stores", ++ .ucode = 0x2, ++ }, ++ { .uname = "LOAD_OP_STORES", ++ .udesc = "Load-op-Stores", ++ .ucode = 0x4, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x7, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_l2_prefetcher_trigger_events[]={ ++ { .uname = "LOAD_L1_MISS_SEEN_BY_PREFETCHER", ++ .udesc = "Load L1 miss seen by prefetcher", ++ .ucode = 0x1, ++ }, ++ { .uname = "STORE_L1_MISS_SEEN_BY_PREFETCHER", ++ .udesc = "Store L1 miss seen by prefetcher", ++ .ucode = 0x2, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_dram_accesses[]={ ++ { .uname = "DCT0_PAGE_HIT", ++ .udesc = "DCT0 Page hit", ++ .ucode = 0x1, ++ }, ++ { .uname = "DCT0_PAGE_MISS", ++ .udesc = "DCT0 Page Miss", ++ .ucode = 0x2, ++ }, ++ { .uname = "DCT0_PAGE_CONFLICT", ++ .udesc = "DCT0 Page Conflict", ++ .ucode = 0x4, ++ }, ++ { .uname = "DCT1_PAGE_HIT", ++ .udesc = "DCT1 Page hit", ++ .ucode = 0x8, ++ }, ++ { .uname = "DCT1_PAGE_MISS", ++ .udesc = "DCT1 Page Miss", ++ .ucode = 0x10, ++ }, ++ { .uname = "DCT1_PAGE_CONFLICT", ++ .udesc = "DCT1 Page Conflict", ++ .ucode = 0x20, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_dram_controller_page_table_overflows[]={ ++ { .uname = "DCT0_PAGE_TABLE_OVERFLOW", ++ .udesc = "DCT0 Page Table Overflow", ++ .ucode = 0x1, ++ }, ++ { .uname = "DCT1_PAGE_TABLE_OVERFLOW", ++ .udesc = "DCT1 Page Table Overflow", ++ .ucode = 0x2, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3, ++ .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_memory_controller_dram_command_slots_missed[]={ ++ { .uname = "DCT0_COMMAND_SLOTS_MISSED", ++ .udesc = "DCT0 Command Slots Missed (in MemClks)", ++ .ucode = 0x1, ++ }, ++ { .uname = "DCT1_COMMAND_SLOTS_MISSED", ++ .udesc = "DCT1 Command Slots Missed (in MemClks)", ++ .ucode = 0x2, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3, ++ .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_memory_controller_turnarounds[]={ ++ { .uname = "DCT0_DIMM_TURNAROUND", ++ .udesc = "DCT0 DIMM (chip select) turnaround", ++ .ucode = 0x1, ++ }, ++ { .uname = "DCT0_READ_WRITE_TURNAROUND", ++ .udesc = "DCT0 Read to write turnaround", ++ .ucode = 0x2, ++ }, ++ { .uname = "DCT0_WRITE_READ_TURNAROUND", ++ .udesc = "DCT0 Write to read turnaround", ++ .ucode = 0x4, ++ }, ++ { .uname = "DCT1_DIMM_TURNAROUND", ++ .udesc = "DCT1 DIMM (chip select) turnaround", ++ .ucode = 0x8, ++ }, ++ { .uname = "DCT1_READ_WRITE_TURNAROUND", ++ .udesc = "DCT1 Read to write turnaround", ++ .ucode = 0x10, ++ }, ++ { .uname = "DCT1_WRITE_READ_TURNAROUND", ++ .udesc = "DCT1 Write to read turnaround", ++ .ucode = 0x20, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3f, ++ .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_memory_controller_bypass_counter_saturation[]={ ++ { .uname = "MEMORY_CONTROLLER_HIGH_PRIORITY_BYPASS", ++ .udesc = "Memory controller high priority bypass", ++ .ucode = 0x1, ++ }, ++ { .uname = "MEMORY_CONTROLLER_MEDIUM_PRIORITY_BYPASS", ++ .udesc = "Memory controller medium priority bypass", ++ .ucode = 0x2, ++ }, ++ { .uname = "DCT0_DCQ_BYPASS", ++ .udesc = "DCT0 DCQ bypass", ++ .ucode = 0x4, ++ }, ++ { .uname = "DCT1_DCQ_BYPASS", ++ .udesc = "DCT1 DCQ bypass", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xf, ++ .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_thermal_status[]={ ++ { .uname = "NUM_HTC_TRIP_POINT_CROSSED", ++ .udesc = "Number of times the HTC trip point is crossed", ++ .ucode = 0x4, ++ }, ++ { .uname = "NUM_CLOCKS_HTC_PSTATE_INACTIVE", ++ .udesc = "Number of clocks HTC P-state is inactive", ++ .ucode = 0x20, ++ }, ++ { .uname = "NUM_CLOCKS_HTC_PSTATE_ACTIVE", ++ .udesc = "Number of clocks HTC P-state is active", ++ .ucode = 0x40, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x64, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_cpu_io_requests_to_memory_io[]={ ++ { .uname = "REMOTE_IO_TO_LOCAL_IO", ++ .udesc = "Remote IO to Local IO", ++ .ucode = 0x61, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "REMOTE_CPU_TO_LOCAL_IO", ++ .udesc = "Remote CPU to Local IO", ++ .ucode = 0x64, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_IO_TO_REMOTE_IO", ++ .udesc = "Local IO to Remote IO", ++ .ucode = 0x91, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_IO_TO_REMOTE_MEM", ++ .udesc = "Local IO to Remote Mem", ++ .ucode = 0x92, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_CPU_TO_REMOTE_IO", ++ .udesc = "Local CPU to Remote IO", ++ .ucode = 0x94, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_CPU_TO_REMOTE_MEM", ++ .udesc = "Local CPU to Remote Mem", ++ .ucode = 0x98, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_IO_TO_LOCAL_IO", ++ .udesc = "Local IO to Local IO", ++ .ucode = 0xa1, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_IO_TO_LOCAL_MEM", ++ .udesc = "Local IO to Local Mem", ++ .ucode = 0xa2, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_CPU_TO_LOCAL_IO", ++ .udesc = "Local CPU to Local IO", ++ .ucode = 0xa4, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "LOCAL_CPU_TO_LOCAL_MEM", ++ .udesc = "Local CPU to Local Mem", ++ .ucode = 0xa8, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_cache_block_commands[]={ ++ { .uname = "VICTIM_BLOCK", ++ .udesc = "Victim Block (Writeback)", ++ .ucode = 0x1, ++ }, ++ { .uname = "READ_BLOCK", ++ .udesc = "Read Block (Dcache load miss refill)", ++ .ucode = 0x4, ++ }, ++ { .uname = "READ_BLOCK_SHARED", ++ .udesc = "Read Block Shared (Icache refill)", ++ .ucode = 0x8, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED", ++ .udesc = "Read Block Modified (Dcache store miss refill)", ++ .ucode = 0x10, ++ }, ++ { .uname = "CHANGE_TO_DIRTY", ++ .udesc = "Change-to-Dirty (first store to clean block already in cache)", ++ .ucode = 0x20, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3d, ++ .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_sized_commands[]={ ++ { .uname = "NON-POSTED_SZWR_BYTE", ++ .udesc = "Non-Posted SzWr Byte (1-32 bytes). Typical Usage: Legacy or mapped IO, typically 1-4 bytes.", ++ .ucode = 0x1, ++ }, ++ { .uname = "NON-POSTED_SZWR_DW", ++ .udesc = "Non-Posted SzWr DW (1-16 dwords). Typical Usage: Legacy or mapped IO, typically 1", ++ .ucode = 0x2, ++ }, ++ { .uname = "POSTED_SZWR_BYTE", ++ .udesc = "Posted SzWr Byte (1-32 bytes). Typical Usage: Subcache-line DMA writes, size varies; also", ++ .ucode = 0x4, ++ }, ++ { .uname = "POSTED_SZWR_DW", ++ .udesc = "Posted SzWr DW (1-16 dwords). Typical Usage: Block-oriented DMA writes, often cache-line", ++ .ucode = 0x8, ++ }, ++ { .uname = "SZRD_BYTE", ++ .udesc = "SzRd Byte (4 bytes). Typical Usage: Legacy or mapped IO.", ++ .ucode = 0x10, ++ }, ++ { .uname = "SZRD_DW", ++ .udesc = "SzRd DW (1-16 dwords). Typical Usage: Block-oriented DMA reads, typically cache-line size.", ++ .ucode = 0x20, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_probe_responses_and_upstream_requests[]={ ++ { .uname = "PROBE_MISS", ++ .udesc = "Probe miss", ++ .ucode = 0x1, ++ }, ++ { .uname = "PROBE_HIT_CLEAN", ++ .udesc = "Probe hit clean", ++ .ucode = 0x2, ++ }, ++ { .uname = "PROBE_HIT_DIRTY_WITHOUT_MEMORY_CANCEL", ++ .udesc = "Probe hit dirty without memory cancel (probed by Sized Write or Change2Dirty)", ++ .ucode = 0x4, ++ }, ++ { .uname = "PROBE_HIT_DIRTY_WITH_MEMORY_CANCEL", ++ .udesc = "Probe hit dirty with memory cancel (probed by DMA read or cache refill request)", ++ .ucode = 0x8, ++ }, ++ { .uname = "UPSTREAM_DISPLAY_REFRESH_ISOC_READS", ++ .udesc = "Upstream display refresh/ISOC reads", ++ .ucode = 0x10, ++ }, ++ { .uname = "UPSTREAM_NON-DISPLAY_REFRESH_READS", ++ .udesc = "Upstream non-display refresh reads", ++ .ucode = 0x20, ++ }, ++ { .uname = "UPSTREAM_ISOC_WRITES", ++ .udesc = "Upstream ISOC writes", ++ .ucode = 0x40, ++ }, ++ { .uname = "UPSTREAM_NON-ISOC_WRITES", ++ .udesc = "Upstream non-ISOC writes", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_gart_events[]={ ++ { .uname = "GART_APERTURE_HIT_ON_ACCESS_FROM_CPU", ++ .udesc = "GART aperture hit on access from CPU", ++ .ucode = 0x1, ++ }, ++ { .uname = "GART_APERTURE_HIT_ON_ACCESS_FROM_IO", ++ .udesc = "GART aperture hit on access from IO", ++ .ucode = 0x2, ++ }, ++ { .uname = "GART_MISS", ++ .udesc = "GART miss", ++ .ucode = 0x4, ++ }, ++ { .uname = "GART_REQUEST_HIT_TABLE_WALK_IN_PROGRESS", ++ .udesc = "GART Request hit table walk in progress", ++ .ucode = 0x8, ++ }, ++ { .uname = "GART_MULTIPLE_TABLE_WALK_IN_PROGRESS", ++ .udesc = "GART multiple table walk in progress", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x8f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_link_transmit_bandwidth[]={ ++ { .uname = "COMMAND_DW_SENT", ++ .udesc = "Command DW sent", ++ .ucode = 0x1, ++ .grpid = 0, ++ }, ++ { .uname = "DATA_DW_SENT", ++ .udesc = "Data DW sent", ++ .ucode = 0x2, ++ .grpid = 0, ++ }, ++ { .uname = "BUFFER_RELEASE_DW_SENT", ++ .udesc = "Buffer release DW sent", ++ .ucode = 0x4, ++ .grpid = 0, ++ }, ++ { .uname = "NOP_DW_SENT", ++ .udesc = "NOP DW sent (idle)", ++ .ucode = 0x8, ++ .grpid = 0, ++ }, ++ { .uname = "ADDRESS_DW_SENT", ++ .udesc = "Address (including extensions) DW sent", ++ .ucode = 0x10, ++ .grpid = 0, ++ }, ++ { .uname = "PER_PACKET_CRC_SENT", ++ .udesc = "Per packet CRC sent", ++ .ucode = 0x20, ++ .grpid = 0, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3f, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ .grpid = 0, ++ }, ++ { .uname = "SUBLINK_1", ++ .udesc = "When links are unganged, enable this umask to select sublink 1", ++ .ucode = 0x80, ++ .grpid = 1, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "SUBLINK_0", ++ .udesc = "When links are unganged, enable this umask to select sublink 0 (default when links ganged)", ++ .ucode = 0x00, ++ .grpid = 1, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++ ++ ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_cpu_to_dram_requests_to_target_node[]={ ++ { .uname = "LOCAL_TO_NODE_0", ++ .udesc = "From Local node to Node 0", ++ .ucode = 0x1, ++ }, ++ { .uname = "LOCAL_TO_NODE_1", ++ .udesc = "From Local node to Node 1", ++ .ucode = 0x2, ++ }, ++ { .uname = "LOCAL_TO_NODE_2", ++ .udesc = "From Local node to Node 2", ++ .ucode = 0x4, ++ }, ++ { .uname = "LOCAL_TO_NODE_3", ++ .udesc = "From Local node to Node 3", ++ .ucode = 0x8, ++ }, ++ { .uname = "LOCAL_TO_NODE_4", ++ .udesc = "From Local node to Node 4", ++ .ucode = 0x10, ++ }, ++ { .uname = "LOCAL_TO_NODE_5", ++ .udesc = "From Local node to Node 5", ++ .ucode = 0x20, ++ }, ++ { .uname = "LOCAL_TO_NODE_6", ++ .udesc = "From Local node to Node 6", ++ .ucode = 0x40, ++ }, ++ { .uname = "LOCAL_TO_NODE_7", ++ .udesc = "From Local node to Node 7", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_io_to_dram_requests_to_target_node[]={ ++ { .uname = "LOCAL_TO_NODE_0", ++ .udesc = "From Local node to Node 0", ++ .ucode = 0x1, ++ }, ++ { .uname = "LOCAL_TO_NODE_1", ++ .udesc = "From Local node to Node 1", ++ .ucode = 0x2, ++ }, ++ { .uname = "LOCAL_TO_NODE_2", ++ .udesc = "From Local node to Node 2", ++ .ucode = 0x4, ++ }, ++ { .uname = "LOCAL_TO_NODE_3", ++ .udesc = "From Local node to Node 3", ++ .ucode = 0x8, ++ }, ++ { .uname = "LOCAL_TO_NODE_4", ++ .udesc = "From Local node to Node 4", ++ .ucode = 0x10, ++ }, ++ { .uname = "LOCAL_TO_NODE_5", ++ .udesc = "From Local node to Node 5", ++ .ucode = 0x20, ++ }, ++ { .uname = "LOCAL_TO_NODE_6", ++ .udesc = "From Local node to Node 6", ++ .ucode = 0x40, ++ }, ++ { .uname = "LOCAL_TO_NODE_7", ++ .udesc = "From Local node to Node 7", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3[]={ ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_0", ++ .udesc = "Read block From Local node to Node 0", ++ .ucode = 0x11, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_0", ++ .udesc = "Read block shared From Local node to Node 0", ++ .ucode = 0x12, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_0", ++ .udesc = "Read block modified From Local node to Node 0", ++ .ucode = 0x14, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_0", ++ .udesc = "Change-to-Dirty From Local node to Node 0", ++ .ucode = 0x18, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_1", ++ .udesc = "Read block From Local node to Node 1", ++ .ucode = 0x21, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_1", ++ .udesc = "Read block shared From Local node to Node 1", ++ .ucode = 0x22, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_1", ++ .udesc = "Read block modified From Local node to Node 1", ++ .ucode = 0x24, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_1", ++ .udesc = "Change-to-Dirty From Local node to Node 1", ++ .ucode = 0x28, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_2", ++ .udesc = "Read block From Local node to Node 2", ++ .ucode = 0x41, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_2", ++ .udesc = "Read block shared From Local node to Node 2", ++ .ucode = 0x42, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_2", ++ .udesc = "Read block modified From Local node to Node 2", ++ .ucode = 0x44, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_2", ++ .udesc = "Change-to-Dirty From Local node to Node 2", ++ .ucode = 0x48, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_3", ++ .udesc = "Read block From Local node to Node 3", ++ .ucode = 0x81, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_3", ++ .udesc = "Read block shared From Local node to Node 3", ++ .ucode = 0x82, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_3", ++ .udesc = "Read block modified From Local node to Node 3", ++ .ucode = 0x84, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_3", ++ .udesc = "Change-to-Dirty From Local node to Node 3", ++ .ucode = 0x88, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7[]={ ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_4", ++ .udesc = "Read block From Local node to Node 4", ++ .ucode = 0x11, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_4", ++ .udesc = "Read block shared From Local node to Node 4", ++ .ucode = 0x12, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_4", ++ .udesc = "Read block modified From Local node to Node 4", ++ .ucode = 0x14, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_4", ++ .udesc = "Change-to-Dirty From Local node to Node 4", ++ .ucode = 0x18, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_5", ++ .udesc = "Read block From Local node to Node 5", ++ .ucode = 0x21, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_5", ++ .udesc = "Read block shared From Local node to Node 5", ++ .ucode = 0x22, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_5", ++ .udesc = "Read block modified From Local node to Node 5", ++ .ucode = 0x24, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_5", ++ .udesc = "Change-to-Dirty From Local node to Node 5", ++ .ucode = 0x28, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_6", ++ .udesc = "Read block From Local node to Node 6", ++ .ucode = 0x41, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_6", ++ .udesc = "Read block shared From Local node to Node 6", ++ .ucode = 0x42, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_6", ++ .udesc = "Read block modified From Local node to Node 6", ++ .ucode = 0x44, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_6", ++ .udesc = "Change-to-Dirty From Local node to Node 6", ++ .ucode = 0x48, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_LOCAL_TO_NODE_7", ++ .udesc = "Read block From Local node to Node 7", ++ .ucode = 0x81, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_7", ++ .udesc = "Read block shared From Local node to Node 7", ++ .ucode = 0x82, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_7", ++ .udesc = "Read block modified From Local node to Node 7", ++ .ucode = 0x84, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_7", ++ .udesc = "Change-to-Dirty From Local node to Node 7", ++ .ucode = 0x88, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_cpu_command_requests_to_target_node[]={ ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_0", ++ .udesc = "Read Sized From Local node to Node 0", ++ .ucode = 0x11, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_0", ++ .udesc = "Write Sized From Local node to Node 0", ++ .ucode = 0x12, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_0", ++ .udesc = "Victim Block From Local node to Node 0", ++ .ucode = 0x14, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_1", ++ .udesc = "Read Sized From Local node to Node 1", ++ .ucode = 0x21, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_1", ++ .udesc = "Write Sized From Local node to Node 1", ++ .ucode = 0x22, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_1", ++ .udesc = "Victim Block From Local node to Node 1", ++ .ucode = 0x24, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_2", ++ .udesc = "Read Sized From Local node to Node 2", ++ .ucode = 0x41, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_2", ++ .udesc = "Write Sized From Local node to Node 2", ++ .ucode = 0x42, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_2", ++ .udesc = "Victim Block From Local node to Node 2", ++ .ucode = 0x44, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_3", ++ .udesc = "Read Sized From Local node to Node 3", ++ .ucode = 0x81, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_3", ++ .udesc = "Write Sized From Local node to Node 3", ++ .ucode = 0x82, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_3", ++ .udesc = "Victim Block From Local node to Node 3", ++ .ucode = 0x84, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_4", ++ .udesc = "Read Sized From Local node to Node 4", ++ .ucode = 0x19, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_4", ++ .udesc = "Write Sized From Local node to Node 4", ++ .ucode = 0x1a, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_4", ++ .udesc = "Victim Block From Local node to Node 4", ++ .ucode = 0x1c, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_5", ++ .udesc = "Read Sized From Local node to Node 5", ++ .ucode = 0x29, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_5", ++ .udesc = "Write Sized From Local node to Node 5", ++ .ucode = 0x2a, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_5", ++ .udesc = "Victim Block From Local node to Node 5", ++ .ucode = 0x2c, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_6", ++ .udesc = "Read Sized From Local node to Node 6", ++ .ucode = 0x49, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_6", ++ .udesc = "Write Sized From Local node to Node 6", ++ .ucode = 0x4a, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_6", ++ .udesc = "Victim Block From Local node to Node 6", ++ .ucode = 0x4c, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "READ_SIZED_LOCAL_TO_NODE_7", ++ .udesc = "Read Sized From Local node to Node 7", ++ .ucode = 0x89, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "WRITE_SIZED_LOCAL_TO_NODE_7", ++ .udesc = "Write Sized From Local node to Node 7", ++ .ucode = 0x8a, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_7", ++ .udesc = "Victim Block From Local node to Node 7", ++ .ucode = 0x8c, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "ALL_LOCAL_TO_NODE_0_3", ++ .udesc = "All From Local node to Node 0-3", ++ .ucode = 0xf7, ++ .uflags= AMD64_FL_NCOMBO, ++ }, ++ { .uname = "ALL_LOCAL_TO_NODE_4_7", ++ .udesc = "All From Local node to Node 4-7", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_request_cache_status_0[]={ ++ { .uname = "PROBE_HIT_S", ++ .udesc = "Probe Hit S", ++ .ucode = 0x1, ++ }, ++ { .uname = "PROBE_HIT_E", ++ .udesc = "Probe Hit E", ++ .ucode = 0x2, ++ }, ++ { .uname = "PROBE_HIT_MUW_OR_O", ++ .udesc = "Probe Hit MuW or O", ++ .ucode = 0x4, ++ }, ++ { .uname = "PROBE_HIT_M", ++ .udesc = "Probe Hit M", ++ .ucode = 0x8, ++ }, ++ { .uname = "PROBE_MISS", ++ .udesc = "Probe Miss", ++ .ucode = 0x10, ++ }, ++ { .uname = "DIRECTED_PROBE", ++ .udesc = "Directed Probe", ++ .ucode = 0x20, ++ }, ++ { .uname = "TRACK_CACHE_STAT_FOR_RDBLK", ++ .udesc = "Track Cache Stat for RdBlk", ++ .ucode = 0x40, ++ }, ++ { .uname = "TRACK_CACHE_STAT_FOR_RDBLKS", ++ .udesc = "Track Cache Stat for RdBlkS", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_request_cache_status_1[]={ ++ { .uname = "PROBE_HIT_S", ++ .udesc = "Probe Hit S", ++ .ucode = 0x1, ++ }, ++ { .uname = "PROBE_HIT_E", ++ .udesc = "Probe Hit E", ++ .ucode = 0x2, ++ }, ++ { .uname = "PROBE_HIT_MUW_OR_O", ++ .udesc = "Probe Hit MuW or O", ++ .ucode = 0x4, ++ }, ++ { .uname = "PROBE_HIT_M", ++ .udesc = "Probe Hit M", ++ .ucode = 0x8, ++ }, ++ { .uname = "PROBE_MISS", ++ .udesc = "Probe Miss", ++ .ucode = 0x10, ++ }, ++ { .uname = "DIRECTED_PROBE", ++ .udesc = "Directed Probe", ++ .ucode = 0x20, ++ }, ++ { .uname = "TRACK_CACHE_STAT_FOR_CHGTODIRTY", ++ .udesc = "Track Cache Stat for ChgToDirty", ++ .ucode = 0x40, ++ }, ++ { .uname = "TRACK_CACHE_STAT_FOR_RDBLKM", ++ .udesc = "Track Cache Stat for RdBlkM", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_memory_controller_requests[]={ ++ { .uname = "WRITE_REQUESTS_TO_DCT", ++ .udesc = "Write requests sent to the DCT", ++ .ucode = 0x1, ++ }, ++ { .uname = "READ_REQUESTS_TO_DCT", ++ .udesc = "Read requests (including prefetch requests) sent to the DCT", ++ .ucode = 0x2, ++ }, ++ { .uname = "PREFETCH_REQUESTS_TO_DCT", ++ .udesc = "Prefetch requests sent to the DCT", ++ .ucode = 0x4, ++ }, ++ { .uname = "32_BYTES_SIZED_WRITES", ++ .udesc = "32 Bytes Sized Writes", ++ .ucode = 0x8, ++ }, ++ { .uname = "64_BYTES_SIZED_WRITES", ++ .udesc = "64 Bytes Sized Writes", ++ .ucode = 0x10, ++ }, ++ { .uname = "32_BYTES_SIZED_READS", ++ .udesc = "32 Bytes Sized Reads", ++ .ucode = 0x20, ++ }, ++ { .uname = "64_BYTE_SIZED_READS", ++ .udesc = "64 Byte Sized Reads", ++ .ucode = 0x40, ++ }, ++ { .uname = "READ_REQUESTS_TO_DCT_WHILE_WRITES_PENDING", ++ .udesc = "Read requests sent to the DCT while writes requests are pending in the DCT", ++ .ucode = 0x80, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_read_request_to_l3_cache[]={ ++ { .uname = "READ_BLOCK_EXCLUSIVE", ++ .udesc = "Read Block Exclusive (Data cache read)", ++ .ucode = 0x1, ++ .grpid = 0, ++ }, ++ { .uname = "READ_BLOCK_SHARED", ++ .udesc = "Read Block Shared (Instruction cache read)", ++ .ucode = 0x2, ++ .grpid = 0, ++ }, ++ { .uname = "READ_BLOCK_MODIFY", ++ .udesc = "Read Block Modify", ++ .ucode = 0x4, ++ .grpid = 0, ++ }, ++ { .uname = "PREFETCH", ++ .udesc = "Count prefetches honly", ++ .ucode = 0x8, ++ .grpid = 0, ++ }, ++ { .uname = "READ_BLOCK_ANY", ++ .udesc = "Count any read request", ++ .ucode = 0x7, ++ .grpid = 0, ++ .uflags= AMD64_FL_DFL | AMD64_FL_NCOMBO, ++ }, ++ CORE_SELECT(1), ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_l3_fills_caused_by_l2_evictions[]={ ++ { .uname = "SHARED", ++ .udesc = "Shared", ++ .ucode = 0x1, ++ .grpid = 0, ++ }, ++ { .uname = "EXCLUSIVE", ++ .udesc = "Exclusive", ++ .ucode = 0x2, ++ .grpid = 0, ++ }, ++ { .uname = "OWNED", ++ .udesc = "Owned", ++ .ucode = 0x4, ++ .grpid = 0, ++ }, ++ { .uname = "MODIFIED", ++ .udesc = "Modified", ++ .ucode = 0x8, ++ .grpid = 0, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xff, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ .grpid = 0, ++ }, ++ CORE_SELECT(1), ++ }; ++ ++static const amd64_umask_t amd64_fam15h_nb_l3_evictions[]={ ++ { .uname = "SHARED", ++ .udesc = "Shared", ++ .ucode = 0x1, ++ }, ++ { .uname = "EXCLUSIVE", ++ .udesc = "Exclusive", ++ .ucode = 0x2, ++ }, ++ { .uname = "OWNED", ++ .udesc = "Owned", ++ .ucode = 0x4, ++ }, ++ { .uname = "MODIFIED", ++ .udesc = "Modified", ++ .ucode = 0x8, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0xf, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_umask_t amd64_fam15h_nb_l3_latency[]={ ++ { .uname = "L3_REQUEST_CYCLE", ++ .udesc = "L3 Request cycle count.", ++ .ucode = 0x1, ++ }, ++ { .uname = "L3_REQUEST", ++ .udesc = "L3 request count.", ++ .ucode = 0x2, ++ }, ++ { .uname = "ALL", ++ .udesc = "All sub-events selected", ++ .ucode = 0x3, ++ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, ++ }, ++}; ++ ++static const amd64_entry_t amd64_fam15h_nb_pe[]={ ++{ .name = "DRAM_ACCESSES", ++ .desc = "DRAM Accesses", ++ .code = 0xe0, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_dram_accesses), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_dram_accesses, ++}, ++{ .name = "DRAM_CONTROLLER_PAGE_TABLE_OVERFLOWS", ++ .desc = "DRAM Controller Page Table Overflows", ++ .code = 0xe1, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_dram_controller_page_table_overflows), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_dram_controller_page_table_overflows, ++}, ++{ .name = "MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED", ++ .desc = "Memory Controller DRAM Command Slots Missed", ++ .code = 0xe2, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_dram_command_slots_missed), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_memory_controller_dram_command_slots_missed, ++}, ++{ .name = "MEMORY_CONTROLLER_TURNAROUNDS", ++ .desc = "Memory Controller Turnarounds", ++ .code = 0xe3, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_turnarounds), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_memory_controller_turnarounds, ++}, ++{ .name = "MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION", ++ .desc = "Memory Controller Bypass Counter Saturation", ++ .code = 0xe4, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_bypass_counter_saturation), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_memory_controller_bypass_counter_saturation, ++}, ++{ .name = "THERMAL_STATUS", ++ .desc = "Thermal Status", ++ .code = 0xe8, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_thermal_status), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_thermal_status, ++}, ++{ .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", ++ .desc = "CPU/IO Requests to Memory/IO", ++ .code = 0xe9, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_io_requests_to_memory_io), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_io_requests_to_memory_io, ++}, ++{ .name = "CACHE_BLOCK_COMMANDS", ++ .desc = "Cache Block Commands", ++ .code = 0xea, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cache_block_commands), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cache_block_commands, ++}, ++{ .name = "SIZED_COMMANDS", ++ .desc = "Sized Commands", ++ .code = 0xeb, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_sized_commands), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_sized_commands, ++}, ++{ .name = "PROBE_RESPONSES_AND_UPSTREAM_REQUESTS", ++ .desc = "Probe Responses and Upstream Requests", ++ .code = 0xec, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_probe_responses_and_upstream_requests), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_probe_responses_and_upstream_requests, ++}, ++{ .name = "GART_EVENTS", ++ .desc = "GART Events", ++ .code = 0xee, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_gart_events), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_gart_events, ++}, ++{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_0", ++ .desc = "Link Transmit Bandwidth Link 0", ++ .code = 0xf6, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_link_transmit_bandwidth, ++}, ++{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_1", ++ .desc = "Link Transmit Bandwidth Link 1", ++ .code = 0xf7, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_link_transmit_bandwidth, ++}, ++{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_2", ++ .desc = "Link Transmit Bandwidth Link 2", ++ .code = 0xf8, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_link_transmit_bandwidth, ++}, ++{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_3", ++ .desc = "Link Transmit Bandwidth Link 3", ++ .code = 0x1f9, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_link_transmit_bandwidth, ++}, ++{ .name = "CPU_TO_DRAM_REQUESTS_TO_TARGET_NODE", ++ .desc = "CPU to DRAM Requests to Target Node", ++ .code = 0x1e0, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_to_dram_requests_to_target_node), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_to_dram_requests_to_target_node, ++}, ++{ .name = "IO_TO_DRAM_REQUESTS_TO_TARGET_NODE", ++ .desc = "IO to DRAM Requests to Target Node", ++ .code = 0x1e1, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_io_to_dram_requests_to_target_node), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_io_to_dram_requests_to_target_node, ++}, ++{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_0_3", ++ .desc = "CPU Read Command Latency to Target Node 0-3", ++ .code = 0x1e2, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3, ++}, ++{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_0_3", ++ .desc = "CPU Read Command Requests to Target Node 0-3", ++ .code = 0x1e3, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3, ++}, ++{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_4_7", ++ .desc = "CPU Read Command Latency to Target Node 4-7", ++ .code = 0x1e4, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7, ++}, ++{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_4_7", ++ .desc = "CPU Read Command Requests to Target Node 4-7", ++ .code = 0x1e5, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7, ++}, ++{ .name = "CPU_COMMAND_LATENCY_TO_TARGET_NODE", ++ .desc = "CPU Command Latency to Target Node", ++ .code = 0x1e6, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_command_requests_to_target_node), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_command_requests_to_target_node, ++}, ++{ .name = "CPU_REQUESTS_TO_TARGET_NODE", ++ .desc = "CPU Requests to Target Node", ++ .code = 0x1e7, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_command_requests_to_target_node), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_cpu_command_requests_to_target_node, ++}, ++{ .name = "REQUEST_CACHE_STATUS_0", ++ .desc = "Request Cache Status 0", ++ .code = 0x1ea, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_request_cache_status_0), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_request_cache_status_0, ++}, ++{ .name = "REQUEST_CACHE_STATUS_1", ++ .desc = "Request Cache Status 1", ++ .code = 0x1eb, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_request_cache_status_1), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_request_cache_status_1, ++}, ++{ .name = "MEMORY_CONTROLLER_REQUESTS", ++ .desc = "Memory Controller Requests", ++ .code = 0x1f0, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_requests), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_memory_controller_requests, ++}, ++{ .name = "READ_REQUEST_TO_L3_CACHE", ++ .desc = "Read Request to L3 Cache", ++ .code = 0x4e0, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_read_request_to_l3_cache), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_read_request_to_l3_cache, ++}, ++{ .name = "L3_CACHE_MISSES", ++ .desc = "L3 Cache Misses", ++ .code = 0x4e1, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_read_request_to_l3_cache), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_read_request_to_l3_cache, ++}, ++{ .name = "L3_FILLS_CAUSED_BY_L2_EVICTIONS", ++ .desc = "L3 Fills caused by L2 Evictions", ++ .code = 0x4e2, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_l3_fills_caused_by_l2_evictions), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_l3_fills_caused_by_l2_evictions, ++}, ++{ .name = "L3_EVICTIONS", ++ .desc = "L3 Evictions", ++ .code = 0x4e3, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_l3_evictions), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_l3_evictions, ++}, ++{ .name = "NON_CANCELED_L3_READ_REQUESTS", ++ .desc = "Non-canceled L3 Read Requests", ++ .code = 0x4ed, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_read_request_to_l3_cache), ++ .ngrp = 2, ++ .umasks = amd64_fam15h_nb_read_request_to_l3_cache, ++}, ++{ .name = "L3_LATENCY", ++ .desc = "L3 Latency", ++ .code = 0x4ef, ++ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_l3_latency), ++ .ngrp = 1, ++ .umasks = amd64_fam15h_nb_l3_latency, ++}, ++}; +-- +1.8.3.1 + +From ec046652845877d46cc8c62d86f47325380fbaa1 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Thu, 12 Dec 2013 22:46:19 +0100 +Subject: [PATCH 12/14] fix typos in IVB event descriptions + +Signed-off-by: Andreas Beckmann +--- + lib/events/intel_ivb_events.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h +index 3c5583e..407059b 100644 +--- a/lib/events/intel_ivb_events.h ++++ b/lib/events/intel_ivb_events.h +@@ -1173,7 +1173,7 @@ static const intel_x86_umask_t ivb_other_assists[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "AVX_STORE", +- .udesc = "Number of assists associated with 25-bit AVX stores", ++ .udesc = "Number of assists associated with 256-bit AVX stores", + .ucode = 0x0800, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -1277,7 +1277,7 @@ static const intel_x86_umask_t ivb_uops_dispatched_port[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PORT_3", +- .udesc = "Cycles in which a uop is disptached on port 3", ++ .udesc = "Cycles in which a uop is dispatched on port 3", + .ucode = 0x3000, + .uflags= INTEL_X86_NCOMBO, + }, +-- +1.8.3.1 + +From a2eb1c8257b07ebc2f5e7a1ace8d005d0a7a08f0 Mon Sep 17 00:00:00 2001 +From: Steve Kaufmann +Date: Mon, 13 Jan 2014 14:19:16 +0100 +Subject: [PATCH 13/14] fix spelling mistakes in event descriptions + +Applied spell-checker on event descriptions. + +Signed-off-by: Steve Kaufmann +--- + lib/events/amd64_events_fam14h.h | 4 ++-- + lib/events/amd64_events_fam15h.h | 2 +- + lib/events/amd64_events_fam15h_nb.h | 2 +- + lib/events/arm_1176_events.h | 2 +- + lib/events/arm_cortex_a8_events.h | 4 ++-- + lib/events/arm_cortex_a9_events.h | 2 +- + lib/events/intel_atom_events.h | 2 +- + lib/events/intel_coreduo_events.h | 6 +++--- + lib/events/intel_hsw_events.h | 10 +++++----- + lib/events/intel_ivb_events.h | 10 +++++----- + lib/events/intel_netburst_events.h | 4 ++-- + lib/events/intel_nhm_events.h | 12 ++++++------ + lib/events/intel_nhm_unc_events.h | 8 ++++---- + lib/events/intel_p6_events.h | 4 ++-- + lib/events/intel_pii_events.h | 4 ++-- + lib/events/intel_pm_events.h | 4 ++-- + lib/events/intel_ppro_events.h | 4 ++-- + lib/events/intel_slm_events.h | 8 ++++---- + lib/events/intel_snb_events.h | 14 +++++++------- + lib/events/intel_snbep_events.h | 14 +++++++------- + lib/events/intel_snbep_unc_cbo_events.h | 4 ++-- + lib/events/intel_wsm_events.h | 4 ++-- + lib/events/intel_wsm_unc_events.h | 10 +++++----- + lib/events/intel_x86_arch_events.h | 2 +- + lib/events/mips_74k_events.h | 6 +++--- + lib/events/sparc_ultra3_events.h | 8 ++++---- + lib/events/sparc_ultra3i_events.h | 10 +++++----- + lib/events/sparc_ultra3plus_events.h | 10 +++++----- + lib/events/sparc_ultra4plus_events.h | 12 ++++++------ + 29 files changed, 93 insertions(+), 93 deletions(-) + +diff --git a/lib/events/amd64_events_fam14h.h b/lib/events/amd64_events_fam14h.h +index e975521..0cf11a9 100644 +--- a/lib/events/amd64_events_fam14h.h ++++ b/lib/events/amd64_events_fam14h.h +@@ -121,7 +121,7 @@ static const amd64_umask_t amd64_fam14h_retired_serializing_ops[]={ + + static const amd64_umask_t amd64_fam14h_retired_x87_fpu_ops[]={ + { .uname = "ADD_SUB_OPS", +- .udesc = "Add/substract ops", ++ .udesc = "Add/subtract ops", + .ucode = 0x1, + }, + { .uname = "MULT_OPS", +@@ -1180,7 +1180,7 @@ static const amd64_entry_t amd64_fam14h_pe[]={ + .umasks = amd64_fam14h_l1_dtlb_hit, + }, + { .name = "DCACHE_SW_PREFETCHES", +- .desc = "Number of software prefetches that do not cuase an actual data cache refill", ++ .desc = "Number of software prefetches that do not cause an actual data cache refill", + .modmsk = AMD64_FAM10H_ATTRS, + .code = 0x52, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam14h_dcache_sw_prefetches), +diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h +index 7872468..0b8c17b 100644 +--- a/lib/events/amd64_events_fam15h.h ++++ b/lib/events/amd64_events_fam15h.h +@@ -1708,7 +1708,7 @@ static const amd64_umask_t amd64_fam15h_read_request_to_l3_cache[]={ + .grpid = 0, + }, + { .uname = "PREFETCH", +- .udesc = "Count prefetches honly", ++ .udesc = "Count prefetches only", + .ucode = 0x8, + .grpid = 0, + }, +diff --git a/lib/events/amd64_events_fam15h_nb.h b/lib/events/amd64_events_fam15h_nb.h +index 5969eb6..82799f0 100644 +--- a/lib/events/amd64_events_fam15h_nb.h ++++ b/lib/events/amd64_events_fam15h_nb.h +@@ -1711,7 +1711,7 @@ static const amd64_umask_t amd64_fam15h_nb_read_request_to_l3_cache[]={ + .grpid = 0, + }, + { .uname = "PREFETCH", +- .udesc = "Count prefetches honly", ++ .udesc = "Count prefetches only", + .ucode = 0x8, + .grpid = 0, + }, +diff --git a/lib/events/arm_1176_events.h b/lib/events/arm_1176_events.h +index 35a43fa..d31d810 100644 +--- a/lib/events/arm_1176_events.h ++++ b/lib/events/arm_1176_events.h +@@ -121,7 +121,7 @@ static const arm_entry_t arm_1176_pe []={ + }, + {.name = "PROC_RET_EXEC_PRED", + .code = 0x25, +- .desc = "Proceudre return instruction executed and address predicted" ++ .desc = "Procedure return instruction executed and address predicted" + }, + {.name = "PROC_RET_EXEC_PRED_INCORRECT", + .code = 0x26, +diff --git a/lib/events/arm_cortex_a8_events.h b/lib/events/arm_cortex_a8_events.h +index 2b61dda..f5c99e5 100644 +--- a/lib/events/arm_cortex_a8_events.h ++++ b/lib/events/arm_cortex_a8_events.h +@@ -86,7 +86,7 @@ static const arm_entry_t arm_cortex_a8_pe []={ + }, + {.name = "PC_IMM_BRANCH", + .code = 0x0d, +- .desc = "Immedidate branches architecturally executed" ++ .desc = "Immediate branches architecturally executed" + }, + {.name = "PC_PROC_RETURN", + .code = 0x0e, +@@ -194,7 +194,7 @@ static const arm_entry_t arm_cortex_a8_pe []={ + }, + {.name = "OP_EXECUTED", + .code = 0x55, +- .desc = "Operations excuted (includes sub-ops in multi-cycle instructions)" ++ .desc = "Operations executed (includes sub-ops in multi-cycle instructions)" + }, + {.name = "CYCLES_INST_STALL", + .code = 0x56, +diff --git a/lib/events/arm_cortex_a9_events.h b/lib/events/arm_cortex_a9_events.h +index c034bd3..ef5b337 100644 +--- a/lib/events/arm_cortex_a9_events.h ++++ b/lib/events/arm_cortex_a9_events.h +@@ -86,7 +86,7 @@ static const arm_entry_t arm_cortex_a9_pe []={ + }, + {.name = "PC_IMM_BRANCH", + .code = 0x0d, +- .desc = "Immedidate branches architecturally executed" ++ .desc = "Immediate branches architecturally executed" + }, + {.name = "UNALIGNED_ACCESS", + .code = 0x0f, +diff --git a/lib/events/intel_atom_events.h b/lib/events/intel_atom_events.h +index 0b72ad3..3b564be 100644 +--- a/lib/events/intel_atom_events.h ++++ b/lib/events/intel_atom_events.h +@@ -85,7 +85,7 @@ static const intel_x86_umask_t atom_icache[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "MISSES", +- .udesc = "Count all instructions fetches that miss tha icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", ++ .udesc = "Count all instructions fetches that miss the icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", + .ucode = 0x200, + }, + }; +diff --git a/lib/events/intel_coreduo_events.h b/lib/events/intel_coreduo_events.h +index 16336f9..9e0cb5e 100644 +--- a/lib/events/intel_coreduo_events.h ++++ b/lib/events/intel_coreduo_events.h +@@ -315,12 +315,12 @@ static const intel_x86_umask_t coreduo_sse_instructions_retired[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PACKED_DOUBLE", +- .udesc = "Number of SSE/SSE2 packed double percision instructions retired", ++ .udesc = "Number of SSE/SSE2 packed double precision instructions retired", + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "DOUBLE", +- .udesc = "Number of SSE/SSE2 scalar double percision instructions retired", ++ .udesc = "Number of SSE/SSE2 scalar double precision instructions retired", + .ucode = 0x300, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -579,7 +579,7 @@ static const intel_x86_entry_t intel_coreduo_pe[]={ + .umasks = coreduo_l2_lines_in, /* identical to actual umasks list for this event */ + }, + { .name = "L2_IFETCH", +- .desc = "L2 instruction fetches from nstruction fetch unit (includes speculative fetches) ", ++ .desc = "L2 instruction fetches from instruction fetch unit (includes speculative fetches) ", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x28, +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index ccd4a2b..0491fdd 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -26,7 +26,7 @@ + + static const intel_x86_umask_t hsw_baclears[]={ + { .uname = "ANY", +- .udesc = "NUmber of front-end re-steers due to BPU misprediction", ++ .udesc = "Number of front-end re-steers due to BPU misprediction", + .ucode = 0x1f00, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +@@ -1511,7 +1511,7 @@ static const intel_x86_umask_t hsw_hle_retired[]={ + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ABORTED_MISC4", +- .udesc = "Number of times an HLE execution aborted due to incomptaible memory type", ++ .udesc = "Number of times an HLE execution aborted due to incompatible memory type", + .ucode = 0x4000, + .uflags = INTEL_X86_NCOMBO, + }, +@@ -1554,7 +1554,7 @@ static const intel_x86_umask_t hsw_rtm_retired[]={ + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ABORTED_MISC4", +- .udesc = "Number of times an RTM execution aborted due to incomptaible memory type", ++ .udesc = "Number of times an RTM execution aborted due to incompatible memory type", + .ucode = 0x4000, + .uflags = INTEL_X86_NCOMBO, + }, +@@ -1779,7 +1779,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ + .code = 0xc5, + }, + { .name = "BACLEARS", +- .desc = "Branch resteered", ++ .desc = "Branch re-steered", + .code = 0xe6, + .cntmsk = 0xff, + .ngrp = 1, +@@ -1934,7 +1934,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ + .umasks = hsw_inst_retired + }, + { .name = "INT_MISC", +- .desc = "Miscelleanous interruptions", ++ .desc = "Miscellaneous interruptions", + .code = 0xd, + .cntmsk = 0xff, + .ngrp = 1, +diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h +index 407059b..28e0216 100644 +--- a/lib/events/intel_ivb_events.h ++++ b/lib/events/intel_ivb_events.h +@@ -510,7 +510,7 @@ static const intel_x86_umask_t ivb_idq[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "MS_DSB_UOPS_OCCUR", +- .udesc = "Occurences of DSB MS going active", ++ .udesc = "Occurrences of DSB MS going active", + .uequiv = "MS_DSB_UOPS:c=1:e=1", + .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (0x1 << INTEL_X86_CMASK_BIT), + .uflags= INTEL_X86_NCOMBO, +@@ -861,7 +861,7 @@ static const intel_x86_umask_t ivb_ld_blocks[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "NO_SR", +- .udesc = "Number of times that split load operations are temporarily blocked because all resources for handlding the split accesses are in use", ++ .udesc = "Number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", + .ucode = 0x800, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -1553,7 +1553,7 @@ static const intel_x86_umask_t ivb_offcore_response[]={ + + static const intel_x86_umask_t ivb_baclears[]={ + { .uname = "ANY", +- .udesc = "Counts the number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", ++ .udesc = "Counts the number of times the front end is re-steered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", + .ucode = 0x1f00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +@@ -1663,7 +1663,7 @@ static const intel_x86_entry_t intel_ivb_pe[]={ + .umasks = ivb_arith, + }, + { .name = "BACLEARS", +- .desc = "Branch resteered", ++ .desc = "Branch re-steered", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0xe6, +@@ -1909,7 +1909,7 @@ static const intel_x86_entry_t intel_ivb_pe[]={ + .umasks = ivb_l2_l1d_wb_rqsts, + }, + { .name = "L2_LINES_IN", +- .desc = "L2 lines alloacated", ++ .desc = "L2 lines allocated", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0xf1, +diff --git a/lib/events/intel_netburst_events.h b/lib/events/intel_netburst_events.h +index e24f22f..bf08b02 100644 +--- a/lib/events/intel_netburst_events.h ++++ b/lib/events/intel_netburst_events.h +@@ -1495,7 +1495,7 @@ static const netburst_entry_t netburst_events[] = { + + /* 44 */ + {.name = "machine_clear", +- .desc = "Number of occurances when the entire " ++ .desc = "Number of occurrences when the entire " + "pipeline of the machine is cleared", + .event_select = 0x2, + .escr_select = 0x5, +@@ -1506,7 +1506,7 @@ static const netburst_entry_t netburst_events[] = { + .desc = "Counts for a portion of the many cycles while the " + "machine is cleared for any cause. Use edge-" + "triggering for this bit only to get a count of " +- "occurances versus a duration", ++ "occurrences versus a duration", + .bit = 0, + }, + {.name = "MOCLEAR", +diff --git a/lib/events/intel_nhm_events.h b/lib/events/intel_nhm_events.h +index e47068f..265002d 100644 +--- a/lib/events/intel_nhm_events.h ++++ b/lib/events/intel_nhm_events.h +@@ -61,7 +61,7 @@ static const intel_x86_umask_t nhm_baclear[]={ + + static const intel_x86_umask_t nhm_bpu_clears[]={ + { .uname = "EARLY", +- .udesc = "Early Branch Prediciton Unit clears", ++ .udesc = "Early Branch Prediction Unit clears", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -332,7 +332,7 @@ static const intel_x86_umask_t nhm_fp_assist[]={ + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, + }, + { .uname = "INPUT", +- .udesc = "Floating poiint assists for invalid input value (Precise Event)", ++ .udesc = "Floating point assists for invalid input value (Precise Event)", + .ucode = 0x400, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, +@@ -740,7 +740,7 @@ static const intel_x86_umask_t nhm_l2_hw_prefetch[]={ + + static const intel_x86_umask_t nhm_l2_lines_in[]={ + { .uname = "ANY", +- .udesc = "L2 lines alloacated", ++ .udesc = "L2 lines allocated", + .ucode = 0x700, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +@@ -1976,7 +1976,7 @@ static const intel_x86_entry_t intel_nhm_pe[]={ + .umasks = nhm_fp_assist, + }, + { .name = "FP_COMP_OPS_EXE", +- .desc = "Floating poing computational micro-ops", ++ .desc = "Floating point computational micro-ops", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xf, + .code = 0x10, +@@ -2179,7 +2179,7 @@ static const intel_x86_entry_t intel_nhm_pe[]={ + .umasks = nhm_l2_hw_prefetch, + }, + { .name = "L2_LINES_IN", +- .desc = "L2 lines alloacated", ++ .desc = "L2 lines allocated", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xf, + .code = 0xf1, +@@ -2348,7 +2348,7 @@ static const intel_x86_entry_t intel_nhm_pe[]={ + .code = 0x1b2, + }, + { .name = "PARTIAL_ADDRESS_ALIAS", +- .desc = "False dependencies due to partial address froming", ++ .desc = "False dependencies due to partial address forming", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xf, + .code = 0x107, +diff --git a/lib/events/intel_nhm_unc_events.h b/lib/events/intel_nhm_unc_events.h +index 513a730..6421a6b 100644 +--- a/lib/events/intel_nhm_unc_events.h ++++ b/lib/events/intel_nhm_unc_events.h +@@ -29,15 +29,15 @@ + + static const intel_x86_umask_t nhm_unc_unc_dram_open[]={ + { .uname = "CH0", +- .udesc = "DRAM Channel 0 open comamnds issued for read or write", ++ .udesc = "DRAM Channel 0 open commands issued for read or write", + .ucode = 0x100, + }, + { .uname = "CH1", +- .udesc = "DRAM Channel 1 open comamnds issued for read or write", ++ .udesc = "DRAM Channel 1 open commands issued for read or write", + .ucode = 0x200, + }, + { .uname = "CH2", +- .udesc = "DRAM Channel 2 open comamnds issued for read or write", ++ .udesc = "DRAM Channel 2 open commands issued for read or write", + .ucode = 0x400, + }, + }; +@@ -878,7 +878,7 @@ static const intel_x86_entry_t intel_nhm_unc_pe[]={ + .flags = INTEL_X86_FIXED, + }, + { .name = "UNC_DRAM_OPEN", +- .desc = "DRAM open comamnds issued for read or write", ++ .desc = "DRAM open commands issued for read or write", + .modmsk = NHM_UNC_ATTRS, + .cntmsk = 0x1fe00000, + .code = 0x60, +diff --git a/lib/events/intel_p6_events.h b/lib/events/intel_p6_events.h +index ba9512d..f8a83a8 100644 +--- a/lib/events/intel_p6_events.h ++++ b/lib/events/intel_p6_events.h +@@ -168,7 +168,7 @@ static const intel_x86_entry_t intel_p6_pe[]={ + .code = 0xc0, + }, + { .name = "DATA_MEM_REFS", +- .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", ++ .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x43, +@@ -237,7 +237,7 @@ static const intel_x86_entry_t intel_p6_pe[]={ + .umasks = p6_l2_ifetch, + }, + { .name = "L2_ST", +- .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", ++ .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x2a, +diff --git a/lib/events/intel_pii_events.h b/lib/events/intel_pii_events.h +index 4bff1ec..5846c64 100644 +--- a/lib/events/intel_pii_events.h ++++ b/lib/events/intel_pii_events.h +@@ -132,7 +132,7 @@ static const intel_x86_entry_t intel_pii_pe[]={ + .code = 0xc0, + }, + { .name = "DATA_MEM_REFS", +- .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", ++ .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x43, +@@ -201,7 +201,7 @@ static const intel_x86_entry_t intel_pii_pe[]={ + .umasks = pii_l2_ifetch, + }, + { .name = "L2_ST", +- .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", ++ .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x2a, +diff --git a/lib/events/intel_pm_events.h b/lib/events/intel_pm_events.h +index 4fa8795..e32a883 100644 +--- a/lib/events/intel_pm_events.h ++++ b/lib/events/intel_pm_events.h +@@ -235,7 +235,7 @@ static const intel_x86_entry_t intel_pm_pe[]={ + .code = 0xc0, + }, + { .name = "DATA_MEM_REFS", +- .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", ++ .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x43, +@@ -304,7 +304,7 @@ static const intel_x86_entry_t intel_pm_pe[]={ + .umasks = pm_l2_ifetch, + }, + { .name = "L2_ST", +- .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", ++ .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x2a, +diff --git a/lib/events/intel_ppro_events.h b/lib/events/intel_ppro_events.h +index 63ad7b7..0555652 100644 +--- a/lib/events/intel_ppro_events.h ++++ b/lib/events/intel_ppro_events.h +@@ -73,7 +73,7 @@ static const intel_x86_entry_t intel_ppro_pe[]={ + .code = 0xc0, + }, + { .name = "DATA_MEM_REFS", +- .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", ++ .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x43, +@@ -142,7 +142,7 @@ static const intel_x86_entry_t intel_ppro_pe[]={ + .umasks = ppro_l2_ifetch, + }, + { .name = "L2_ST", +- .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", ++ .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .modmsk = INTEL_X86_ATTRS, + .cntmsk = 0x3, + .code = 0x2a, +diff --git a/lib/events/intel_slm_events.h b/lib/events/intel_slm_events.h +index c540e64..558dbf8 100644 +--- a/lib/events/intel_slm_events.h ++++ b/lib/events/intel_slm_events.h +@@ -32,7 +32,7 @@ static const intel_x86_umask_t slm_icache[]={ + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "MISSES", +- .udesc = "Count all instructions fetches that miss tha icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", ++ .udesc = "Count all instructions fetches that miss the icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -97,7 +97,7 @@ static const intel_x86_umask_t slm_inst_retired[]={ + + static const intel_x86_umask_t slm_l2_reject_xq[]={ + { .uname = "ALL", +- .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indictes back pressure from the IDI link. The XQ may reject transactions fro mthe L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", ++ .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the IDI link. The XQ may reject transactions fro mthe L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", + .ucode = 0x000, + .uflags= INTEL_X86_DFL, + }, +@@ -345,7 +345,7 @@ static const intel_x86_umask_t slm_rehabq[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "STA_FULL", +- .udesc = "Number of retired stores that are delayed becuase there is not a store address buffer available", ++ .udesc = "Number of retired stores that are delayed because there is not a store address buffer available", + .ucode = 0x2000, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -625,7 +625,7 @@ static const intel_x86_umask_t slm_ms_decoded[]={ + + static const intel_x86_umask_t slm_decode_restriction[]={ + { .uname = "PREDECODE_WRONG", +- .udesc = "Number of times the prediction (from the predecode cache) for intruction length is incorrect", ++ .udesc = "Number of times the prediction (from the predecode cache) for instruction length is incorrect", + .ucode = 0x0100, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +diff --git a/lib/events/intel_snb_events.h b/lib/events/intel_snb_events.h +index 68e8d2d..11937ea 100644 +--- a/lib/events/intel_snb_events.h ++++ b/lib/events/intel_snb_events.h +@@ -571,7 +571,7 @@ static const intel_x86_umask_t snb_idq[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "MS_DSB_UOPS_OCCUR", +- .udesc = "Occurences of DSB MS going active", ++ .udesc = "Occurrences of DSB MS going active", + .uequiv = "MS_DSB_UOPS:c=1:e=1", + .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (0x1 << INTEL_X86_CMASK_BIT), + .uflags= INTEL_X86_NCOMBO, +@@ -1408,17 +1408,17 @@ static const intel_x86_umask_t snb_uops_dispatched_port[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PORT_3_LD", +- .udesc = "Cycles in which a load uop is disptached on port 3", ++ .udesc = "Cycles in which a load uop is dispatched on port 3", + .ucode = 0x1000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PORT_3_STA", +- .udesc = "Cycles in which a store uop is disptached on port 3", ++ .udesc = "Cycles in which a store uop is dispatched on port 3", + .ucode = 0x2000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PORT_3", +- .udesc = "Cycles in which a uop is disptached on port 3", ++ .udesc = "Cycles in which a uop is dispatched on port 3", + .ucode = 0x3000, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -1683,7 +1683,7 @@ static const intel_x86_umask_t snb_offcore_response[]={ + + static const intel_x86_umask_t snb_baclears[]={ + { .uname = "ANY", +- .udesc = "Counts the number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", ++ .udesc = "Counts the number of times the front end is re-steered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", + .ucode = 0x1f00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +@@ -1742,7 +1742,7 @@ static const intel_x86_entry_t intel_snb_pe[]={ + .umasks = snb_arith, + }, + { .name = "BACLEARS", +- .desc = "Branch resteered", ++ .desc = "Branch re-steered", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0xe6, +@@ -2033,7 +2033,7 @@ static const intel_x86_entry_t intel_snb_pe[]={ + .umasks = snb_l2_l1d_wb_rqsts, + }, + { .name = "L2_LINES_IN", +- .desc = "L2 lines alloacated", ++ .desc = "L2 lines allocated", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0xf1, +diff --git a/lib/events/intel_snbep_events.h b/lib/events/intel_snbep_events.h +index 0df2b5c..17b51c6 100644 +--- a/lib/events/intel_snbep_events.h ++++ b/lib/events/intel_snbep_events.h +@@ -571,7 +571,7 @@ static const intel_x86_umask_t snbep_idq[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "MS_DSB_UOPS_OCCUR", +- .udesc = "Occurences of DSB MS going active", ++ .udesc = "Occurrences of DSB MS going active", + .uequiv = "MS_DSB_UOPS:c=1:e=1", + .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (0x1 << INTEL_X86_CMASK_BIT), + .uflags= INTEL_X86_NCOMBO, +@@ -1421,17 +1421,17 @@ static const intel_x86_umask_t snbep_uops_dispatched_port[]={ + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PORT_3_LD", +- .udesc = "Cycles in which a load uop is disptached on port 3", ++ .udesc = "Cycles in which a load uop is dispatched on port 3", + .ucode = 0x1000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PORT_3_STA", +- .udesc = "Cycles in which a store uop is disptached on port 3", ++ .udesc = "Cycles in which a store uop is dispatched on port 3", + .ucode = 0x2000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PORT_3", +- .udesc = "Cycles in which a uop is disptached on port 3", ++ .udesc = "Cycles in which a uop is dispatched on port 3", + .ucode = 0x3000, + .uflags= INTEL_X86_NCOMBO, + }, +@@ -1695,7 +1695,7 @@ static const intel_x86_umask_t snbep_offcore_response[]={ + + static const intel_x86_umask_t snbep_baclears[]={ + { .uname = "ANY", +- .udesc = "Counts the number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", ++ .udesc = "Counts the number of times the front end is re-steered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", + .ucode = 0x1f00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +@@ -1754,7 +1754,7 @@ static const intel_x86_entry_t intel_snbep_pe[]={ + .umasks = snbep_arith, + }, + { .name = "BACLEARS", +- .desc = "Branch resteered", ++ .desc = "Branch re-steered", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0xe6, +@@ -2045,7 +2045,7 @@ static const intel_x86_entry_t intel_snbep_pe[]={ + .umasks = snbep_l2_l1d_wb_rqsts, + }, + { .name = "L2_LINES_IN", +- .desc = "L2 lines alloacated", ++ .desc = "L2 lines allocated", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0xf1, +diff --git a/lib/events/intel_snbep_unc_cbo_events.h b/lib/events/intel_snbep_unc_cbo_events.h +index 125c5ad..b2f0878 100644 +--- a/lib/events/intel_snbep_unc_cbo_events.h ++++ b/lib/events/intel_snbep_unc_cbo_events.h +@@ -641,7 +641,7 @@ static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ + .umasks = snbep_unc_c_llc_victims, + }, + { .name = "UNC_C_MISC", +- .desc = "Miscelleanous C-Box events", ++ .desc = "Miscellaneous C-Box events", + .modmsk = SNBEP_UNC_CBO_ATTRS, + .cntmsk = 0x3, + .code = 0x39, +@@ -659,7 +659,7 @@ static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ + .umasks = snbep_unc_c_ring_ad_used, + }, + { .name = "UNC_C_RING_AK_USED", +- .desc = "Acknowledgement ring in use. Counts number of cycles ring is being used at this ring stop", ++ .desc = "Acknowledgment ring in use. Counts number of cycles ring is being used at this ring stop", + .modmsk = SNBEP_UNC_CBO_ATTRS, + .cntmsk = 0xc, + .code = 0x1c, +diff --git a/lib/events/intel_wsm_events.h b/lib/events/intel_wsm_events.h +index 07dffb0..d6f59cb 100644 +--- a/lib/events/intel_wsm_events.h ++++ b/lib/events/intel_wsm_events.h +@@ -299,7 +299,7 @@ static const intel_x86_umask_t wsm_dtlb_load_misses[]={ + + static const intel_x86_umask_t wsm_l2_lines_in[]={ + { .uname = "ANY", +- .udesc = "L2 lines alloacated", ++ .udesc = "L2 lines allocated", + .ucode = 0x700, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +@@ -1993,7 +1993,7 @@ static const intel_x86_entry_t intel_wsm_pe[]={ + .umasks = wsm_dtlb_load_misses, + }, + { .name = "L2_LINES_IN", +- .desc = "L2 lines alloacated", ++ .desc = "L2 lines allocated", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xf, + .code = 0xf1, +diff --git a/lib/events/intel_wsm_unc_events.h b/lib/events/intel_wsm_unc_events.h +index fa76e26..66fd961 100644 +--- a/lib/events/intel_wsm_unc_events.h ++++ b/lib/events/intel_wsm_unc_events.h +@@ -29,15 +29,15 @@ + + static const intel_x86_umask_t wsm_unc_unc_dram_open[]={ + { .uname = "CH0", +- .udesc = "DRAM Channel 0 open comamnds issued for read or write", ++ .udesc = "DRAM Channel 0 open commands issued for read or write", + .ucode = 0x100, + }, + { .uname = "CH1", +- .udesc = "DRAM Channel 1 open comamnds issued for read or write", ++ .udesc = "DRAM Channel 1 open commands issued for read or write", + .ucode = 0x200, + }, + { .uname = "CH2", +- .udesc = "DRAM Channel 2 open comamnds issued for read or write", ++ .udesc = "DRAM Channel 2 open commands issued for read or write", + .ucode = 0x400, + }, + }; +@@ -925,7 +925,7 @@ static const intel_x86_entry_t intel_wsm_unc_pe[]={ + .flags = INTEL_X86_FIXED, + }, + { .name = "UNC_DRAM_OPEN", +- .desc = "DRAM open comamnds issued for read or write", ++ .desc = "DRAM open commands issued for read or write", + .modmsk = NHM_UNC_ATTRS, + .cntmsk = 0x1fe00000, + .code = 0x60, +@@ -1334,7 +1334,7 @@ static const intel_x86_entry_t intel_wsm_unc_pe[]={ + .umasks = wsm_unc_unc_thermal_throttling_temp, /* identical to actual umasks list for this event */ + }, + { .name = "UNC_PROCHOT_ASSERTION", +- .desc = "Number of system ssertions of PROCHOT indicating the entire processor has exceeded the thermal limit", ++ .desc = "Number of system assertions of PROCHOT indicating the entire processor has exceeded the thermal limit", + .modmsk = NHM_UNC_ATTRS, + .cntmsk = 0x1fe00000, + .code = 0x82, +diff --git a/lib/events/intel_x86_arch_events.h b/lib/events/intel_x86_arch_events.h +index 0b9ff6a..c8c862b 100644 +--- a/lib/events/intel_x86_arch_events.h ++++ b/lib/events/intel_x86_arch_events.h +@@ -42,7 +42,7 @@ static intel_x86_entry_t intel_x86_arch_pe[]={ + {.name = "UNHALTED_REFERENCE_CYCLES", + .code = 0x013c, + .cntmsk = 0x400000000ull, /* temporary */ +- .desc = "count reference clock cycles while the clock signal on the specific core is running. The reference clock operates at a fixed frequency, irrespective of core freqeuncy changes due to performance state transitions", ++ .desc = "count reference clock cycles while the clock signal on the specific core is running. The reference clock operates at a fixed frequency, irrespective of core frequency changes due to performance state transitions", + }, + {.name = "LLC_REFERENCES", + .code = 0x4f2e, +diff --git a/lib/events/mips_74k_events.h b/lib/events/mips_74k_events.h +index 399da6e..523627b 100644 +--- a/lib/events/mips_74k_events.h ++++ b/lib/events/mips_74k_events.h +@@ -224,7 +224,7 @@ static const mips_entry_t mips_74k_pe []={ + { + .name = "DCACHE_MISSES", + .code = 0x98, +- .desc = "D-cache misses. This count is per instruction at grad- uation and includes load, store, prefetch, synci and address based cacheops", ++ .desc = "D-cache misses. This count is per instruction at graduation and includes load, store, prefetch, synci and address based cacheops", + }, + { + .name = "JTLB_DATA_ACCESSES", +@@ -244,7 +244,7 @@ static const mips_entry_t mips_74k_pe []={ + { + .name = "DCACHE_VTAG_MISMATCH", + .code = 0x9a, +- .desc = "The 74K core's D-cache has an auxiliary virtual tag, used to pick the right line early. When (occasionally) the physical tag match and virtual tag match do not line up, it is treated as a cache miss - in processing the miss the virtual tag is correcyed for future accesses. This event counts those bogus misses", ++ .desc = "The 74K core's D-cache has an auxiliary virtual tag, used to pick the right line early. When (occasionally) the physical tag match and virtual tag match do not line up, it is treated as a cache miss - in processing the miss the virtual tag is corrected for future accesses. This event counts those bogus misses", + }, + { + .name = "L2_CACHE_WRITEBACKS", +@@ -349,7 +349,7 @@ static const mips_entry_t mips_74k_pe []={ + { + .name = "INTEGER_INSNS", + .code = 0x28, +- .desc = "Integer instructions graduated (includes nop, ssnop, ehb as well as all arithmetic, locial, shift and extract type operations)", ++ .desc = "Integer instructions graduated (includes nop, ssnop, ehb as well as all arithmetic, logical, shift and extract type operations)", + }, + { + .name = "FPU_INSNS", +diff --git a/lib/events/sparc_ultra3_events.h b/lib/events/sparc_ultra3_events.h +index a074e3a..c19097d 100644 +--- a/lib/events/sparc_ultra3_events.h ++++ b/lib/events/sparc_ultra3_events.h +@@ -20,7 +20,7 @@ static const sparc_entry_t ultra3_pe[] = { + }, + { + .name = "IC_ref", +- .desc = "I-cache refrences", ++ .desc = "I-cache references", + .ctrl = PME_CTRL_S0, + .code = 0x8, + }, +@@ -84,13 +84,13 @@ static const sparc_entry_t ultra3_pe[] = { + }, + { + .name = "Rstall_storeQ", +- .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", ++ .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", + .ctrl = PME_CTRL_S0, + .code = 0x5, + }, + { + .name = "Rstall_IU_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S0, + .code = 0x6, + }, +@@ -204,7 +204,7 @@ static const sparc_entry_t ultra3_pe[] = { + }, + { + .name = "Rstall_FP_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S1, + .code = 0xb, + }, +diff --git a/lib/events/sparc_ultra3i_events.h b/lib/events/sparc_ultra3i_events.h +index a8ce584..f81213e 100644 +--- a/lib/events/sparc_ultra3i_events.h ++++ b/lib/events/sparc_ultra3i_events.h +@@ -20,7 +20,7 @@ static const sparc_entry_t ultra3i_pe[] = { + }, + { + .name = "IC_ref", +- .desc = "I-cache refrences", ++ .desc = "I-cache references", + .ctrl = PME_CTRL_S0, + .code = 0x8, + }, +@@ -84,13 +84,13 @@ static const sparc_entry_t ultra3i_pe[] = { + }, + { + .name = "Rstall_storeQ", +- .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", ++ .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", + .ctrl = PME_CTRL_S0, + .code = 0x5, + }, + { + .name = "Rstall_IU_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S0, + .code = 0x6, + }, +@@ -204,7 +204,7 @@ static const sparc_entry_t ultra3i_pe[] = { + }, + { + .name = "Rstall_FP_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S1, + .code = 0xb, + }, +@@ -383,7 +383,7 @@ static const sparc_entry_t ultra3i_pe[] = { + /* PIC1 events specific to UltraSPARC-III+/IIIi */ + { + .name = "Re_DC_missovhd", +- .desc = "Used to measure D-cache stall counts seperatedly for L2-cache hits and misses. This counter is used with the recirculation and cache access events to seperately calculate the D-cache loads that hit and miss the L2-cache", ++ .desc = "Used to measure D-cache stall counts separately for L2-cache hits and misses. This counter is used with the recirculation and cache access events to separately calculate the D-cache loads that hit and miss the L2-cache", + .ctrl = PME_CTRL_S1, + .code = 0x4, + }, +diff --git a/lib/events/sparc_ultra3plus_events.h b/lib/events/sparc_ultra3plus_events.h +index 04de4a6..b1dc8ca 100644 +--- a/lib/events/sparc_ultra3plus_events.h ++++ b/lib/events/sparc_ultra3plus_events.h +@@ -20,7 +20,7 @@ static const sparc_entry_t ultra3plus_pe[] = { + }, + { + .name = "IC_ref", +- .desc = "I-cache refrences", ++ .desc = "I-cache references", + .ctrl = PME_CTRL_S0, + .code = 0x8, + }, +@@ -84,13 +84,13 @@ static const sparc_entry_t ultra3plus_pe[] = { + }, + { + .name = "Rstall_storeQ", +- .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", ++ .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", + .ctrl = PME_CTRL_S0, + .code = 0x5, + }, + { + .name = "Rstall_IU_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S0, + .code = 0x6, + }, +@@ -204,7 +204,7 @@ static const sparc_entry_t ultra3plus_pe[] = { + }, + { + .name = "Rstall_FP_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S1, + .code = 0xb, + }, +@@ -421,7 +421,7 @@ static const sparc_entry_t ultra3plus_pe[] = { + /* PIC1 events specific to UltraSPARC-III+/IIIi processors */ + { + .name = "Re_DC_missovhd", +- .desc = "Used to measure D-cache stall counts seperatedly for L2-cache hits and misses. This counter is used with the recirculation and cache access events to seperately calculate the D-cache loads that hit and miss the L2-cache", ++ .desc = "Used to measure D-cache stall counts separately for L2-cache hits and misses. This counter is used with the recirculation and cache access events to separately calculate the D-cache loads that hit and miss the L2-cache", + .ctrl = PME_CTRL_S1, + .code = 0x4, + }, +diff --git a/lib/events/sparc_ultra4plus_events.h b/lib/events/sparc_ultra4plus_events.h +index 47512da..ca473a4 100644 +--- a/lib/events/sparc_ultra4plus_events.h ++++ b/lib/events/sparc_ultra4plus_events.h +@@ -32,13 +32,13 @@ static const sparc_entry_t ultra4plus_pe[] = { + }, + { + .name = "Rstall_storeQ", +- .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", ++ .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", + .ctrl = PME_CTRL_S0, + .code = 0x5, + }, + { + .name = "Rstall_IU_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S0, + .code = 0x6, + }, +@@ -50,7 +50,7 @@ static const sparc_entry_t ultra4plus_pe[] = { + }, + { + .name = "IC_ref", +- .desc = "I-cache refrences", ++ .desc = "I-cache references", + .ctrl = PME_CTRL_S0, + .code = 0x8, + }, +@@ -62,7 +62,7 @@ static const sparc_entry_t ultra4plus_pe[] = { + }, + { + .name = "Rstall_FP_use", +- .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", ++ .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", + .ctrl = PME_CTRL_S0, + .code = 0xa, + }, +@@ -179,7 +179,7 @@ static const sparc_entry_t ultra4plus_pe[] = { + }, + { + .name = "IPB_to_IC_fill", +- .desc = "I-cache filles from the instruction prefetch buffer", ++ .desc = "I-cache fills from the instruction prefetch buffer", + .ctrl = PME_CTRL_S0, + .code = 0x1e, + }, +@@ -577,7 +577,7 @@ static const sparc_entry_t ultra4plus_pe[] = { + }, + { + .name = "L3_miss", +- .desc = "Number of L3 cache misses sent out to SIU from this core by cacheable I-cache, D-cache, P-cache, and W-cache (exclusing block stores) requests", ++ .desc = "Number of L3 cache misses sent out to SIU from this core by cacheable I-cache, D-cache, P-cache, and W-cache (excluding block stores) requests", + .ctrl = PME_CTRL_S1, + .code = 0x31, + }, +-- +1.8.3.1 + +From e799ae1bafabe88d7a63787edd42953850cba676 Mon Sep 17 00:00:00 2001 +From: Steve Kaufmann +Date: Mon, 13 Jan 2014 14:21:09 +0100 +Subject: [PATCH 14/14] fix PMU name description typo in comment + +Signed-off-by: Steve Kaufmann +--- + lib/events/intel_snbep_events.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/events/intel_snbep_events.h b/lib/events/intel_snbep_events.h +index 17b51c6..a9d88f5 100644 +--- a/lib/events/intel_snbep_events.h ++++ b/lib/events/intel_snbep_events.h +@@ -24,7 +24,7 @@ + * + * This file has been automatically generated. + * +- * PMU: snb (Intel Sandy Bridge EP) ++ * PMU: snb_ep (Intel Sandy Bridge EP) + */ + + static const intel_x86_umask_t snbep_agu_bypass_cancel[]={ +-- +1.8.3.1 + diff --git a/SOURCES/libpfm-haswell.patch b/SOURCES/libpfm-haswell.patch new file mode 100644 index 0000000..cb0fd4d --- /dev/null +++ b/SOURCES/libpfm-haswell.patch @@ -0,0 +1,26 @@ +commit 4b0a11762f62f7763be31ade2d70ff0ae5f80fd4 +Author: Vince Weaver +Date: Fri Dec 6 10:17:18 2013 -0500 + + add missing Intel Haswell model numbers + + This patch adds support for a few more Haswell + models: 63, 69, 70, 71. + + Signed-off-by: Vince Weaver + +diff --git a/lib/pfmlib_intel_hsw.c b/lib/pfmlib_intel_hsw.c +index 7d01ff1..f4975f6 100644 +--- a/lib/pfmlib_intel_hsw.c ++++ b/lib/pfmlib_intel_hsw.c +@@ -40,6 +40,10 @@ pfm_hsw_detect(void *this) + + switch (pfm_intel_x86_cfg.model) { + case 60: /* Haswell */ ++ case 63: /* Haswell */ ++ case 69: /* Haswell */ ++ case 70: /* Haswell */ ++ case 71: /* Haswell */ + break; + default: + return PFM_ERR_NOTSUPP; diff --git a/SPECS/libpfm.spec b/SPECS/libpfm.spec index 2b1bf5a..7552556 100644 --- a/SPECS/libpfm.spec +++ b/SPECS/libpfm.spec @@ -10,7 +10,7 @@ Name: libpfm Version: 4.4.0 -Release: 2%{?dist} +Release: 6%{?dist} Summary: Library to encode performance events for use by perf tool @@ -19,6 +19,8 @@ License: MIT URL: http://perfmon2.sourceforge.net/ Source0: http://sourceforge.net/projects/perfmon2/files/libpfm4/%{name}-%{version}.tar.gz Patch1: libpfm-power8.patch +Patch10: libpfm-haswell.patch +Patch11: libpfm-events.patch %if %{with python} BuildRequires: python-devel @@ -64,6 +66,8 @@ Python bindings for libpfm4 and perf_event_open system call. %setup -q %patch1 -p1 +%patch10 -p1 +%patch11 -p1 %build %if %{with python} @@ -111,6 +115,18 @@ make \ %endif %changelog +* Fri Jan 24 2014 Daniel Mach - 4.4.0-6 +- Mass rebuild 2014-01-24 + +* Tue Jan 14 2014 William Cohen - 4.4.0-5 +- Update event descriptions. + +* Mon Jan 13 2014 William Cohen - 4.4.0-4 +- Add Haswell model numbers. + +* Fri Dec 27 2013 Daniel Mach - 4.4.0-3 +- Mass rebuild 2013-12-27 + * Fri Jul 19 2013 William Cohen 4.4.0-2 - Add IBM power 8 support.