commit 1f169c82d7e788f3a7096b212fa33d26c8155a85 Author: Stephane Eranian Date: Wed Jun 19 08:42:20 2013 +0200 fix event name typo for CPU_IO_REQUESTS_TO_MEMORY_IO Reported by Steve Kaufmann. Signed-off-by: Stephane Eranian diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h index 7195f13..5738e4c 100644 --- a/lib/events/amd64_events_fam15h.h +++ b/lib/events/amd64_events_fam15h.h @@ -2277,7 +2277,7 @@ static const amd64_entry_t amd64_fam15h_pe[]={ .ngrp = 1, .umasks = amd64_fam15h_thermal_status, }, -{ .name = "CPU_0O_REQUESTS_TO_MEMORY_IO", +{ .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", .desc = "CPU/IO Requests to Memory/IO", .code = 0xe9, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_io_requests_to_memory_io), commit 0f7cd3b77060def8a91218819493effe276350c8 Author: Stephane Eranian Date: Mon Jul 1 08:02:09 2013 +0200 fix event code for Intel Haswell LSD event Was 0xa0 instead of 0xa8 Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index bc0549e..e00291e 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -2228,7 +2228,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ }, { .name = "LSD", .desc = "Loop stream detector", - .code = 0xa0, + .code = 0xa8, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, commit 7d74c8db594447b7235daf3a54154b9a9f17da0d Author: Stephane Eranian Date: Mon Jul 1 08:32:37 2013 +0200 remove unsupported umask combo for HSW BR_MISP_EXEC and BR_INST_EXEC Some umask combinations were not supported Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index e00291e..edcc6bc 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -63,11 +63,6 @@ static const intel_x86_umask_t hsw_br_inst_exec[]={ .ucode = 0x9000, .uflags = INTEL_X86_NCOMBO, }, - { .uname = "TAKEN_INDIRECT_NEAR_CALL", - .udesc = "All taken indirect calls, including both register and 
memory indirect", - .ucode = 0xa000, - .uflags = INTEL_X86_NCOMBO, - }, { .uname = "ALL_CONDITIONAL", .udesc = "Speculative and retired macro-conditional branches", .ucode = 0xc100, @@ -93,9 +88,9 @@ static const intel_x86_umask_t hsw_br_inst_exec[]={ .ucode = 0xd000, .uflags = INTEL_X86_NCOMBO, }, - { .uname = "ANY_INDIRECT_NEAR_CALL", + { .uname = "TAKEN_INDIRECT_NEAR_CALL", .udesc = "All indirect calls, including both register and memory indirect", - .ucode = 0xe000, + .ucode = 0xa000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_BRANCHES", @@ -174,16 +169,6 @@ static const intel_x86_umask_t hsw_br_misp_exec[]={ .ucode = 0xc400, .uflags = INTEL_X86_NCOMBO, }, - { .uname = "ANY_RETURN_NEAR", - .udesc = "Speculative and retired mispredicted indirect branches with return mnemonic", - .ucode = 0xc800, - .uflags = INTEL_X86_NCOMBO, - }, - { .uname = "ANY_INDIRECT_NEAR_CALL", - .udesc = "All mispredicted indirect calls, including both register and memory indirect", - .ucode = 0xe000, - .uflags = INTEL_X86_NCOMBO, - }, { .uname = "ALL_BRANCHES", .udesc = "Speculative and retired mispredicted macro conditional branches", .ucode = 0xff00, commit b52f161160dc0ddb9dfcdd51e61b4a9171a293ce Author: Stephane Eranian Date: Mon Jul 1 09:34:12 2013 +0200 add Haswell UOPS_EXECUTED:STALL_CYCLES Handy alias to UOPS_EXECUTED:CORE:c=1:i Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index edcc6bc..d479862 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -1101,6 +1101,13 @@ static const intel_x86_umask_t hsw_uops_executed[]={ .ucode = 0x200, .uflags = INTEL_X86_DFL, }, + { .uname = "STALL_CYCLES", + .udesc = "Number of cycles with no uops executed", + .ucode = 0x200 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ + .uequiv = "CORE:c=1:i=1", + .uflags = INTEL_X86_NCOMBO, + .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, + }, }; static const intel_x86_umask_t 
hsw_uops_executed_port[]={ commit 55446fad9f3e2501ae5c249919870466faebb6d2 Author: Stephane Eranian Date: Wed Jul 31 10:32:33 2013 +0200 add os_detect() callback to pfmlib_pmu struct Add new callbacks to invoke an OS-specific callback to detect the PMU. Even though a PMU can be detected via a CPUID that does not mean that it is also supported by the host kernel. So provide an opportunity for PMU support to invoke an OS specific detection routine. For instance, on Linux with perf_events, a PMU is supported by the kernel if it has an entry in /sys/devices/ Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index 71bddd9..e810eca 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -171,6 +171,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= #define PFMLIB_NUM_PMUS (int)(sizeof(pfmlib_pmus)/sizeof(pfmlib_pmu_t *)) static pfmlib_os_t pfmlib_os_none; +pfmlib_os_t *pfmlib_os = &pfmlib_os_none; static pfmlib_os_t *pfmlib_oses[]={ &pfmlib_os_none, @@ -611,6 +612,17 @@ pfmlib_init_pmus(void) if (ret != PFM_SUCCESS) continue; + /* + * check if exported by OS if needed + */ + if (p->os_detect[pfmlib_os->id]) { + ret = p->os_detect[pfmlib_os->id](p); + if (ret != PFM_SUCCESS) { + DPRINT("%s PMU not exported by OS\n", p->name); + continue; + } + } + ret = pfmlib_pmu_activate(p); if (ret == PFM_SUCCESS) nsuccess++; @@ -642,9 +654,13 @@ pfmlib_init_os(void) if (os->detect(os) != PFM_SUCCESS) continue; + if (os != &pfmlib_os_none && pfmlib_os == &pfmlib_os_none) + pfmlib_os = os; + DPRINT("OS layer %s activated\n", os->name); os->flags = PFMLIB_OS_FL_ACTIVATED; } + DPRINT("default OS layer: %s\n", pfmlib_os->name); } int @@ -667,11 +683,13 @@ pfm_initialize(void) pfmlib_init_env(); + /* must be done before pfmlib_init_pmus() */ + pfmlib_init_os(); + ret = pfmlib_init_pmus(); if (ret != PFM_SUCCESS) return ret; - pfmlib_init_os(); pfm_cfg.initdone = 1; diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index 8e203d4..758b65d 100644 --- 
a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -128,6 +128,7 @@ typedef struct pfmlib_pmu { int (*get_event_encoding[PFM_OS_MAX])(void *this, pfmlib_event_desc_t *e); void (*validate_pattrs[PFM_OS_MAX])(void *this, pfmlib_event_desc_t *e); + int (*os_detect[PFM_OS_MAX])(void *this); int (*validate_table)(void *this, FILE *fp); int (*get_num_events)(void *this); /* optional */ void (*display_reg)(void *this, pfmlib_event_desc_t *e, void *val); /* optional */ @@ -286,6 +287,7 @@ extern pfmlib_pmu_t arm_1176_support; extern pfmlib_pmu_t mips_74k_support; extern pfmlib_pmu_t s390x_cpum_cf_support; +extern pfmlib_os_t *pfmlib_os; extern pfmlib_os_t pfmlib_os_perf; extern pfmlib_os_t pfmlib_os_perf_ext; @@ -352,6 +354,10 @@ pfmlib_pidx2idx(pfmlib_pmu_t *pmu, int pidx) #define PFMLIB_ENCODE_PERF(f) \ .get_event_encoding[PFM_OS_PERF_EVENT] = f, \ .get_event_encoding[PFM_OS_PERF_EVENT_EXT] = f + +#define PFMLIB_OS_DETECT(f) \ + .os_detect[PFM_OS_PERF_EVENT] = f, \ + .os_detect[PFM_OS_PERF_EVENT_EXT] = f #else #define PFMLIB_VALID_PERF_PATTRS(f) \ .validate_pattrs[PFM_OS_PERF_EVENT] = NULL, \ @@ -360,6 +366,10 @@ pfmlib_pidx2idx(pfmlib_pmu_t *pmu, int pidx) #define PFMLIB_ENCODE_PERF(f) \ .get_event_encoding[PFM_OS_PERF_EVENT] = NULL, \ .get_event_encoding[PFM_OS_PERF_EVENT_EXT] = NULL + +#define PFMLIB_OS_DETECT(f) \ + .os_detect[PFM_OS_PERF_EVENT] = NULL, \ + .os_detect[PFM_OS_PERF_EVENT_EXT] = NULL #endif #endif /* __PFMLIB_PRIV_H__ */ commit bc8e5737e8e95354aee90edfa29f9c2944cf0a1c Author: Stephane Eranian Date: Wed Jul 31 10:35:09 2013 +0200 Add Intel x86 perf_events OS detection function for PMU To be used with os_detect() callback. Useful for uncore PMUs because depending on the host hardware not all PMUs may necessarily be there, e.g., desktop vs. laptop. 
Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_x86_perf_event.c b/lib/pfmlib_intel_x86_perf_event.c index 443a959..f29af94 100644 --- a/lib/pfmlib_intel_x86_perf_event.c +++ b/lib/pfmlib_intel_x86_perf_event.c @@ -276,3 +276,13 @@ pfm_intel_x86_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) } } } + +int +pfm_intel_x86_perf_detect(void *this) +{ + pfmlib_pmu_t *pmu = this; + char file[64]; + + snprintf(file,sizeof(file), "/sys/devices/%s", pmu->perf_name); + return access(file, R_OK|X_OK) ? PFM_ERR_NOTSUPP : PFM_SUCCESS; +} diff --git a/lib/pfmlib_intel_x86_priv.h b/lib/pfmlib_intel_x86_priv.h index ce369bc..a0810cd 100644 --- a/lib/pfmlib_intel_x86_priv.h +++ b/lib/pfmlib_intel_x86_priv.h @@ -322,6 +322,7 @@ extern int pfm_intel_x86_get_event_attr_info(void *this, int idx, int attr_idx, extern int pfm_intel_x86_get_event_info(void *this, int idx, pfm_event_info_t *info); extern int pfm_intel_x86_valid_pebs(pfmlib_event_desc_t *e); extern int pfm_intel_x86_perf_event_encoding(pfmlib_event_desc_t *e, void *data); +extern int pfm_intel_x86_perf_detect(void *this); extern unsigned int pfm_intel_x86_get_event_nattrs(void *this, int pidx); extern int intel_x86_attr2mod(void *this, int pidx, int attr_idx); commit 0912bb54b9e24b06141f34b1d7efe29b826863eb Author: Stephane Eranian Date: Wed Jul 31 10:36:18 2013 +0200 add Intel IvyBridge uncore PMU os_detect() callback Use the os_detect() callback to detect which of the uncore PMUs are actually supported by host hardware. Not all PMUs are necessarily available, e.g., laptop vs. desktop. 
Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_ivb_unc.c b/lib/pfmlib_intel_ivb_unc.c index bc94807..c2f2165 100644 --- a/lib/pfmlib_intel_ivb_unc.c +++ b/lib/pfmlib_intel_ivb_unc.c @@ -27,7 +27,7 @@ #define INTEL_SNB_UNC_ATTRS \ (_INTEL_X86_ATTR_I|_INTEL_X86_ATTR_E|_INTEL_X86_ATTR_C) -/* same event table and SNB */ +/* same event table as SNB */ #include "events/intel_snb_unc_events.h" static int @@ -68,6 +68,7 @@ pfmlib_pmu_t intel_ivb_unc_cbo##n##_support={ \ .pmu_detect = pfm_ivb_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_nhm_unc_get_perf_encoding), \ + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, \ .get_event_next = pfm_intel_x86_get_event_next, \ .event_is_valid = pfm_intel_x86_event_is_valid, \ commit 4ddac8ecc7c6ebe283df8d45690735f76f17bf85 Author: Stephane Eranian Date: Wed Jul 31 10:39:03 2013 +0200 add Intel SandyBridge uncore PMU os_detect() callback Use the os_detect() callback to detect which of the uncore PMUs are actually supported by host hardware. Not all PMUs are necessarily available, e.g., laptop vs. desktop. 
Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_snb_unc.c b/lib/pfmlib_intel_snb_unc.c index f4bfaba..b0be6e7 100644 --- a/lib/pfmlib_intel_snb_unc.c +++ b/lib/pfmlib_intel_snb_unc.c @@ -68,6 +68,7 @@ pfmlib_pmu_t intel_snb_unc_cbo##n##_support={ \ .pmu_detect = pfm_snb_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_nhm_unc_get_perf_encoding), \ + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, \ .get_event_next = pfm_intel_x86_get_event_next, \ .event_is_valid = pfm_intel_x86_event_is_valid, \ commit 138ec47914922851256e1275e508d94d3ecf7956 Author: Stephane Eranian Date: Wed Jul 31 16:01:05 2013 +0200 fix modmsk for Intel Haswell CYCLE_ACTIVITY event Was marked as V3 when it is V4 Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index d479862..ccd4a2b 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -1855,7 +1855,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ .code = 0xa3, .cntmsk = 0xf, .ngrp = 1, - .modmsk = INTEL_V3_ATTRS & ~_INTEL_X86_ATTR_C, + .modmsk = INTEL_V4_ATTRS & ~_INTEL_X86_ATTR_C, .numasks = LIBPFM_ARRAY_SIZE(hsw_cycle_activity), .umasks = hsw_cycle_activity }, commit e245899e77591e3dca793fd81c9f1add4eaebfd8 Author: Stephane Eranian Date: Wed Jul 31 16:02:02 2013 +0200 add Intel Haswell support for TSX event modifiers Add the following event modifiers on Intel Haswell: - intx: monitor only inside transactional memory region - intxcp: do not monitor inside aborted transactional memory region Also update the validation test accordingly. 
Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c index f069317..435ce9e 100644 --- a/lib/pfmlib_intel_x86.c +++ b/lib/pfmlib_intel_x86.c @@ -40,6 +40,8 @@ const pfmlib_attr_desc_t intel_x86_mods[]={ PFM_ATTR_I("c", "counter-mask in range [0-255]"), /* counter-mask */ PFM_ATTR_B("t", "measure any thread"), /* monitor on both threads */ PFM_ATTR_I("ldlat", "load latency threshold (cycles, [3-65535])"), /* load latency threshold */ + PFM_ATTR_B("intx", "monitor only inside transactional memory region"), + PFM_ATTR_B("intxcp", "do not count occurrences inside aborted transactional memory region"), PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ }; @@ -490,7 +492,18 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) return PFM_ERR_ATTR_VAL; ldlat = ival; break; - + case INTEL_X86_ATTR_INTX: /* in_tx */ + if (modhw & _INTEL_X86_ATTR_INTX) + return PFM_ERR_ATTR_SET; + reg.sel_intx = !!ival; + umodmsk |= _INTEL_X86_ATTR_INTX; + break; + case INTEL_X86_ATTR_INTXCP: /* in_tx_cp */ + if (modhw & _INTEL_X86_ATTR_INTXCP) + return PFM_ERR_ATTR_SET; + reg.sel_intxcp = !!ival; + umodmsk |= _INTEL_X86_ATTR_INTXCP; + break; } } } @@ -627,6 +640,12 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) case INTEL_X86_ATTR_LDLAT: evt_strcat(e->fstr, ":%s=%d", intel_x86_mods[id].name, ldlat); break; + case INTEL_X86_ATTR_INTX: + evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intx); + break; + case INTEL_X86_ATTR_INTXCP: + evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intxcp); + break; } } return PFM_SUCCESS; diff --git a/lib/pfmlib_intel_x86_priv.h b/lib/pfmlib_intel_x86_priv.h index a0810cd..1671ca6 100644 --- a/lib/pfmlib_intel_x86_priv.h +++ b/lib/pfmlib_intel_x86_priv.h @@ -101,7 +101,9 @@ typedef union pfm_intel_x86_reg { unsigned long sel_en:1; /* enable */ unsigned long sel_inv:1; /* invert counter mask */ unsigned long sel_cnt_mask:8; /* counter mask */ - 
unsigned long sel_res2:32; + unsigned long sel_intx:1; /* only in tx region */ + unsigned long sel_intxcp:1; /* excl. aborted tx region */ + unsigned long sel_res2:30; } perfevtsel; struct { @@ -149,14 +151,18 @@ typedef union pfm_intel_x86_reg { #define INTEL_X86_ATTR_C 4 /* counter mask */ #define INTEL_X86_ATTR_T 5 /* any thread */ #define INTEL_X86_ATTR_LDLAT 6 /* load latency threshold */ - -#define _INTEL_X86_ATTR_U (1 << INTEL_X86_ATTR_U) -#define _INTEL_X86_ATTR_K (1 << INTEL_X86_ATTR_K) -#define _INTEL_X86_ATTR_I (1 << INTEL_X86_ATTR_I) -#define _INTEL_X86_ATTR_E (1 << INTEL_X86_ATTR_E) -#define _INTEL_X86_ATTR_C (1 << INTEL_X86_ATTR_C) -#define _INTEL_X86_ATTR_T (1 << INTEL_X86_ATTR_T) -#define _INTEL_X86_ATTR_LDLAT (1 << INTEL_X86_ATTR_LDLAT) +#define INTEL_X86_ATTR_INTX 7 /* in transaction */ +#define INTEL_X86_ATTR_INTXCP 8 /* not aborted transaction */ + +#define _INTEL_X86_ATTR_U (1 << INTEL_X86_ATTR_U) +#define _INTEL_X86_ATTR_K (1 << INTEL_X86_ATTR_K) +#define _INTEL_X86_ATTR_I (1 << INTEL_X86_ATTR_I) +#define _INTEL_X86_ATTR_E (1 << INTEL_X86_ATTR_E) +#define _INTEL_X86_ATTR_C (1 << INTEL_X86_ATTR_C) +#define _INTEL_X86_ATTR_T (1 << INTEL_X86_ATTR_T) +#define _INTEL_X86_ATTR_INTX (1 << INTEL_X86_ATTR_INTX) +#define _INTEL_X86_ATTR_INTXCP (1 << INTEL_X86_ATTR_INTXCP) +#define _INTEL_X86_ATTR_LDLAT (1 << INTEL_X86_ATTR_LDLAT) #define INTEL_X86_ATTRS \ (_INTEL_X86_ATTR_I|_INTEL_X86_ATTR_E|_INTEL_X86_ATTR_C|_INTEL_X86_ATTR_U|_INTEL_X86_ATTR_K) @@ -166,7 +172,7 @@ typedef union pfm_intel_x86_reg { #define INTEL_FIXED2_ATTRS (_INTEL_X86_ATTR_U|_INTEL_X86_ATTR_K) #define INTEL_FIXED3_ATTRS (INTEL_FIXED2_ATTRS|_INTEL_X86_ATTR_T) #define INTEL_V3_ATTRS (INTEL_V2_ATTRS|_INTEL_X86_ATTR_T) -#define INTEL_V4_ATTRS (INTEL_V3_ATTRS) +#define INTEL_V4_ATTRS (INTEL_V3_ATTRS | _INTEL_X86_ATTR_INTX | _INTEL_X86_ATTR_INTXCP) /* let's define some handy shortcuts! 
*/ #define sel_event_select perfevtsel.sel_event_select @@ -180,6 +186,8 @@ typedef union pfm_intel_x86_reg { #define sel_inv perfevtsel.sel_inv #define sel_cnt_mask perfevtsel.sel_cnt_mask #define sel_anythr perfevtsel.sel_anythr +#define sel_intx perfevtsel.sel_intx +#define sel_intxcp perfevtsel.sel_intxcp /* * shift relative to start of register diff --git a/tests/validate_x86.c b/tests/validate_x86.c index e3e2866..f351bc0 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1891,7 +1891,7 @@ static const test_event_t x86_test_events[]={ .count = 2, .codes[0] = 0x5101cd, .codes[1] = 3, - .fstr = "hsw::MEM_TRANS_RETIRED:LOAD_LATENCY:k=0:u=1:e=0:i=0:c=0:t=0:ldlat=3", + .fstr = "hsw::MEM_TRANS_RETIRED:LOAD_LATENCY:k=0:u=1:e=0:i=0:c=0:t=0:ldlat=3:intx=0:intxcp=0", }, { SRC_LINE, .name = "hsw::mem_trans_retired:latency_above_threshold:ldlat=1000000", @@ -1903,22 +1903,40 @@ static const test_event_t x86_test_events[]={ .count = 2, .codes[0] = 0x5301cd, .codes[1] = 3, - .fstr = "hsw::MEM_TRANS_RETIRED:LOAD_LATENCY:k=1:u=1:e=0:i=0:c=0:t=0:ldlat=3", + .fstr = "hsw::MEM_TRANS_RETIRED:LOAD_LATENCY:k=1:u=1:e=0:i=0:c=0:t=0:ldlat=3:intx=0:intxcp=0", }, { SRC_LINE, .name = "hsw::mem_trans_retired:load_latency:ldlat=1000000", .ret = PFM_ERR_ATTR_VAL, }, { SRC_LINE, - .name = "hsw::mem_trans_retired:latency_above_threshold:ldlat=2", + .name = "hsw::mem_trans_retired:latency_above_threshold:ldlat=2:intx=0:intxcp=0", .ret = PFM_ERR_ATTR_VAL, }, { SRC_LINE, + .name = "hsw::inst_Retired:any_p:intx", + .count = 1, + .codes[0] = 0x1005300c0, + .fstr = "hsw::INST_RETIRED:ANY_P:k=1:u=1:e=0:i=0:c=0:t=0:intx=1:intxcp=0", + }, + { SRC_LINE, + .name = "hsw::inst_Retired:any_p:intx:intxcp", + .count = 1, + .codes[0] = 0x3005300c0, + .fstr = "hsw::INST_RETIRED:ANY_P:k=1:u=1:e=0:i=0:c=0:t=0:intx=1:intxcp=1", + }, + { SRC_LINE, + .name = "hsw::inst_Retired:any_p:intx=0:intxcp", + .count = 1, + .codes[0] = 0x2005300c0, + .fstr = 
"hsw::INST_RETIRED:ANY_P:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=1", + }, + { SRC_LINE, .name = "hsw::cycle_activity:cycles_l2_pending", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x15301a3, - .fstr = "hsw::CYCLE_ACTIVITY:CYCLES_L2_PENDING:k=1:u=1:e=0:i=0:t=0", + .fstr = "hsw::CYCLE_ACTIVITY:CYCLES_L2_PENDING:k=1:u=1:e=0:i=0:t=0:intx=0:intxcp=0", }, { SRC_LINE, .name = "hsw::cycle_activity:cycles_l2_pending:c=8", @@ -1929,14 +1947,14 @@ static const test_event_t x86_test_events[]={ .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x5304c8, - .fstr = "hsw::HLE_RETIRED:ABORTED:k=1:u=1:e=0:i=0:c=0:t=0", + .fstr = "hsw::HLE_RETIRED:ABORTED:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", }, { SRC_LINE, .name = "hsw::rtm_retired:aborted", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x5304c9, - .fstr = "hsw::RTM_RETIRED:ABORTED:k=1:u=1:e=0:i=0:c=0:t=0", + .fstr = "hsw::RTM_RETIRED:ABORTED:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", }, { SRC_LINE, .name = "ivb_unc_cbo0::unc_clockticks", commit f5cedb91ab060cc16f178e98a4432a17b010f2b6 Author: Stephane Eranian Date: Thu Aug 1 11:14:26 2013 +0200 add Intel Haswell intx, intxcp description Signed-off-by: Stephane Eranian diff --git a/docs/man3/libpfm_intel_hsw.3 b/docs/man3/libpfm_intel_hsw.3 index 913577d..fb36acf 100644 --- a/docs/man3/libpfm_intel_hsw.3 +++ b/docs/man3/libpfm_intel_hsw.3 @@ -50,6 +50,14 @@ Measure on both threads at the same time assuming hyper-threading is enabled. Th Pass a latency threshold to the MEM_TRANS_RETIRED:LOAD_LATENCY event. This is an integer attribute that must be in the range [3:65535]. It is required for this event. Note that the event must be used with precise sampling (PEBS). +.TP +.B intx +Monitor the event only when executing inside a transactional memory region (in tx). Event +does not count otherwise. This is a boolean modifier. Default value is 0. +.TP +.B intxcp +Do not count occurrences of the event when they are inside an aborted transactional memory +region. This is a boolean modifier. 
Default value is 0. .SH OFFCORE_RESPONSE events Intel Haswell provides two offcore_response events. They are called OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1. commit a3e9c3ec4d87c0a82e5622c6421133493e7cc0a4 Author: Stephane Eranian Date: Sat Aug 10 15:35:25 2013 +0200 drop umask from snbep_unc_pcu:COREx_TRANSITION_CYCLES Because they do not use .occ_sel bitfield. Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_snbep_unc_pcu_events.h b/lib/events/intel_snbep_unc_pcu_events.h index dd4aa3e..10dc6b3 100644 --- a/lib/events/intel_snbep_unc_pcu_events.h +++ b/lib/events/intel_snbep_unc_pcu_events.h @@ -72,73 +72,49 @@ static const intel_x86_entry_t intel_snbep_unc_p_pe[]={ .desc = "Core C State Transition Cycles", .code = 0x3 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_CORE1_TRANSITION_CYCLES", .desc = "Core C State Transition Cycles", .code = 0x4 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_CORE2_TRANSITION_CYCLES", .desc = "Core C State Transition Cycles", .code = 0x5 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_CORE3_TRANSITION_CYCLES", .desc = "Core C State Transition Cycles", .code = 0x6 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_CORE4_TRANSITION_CYCLES", .desc = "Core C State Transition Cycles", .code = 0x7 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = 
SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_CORE5_TRANSITION_CYCLES", .desc = "Core C State Transition Cycles", .code = 0x8 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_CORE6_TRANSITION_CYCLES", .desc = "Core C State Transition Cycles", .code = 0x9 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_CORE7_TRANSITION_CYCLES", .desc = "Core C State Transition Cycles", .code = 0xa | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, - .ngrp = 1, .modmsk = SNBEP_UNC_PCU_ATTRS, - .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_p_occupancy_counters), - .umasks = snbep_unc_p_occupancy_counters }, { .name = "UNC_P_DEMOTIONS_CORE0", .desc = "Core C State Demotions", diff --git a/tests/validate_x86.c b/tests/validate_x86.c index f351bc0..0b5decc 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1346,11 +1346,11 @@ static const test_event_t x86_test_events[]={ .fstr = "snbep_unc_pcu::UNC_P_CLOCKTICKS:e=0:i=0:t=1", }, { SRC_LINE, - .name = "snbep_unc_pcu::UNC_P_CORE0_TRANSITION_CYCLES:C6", + .name = "snbep_unc_pcu::UNC_P_CORE0_TRANSITION_CYCLES", .ret = PFM_SUCCESS, .count = 1, - .codes[0] = 0x200303, - .fstr = "snbep_unc_pcu::UNC_P_CORE0_TRANSITION_CYCLES:C6:e=0:i=0:t=0", + .codes[0] = 0x200003, + .fstr = "snbep_unc_pcu::UNC_P_CORE0_TRANSITION_CYCLES:e=0:i=0:t=0", }, { SRC_LINE, .name = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES", commit 88c5411265da2525de1e2160a1fdd00356f0f2bd Author: Stephane Eranian Date: Wed Aug 14 15:22:17 2013 +0200 extend pfm_intel_x86_config_t with processor stepping info To be used in future patches where we need the 
stepping to enable certain features (e.g., Intel Atom). Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c index 435ce9e..34c843d 100644 --- a/lib/pfmlib_intel_x86.c +++ b/lib/pfmlib_intel_x86.c @@ -160,6 +160,7 @@ pfm_intel_x86_detect(void) pfm_intel_x86_cfg.family = (a >> 8) & 0xf; // bits 11 - 8 pfm_intel_x86_cfg.model = (a >> 4) & 0xf; // Bits 7 - 4 + pfm_intel_x86_cfg.stepping = a & 0xf; // Bits 0 - 3 /* extended family */ if (pfm_intel_x86_cfg.family == 0xf) diff --git a/lib/pfmlib_intel_x86_priv.h b/lib/pfmlib_intel_x86_priv.h index 1671ca6..0f0c1f5 100644 --- a/lib/pfmlib_intel_x86_priv.h +++ b/lib/pfmlib_intel_x86_priv.h @@ -242,6 +242,7 @@ typedef struct { int model; int family; /* 0 means nothing detected yet */ int arch_version; + int stepping; } pfm_intel_x86_config_t; extern pfm_intel_x86_config_t pfm_intel_x86_cfg; commit 62a92fbf78059ca6951059acbb86ff91b8f5eff9 Author: Michael Werner Date: Mon Aug 19 13:29:54 2013 +0200 Fix handling of edge, invert, threshold modifiers for Intel SNB-EP uncore PMU There was a bug in the handling of required modifiers for some SNB-EP uncore PMU, such as PCU UNC_P_FREQ_BAND0_CYCLES. It was not possible to use edge, invert or threshold modifiers. This patch fixes the problem and adds the necessary validation tests. 
Signed-off-by: Michael Werner Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_snbep_unc.c b/lib/pfmlib_intel_snbep_unc.c index 94ec7de..9813e1c 100644 --- a/lib/pfmlib_intel_snbep_unc.c +++ b/lib/pfmlib_intel_snbep_unc.c @@ -426,7 +426,7 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) return PFM_ERR_ATTR; } - if (modmsk_r && (umodmsk ^ modmsk_r)) { + if (modmsk_r && !(umodmsk & modmsk_r)) { DPRINT("required modifiers missing: 0x%x\n", modmsk_r); return PFM_ERR_ATTR; } diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 0b5decc..08c717a 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1401,6 +1401,38 @@ static const test_event_t x86_test_events[]={ .fstr = "snbep_unc_pcu::UNC_P_FREQ_BAND3_CYCLES:e=0:i=0:t=0:ff=40", }, { SRC_LINE, + .name = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32:e", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x4000b, + .codes[1] = 0x20, + .fstr = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=1:i=0:t=0:ff=32", + }, + { SRC_LINE, + .name = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32:i", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x80000b, + .codes[1] = 0x20, + .fstr = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=0:i=1:t=0:ff=32", + }, + { SRC_LINE, + .name = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32:e:i", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x84000b, + .codes[1] = 0x20, + .fstr = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=1:i=1:t=0:ff=32", + }, + { SRC_LINE, + .name = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32:e:i:t=4", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x484000b, + .codes[1] = 0x20, + .fstr = "snbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=1:i=1:t=4:ff=32", + }, + { SRC_LINE, .name = "SNBEP_UNC_PCU::UNC_P_POWER_STATE_OCCUPANCY:CORES_C0", .ret = PFM_SUCCESS, .count = 1, commit 7b715b1255ad77b273cf3242aeb401c76b9c80b6 Author: Stephane Eranian Date: Mon Aug 26 20:57:53 2013 +0200 fix bug with PFM_OS_PERF_EVENT attributes There was a bug in the 
perf_event_mods[] attribute table that would cause the fully qualified event string to miss certain attributes such as mg, mh. This was due to the fact that in a pfmlib_attr_desc_t table the entries must be ordered by the values of the respective PERF_ATTR_XX because there is direct indexing by attribute value. This patch adds a PFM_ATTR_SKIP pseudo attribute to pad the entries and make the mg, mh aligned in the table with their corresponding PERF_ATTR_XX macros. The perf_get_os_attr_info() and perf_get_os_nattrs() are modified to skip entries. Note that PFM_ATTR_SKIP is ONLY supported for the perf_event OS attributes for now. Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_x86_perf_event.c b/lib/pfmlib_intel_x86_perf_event.c index f29af94..cbb9cb3 100644 --- a/lib/pfmlib_intel_x86_perf_event.c +++ b/lib/pfmlib_intel_x86_perf_event.c @@ -271,7 +271,9 @@ pfm_intel_x86_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) } if (compact) { + /* e->npattrs modified by call */ pfmlib_compact_pattrs(e, i); + /* compensate for i++ */ i--; } } diff --git a/lib/pfmlib_perf_event.c b/lib/pfmlib_perf_event.c index 91465d4..1686140 100644 --- a/lib/pfmlib_perf_event.c +++ b/lib/pfmlib_perf_event.c @@ -44,6 +44,10 @@ static const pfmlib_attr_desc_t perf_event_mods[]={ PFM_ATTR_B("u", "monitor at user level"), /* monitor user level */ PFM_ATTR_B("k", "monitor at kernel level"), /* monitor kernel level */ PFM_ATTR_B("h", "monitor at hypervisor level"), /* monitor hypervisor level */ + PFM_ATTR_SKIP, + PFM_ATTR_SKIP, + PFM_ATTR_SKIP, + PFM_ATTR_SKIP, PFM_ATTR_B("mg", "monitor guest execution"), /* monitor guest level */ PFM_ATTR_B("mh", "monitor host execution"), /* monitor host level */ PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ @@ -312,11 +316,12 @@ static int perf_get_os_nattrs(void *this, pfmlib_event_desc_t *e) { pfmlib_os_t *os = this; - int i = 0; + int i, n = 0; - for (; os->atdesc[i].name; i++); - - return i; + for (i = 0; 
os->atdesc[i].name; i++) + if (!is_empty_attr(os->atdesc+i)) + n++; + return n; } static int @@ -324,10 +329,14 @@ perf_get_os_attr_info(void *this, pfmlib_event_desc_t *e) { pfmlib_os_t *os = this; pfm_event_attr_info_t *info; - int i, j = e->npattrs; + int i, k, j = e->npattrs; + + for (i = k = 0; os->atdesc[i].name; i++) { + /* skip padding entries */ + if (is_empty_attr(os->atdesc+i)) + continue; - for (i = 0; os->atdesc[i].name; i++, j++) { - info = e->pattrs+j; + info = e->pattrs + j + k; info->name = os->atdesc[i].name; info->desc = os->atdesc[i].desc; @@ -337,8 +346,9 @@ perf_get_os_attr_info(void *this, pfmlib_event_desc_t *e) info->type = os->atdesc[i].type; info->is_dfl = 0; info->ctrl = PFM_ATTR_CTRL_PERF_EVENT; + k++; } - e->npattrs += i; + e->npattrs += k; return PFM_SUCCESS; } diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index 758b65d..38d8110 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -25,6 +25,7 @@ #ifndef __PFMLIB_PRIV_H__ #define __PFMLIB_PRIV_H__ #include +#include #define PFM_PLM_ALL (PFM_PLM0|PFM_PLM1|PFM_PLM2|PFM_PLM3|PFM_PLMH) @@ -34,6 +35,7 @@ #define PFM_ATTR_I(y, d) { .name = (y), .type = PFM_ATTR_MOD_INTEGER, .desc = (d) } #define PFM_ATTR_B(y, d) { .name = (y), .type = PFM_ATTR_MOD_BOOL, .desc = (d) } +#define PFM_ATTR_SKIP { .name = "" } /* entry not populated (skipped) */ #define PFM_ATTR_NULL { .name = NULL } #define PFMLIB_EVT_MAX_NAME_LEN 256 @@ -372,4 +374,10 @@ pfmlib_pidx2idx(pfmlib_pmu_t *pmu, int pidx) .os_detect[PFM_OS_PERF_EVENT_EXT] = NULL #endif +static inline int +is_empty_attr(const pfmlib_attr_desc_t *a) +{ + return !a || !a->name || strlen(a->name) == 0 ? 1 : 0; +} + #endif /* __PFMLIB_PRIV_H__ */ commit 59c86f89471a5a0f02c4d020eac83e6a6306810e Author: Stephane Eranian Date: Thu Oct 3 13:51:49 2013 +0200 add PFMLIB_PMU_FL_NO_SMPL pmu flag Add PFMLIB_PMU_FL_NO_SMPL as a generic PMU flag (pmu->flags) to indicate that the PMU does not support sampling. 
This flag can be used to prohibit using sampling related modifiers in the OS layer, for instance. The patch updates the SNB-EP uncore PMUs to use this flag because they do not support sampling. Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_ivb_unc.c b/lib/pfmlib_intel_ivb_unc.c index c2f2165..d3160d2 100644 --- a/lib/pfmlib_intel_ivb_unc.c +++ b/lib/pfmlib_intel_ivb_unc.c @@ -64,7 +64,8 @@ pfmlib_pmu_t intel_ivb_unc_cbo##n##_support={ \ .max_encoding = 1,\ .pe = intel_snb_unc_##p##_pe, \ .atdesc = intel_x86_mods, \ - .flags = PFMLIB_PMU_FL_RAW_UMASK, \ + .flags = PFMLIB_PMU_FL_RAW_UMASK\ + | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_ivb_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_nhm_unc_get_perf_encoding), \ diff --git a/lib/pfmlib_intel_snb_unc.c b/lib/pfmlib_intel_snb_unc.c index b0be6e7..b76c5c0 100644 --- a/lib/pfmlib_intel_snb_unc.c +++ b/lib/pfmlib_intel_snb_unc.c @@ -64,7 +64,8 @@ pfmlib_pmu_t intel_snb_unc_cbo##n##_support={ \ .max_encoding = 1,\ .pe = intel_snb_unc_##p##_pe, \ .atdesc = intel_x86_mods, \ - .flags = PFMLIB_PMU_FL_RAW_UMASK, \ + .flags = PFMLIB_PMU_FL_RAW_UMASK\ + | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_snb_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_nhm_unc_get_perf_encoding), \ diff --git a/lib/pfmlib_intel_snbep_unc_cbo.c b/lib/pfmlib_intel_snbep_unc_cbo.c index 8ebd13e..b1dd1d0 100644 --- a/lib/pfmlib_intel_snbep_unc_cbo.c +++ b/lib/pfmlib_intel_snbep_unc_cbo.c @@ -80,7 +80,8 @@ pfmlib_pmu_t intel_snbep_unc_cb##n##_support = {\ .max_encoding = 2,\ .pe = intel_snbep_unc_c_pe,\ .atdesc = snbep_unc_mods,\ - .flags = PFMLIB_PMU_FL_RAW_UMASK,\ + .flags = PFMLIB_PMU_FL_RAW_UMASK\ + | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_intel_snbep_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ diff --git 
a/lib/pfmlib_intel_snbep_unc_ha.c b/lib/pfmlib_intel_snbep_unc_ha.c index e50ce0d..28e6b71 100644 --- a/lib/pfmlib_intel_snbep_unc_ha.c +++ b/lib/pfmlib_intel_snbep_unc_ha.c @@ -76,7 +76,8 @@ pfmlib_pmu_t intel_snbep_unc_ha_support = { .max_encoding = 3, /* address matchers */ .pe = intel_snbep_unc_h_pe, .atdesc = snbep_unc_mods, - .flags = PFMLIB_PMU_FL_RAW_UMASK, + .flags = PFMLIB_PMU_FL_RAW_UMASK + | PFMLIB_PMU_FL_NO_SMPL, .pmu_detect = pfm_intel_snbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), diff --git a/lib/pfmlib_intel_snbep_unc_imc.c b/lib/pfmlib_intel_snbep_unc_imc.c index 35bfa69..c0cc0e1 100644 --- a/lib/pfmlib_intel_snbep_unc_imc.c +++ b/lib/pfmlib_intel_snbep_unc_imc.c @@ -46,7 +46,8 @@ pfmlib_pmu_t intel_snbep_unc_imc##n##_support = { \ .max_encoding = 1, \ .pe = intel_snbep_unc_m_pe, \ .atdesc = snbep_unc_mods, \ - .flags = PFMLIB_PMU_FL_RAW_UMASK, \ + .flags = PFMLIB_PMU_FL_RAW_UMASK\ + | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_intel_snbep_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ diff --git a/lib/pfmlib_intel_snbep_unc_pcu.c b/lib/pfmlib_intel_snbep_unc_pcu.c index fb4499f..5d4dd47 100644 --- a/lib/pfmlib_intel_snbep_unc_pcu.c +++ b/lib/pfmlib_intel_snbep_unc_pcu.c @@ -78,7 +78,8 @@ pfmlib_pmu_t intel_snbep_unc_pcu_support = { .max_encoding = 2, .pe = intel_snbep_unc_p_pe, .atdesc = snbep_unc_mods, - .flags = PFMLIB_PMU_FL_RAW_UMASK | INTEL_PMU_FL_UNC_OCC, + .flags = PFMLIB_PMU_FL_RAW_UMASK | INTEL_PMU_FL_UNC_OCC + | PFMLIB_PMU_FL_NO_SMPL, .pmu_detect = pfm_intel_snbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), diff --git a/lib/pfmlib_intel_snbep_unc_perf_event.c b/lib/pfmlib_intel_snbep_unc_perf_event.c index 601806f..e6cc625 100644 --- 
a/lib/pfmlib_intel_snbep_unc_perf_event.c +++ b/lib/pfmlib_intel_snbep_unc_perf_event.c @@ -110,6 +110,8 @@ pfm_intel_snbep_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) void pfm_intel_snbep_unc_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) { + pfmlib_pmu_t *pmu = this; + int no_smpl = pmu->flags & PFMLIB_PMU_FL_NO_SMPL; int i, compact; for (i = 0; i < e->npattrs; i++) { @@ -130,11 +132,20 @@ pfm_intel_snbep_unc_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) if (e->pattrs[i].idx == PERF_ATTR_H) compact = 1; + if (no_smpl + && ( e->pattrs[i].idx == PERF_ATTR_FR + || e->pattrs[i].idx == PERF_ATTR_PR + || e->pattrs[i].idx == PERF_ATTR_PE)) + compact = 1; + /* * uncore has no priv level support */ - if ( e->pattrs[i].idx == PERF_ATTR_U - || e->pattrs[i].idx == PERF_ATTR_K) + if (pmu->supported_plm == 0 + && ( e->pattrs[i].idx == PERF_ATTR_U + || e->pattrs[i].idx == PERF_ATTR_K + || e->pattrs[i].idx == PERF_ATTR_MG + || e->pattrs[i].idx == PERF_ATTR_MH)) compact = 1; } diff --git a/lib/pfmlib_intel_snbep_unc_qpi.c b/lib/pfmlib_intel_snbep_unc_qpi.c index 985a494..9cc4bac 100644 --- a/lib/pfmlib_intel_snbep_unc_qpi.c +++ b/lib/pfmlib_intel_snbep_unc_qpi.c @@ -65,7 +65,8 @@ pfmlib_pmu_t intel_snbep_unc_qpi##n##_support = {\ .max_encoding = 3,\ .pe = intel_snbep_unc_q_pe,\ .atdesc = snbep_unc_mods,\ - .flags = PFMLIB_PMU_FL_RAW_UMASK,\ + .flags = PFMLIB_PMU_FL_RAW_UMASK\ + | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_intel_snbep_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ diff --git a/lib/pfmlib_intel_snbep_unc_r2pcie.c b/lib/pfmlib_intel_snbep_unc_r2pcie.c index 1fd3ca8..c6a64b9 100644 --- a/lib/pfmlib_intel_snbep_unc_r2pcie.c +++ b/lib/pfmlib_intel_snbep_unc_r2pcie.c @@ -45,7 +45,8 @@ pfmlib_pmu_t intel_snbep_unc_r2pcie_support = { .max_encoding = 1, .pe = intel_snbep_unc_r2_pe, .atdesc = snbep_unc_mods, - .flags = PFMLIB_PMU_FL_RAW_UMASK, + 
.flags = PFMLIB_PMU_FL_RAW_UMASK + | PFMLIB_PMU_FL_NO_SMPL, .pmu_detect = pfm_intel_snbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), diff --git a/lib/pfmlib_intel_snbep_unc_r3qpi.c b/lib/pfmlib_intel_snbep_unc_r3qpi.c index 288abb1..28bb83a 100644 --- a/lib/pfmlib_intel_snbep_unc_r3qpi.c +++ b/lib/pfmlib_intel_snbep_unc_r3qpi.c @@ -46,7 +46,8 @@ pfmlib_pmu_t intel_snbep_unc_r3qpi##n##_support = {\ .max_encoding = 1,\ .pe = intel_snbep_unc_r3_pe,\ .atdesc = snbep_unc_mods,\ - .flags = PFMLIB_PMU_FL_RAW_UMASK,\ + .flags = PFMLIB_PMU_FL_RAW_UMASK\ + | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_intel_snbep_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ diff --git a/lib/pfmlib_intel_snbep_unc_ubo.c b/lib/pfmlib_intel_snbep_unc_ubo.c index b9b4585..3b8abff 100644 --- a/lib/pfmlib_intel_snbep_unc_ubo.c +++ b/lib/pfmlib_intel_snbep_unc_ubo.c @@ -45,7 +45,8 @@ pfmlib_pmu_t intel_snbep_unc_ubo_support = { .max_encoding = 1, .pe = intel_snbep_unc_u_pe, .atdesc = snbep_unc_mods, - .flags = PFMLIB_PMU_FL_RAW_UMASK, + .flags = PFMLIB_PMU_FL_RAW_UMASK + | PFMLIB_PMU_FL_NO_SMPL, .pmu_detect = pfm_intel_snbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), diff --git a/lib/pfmlib_intel_x86_perf_event.c b/lib/pfmlib_intel_x86_perf_event.c index cbb9cb3..a7a2d4d 100644 --- a/lib/pfmlib_intel_x86_perf_event.c +++ b/lib/pfmlib_intel_x86_perf_event.c @@ -233,6 +233,7 @@ pfm_intel_x86_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) pfmlib_pmu_t *pmu = this; int i, compact; int has_pebs = intel_x86_event_has_pebs(this, e); + int no_smpl = pmu->flags & PFMLIB_PMU_FL_NO_SMPL; for (i = 0; i < e->npattrs; i++) { compact = 0; @@ -261,12 +262,19 @@ pfm_intel_x86_perf_validate_pattrs(void *this, 
pfmlib_event_desc_t *e) if (e->pattrs[i].idx == PERF_ATTR_H) compact = 1; + if (no_smpl + && ( e->pattrs[i].idx == PERF_ATTR_FR + || e->pattrs[i].idx == PERF_ATTR_PR + || e->pattrs[i].idx == PERF_ATTR_PE)) + compact = 1; /* - * uncore has no priv level support + * no priv level support */ - if (pmu->type == PFM_PMU_TYPE_UNCORE - && (e->pattrs[i].idx == PERF_ATTR_U - || e->pattrs[i].idx == PERF_ATTR_K)) + if (pmu->supported_plm == 0 + && ( e->pattrs[i].idx == PERF_ATTR_U + || e->pattrs[i].idx == PERF_ATTR_K + || e->pattrs[i].idx == PERF_ATTR_MG + || e->pattrs[i].idx == PERF_ATTR_MH)) compact = 1; } diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index 38d8110..d3ed361 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -156,6 +156,7 @@ typedef struct { #define PFMLIB_PMU_FL_ACTIVE 0x2 /* PMU is initialized + detected on host */ #define PFMLIB_PMU_FL_RAW_UMASK 0x4 /* PMU supports PFM_ATTR_RAW_UMASKS */ #define PFMLIB_PMU_FL_ARCH_DFL 0x8 /* PMU is arch default */ +#define PFMLIB_PMU_FL_NO_SMPL 0x10 /* PMU does not support sampling */ typedef struct { int initdone; commit 31a7d65a8f61a45fad197d1394d8cce12f7652f3 Author: Stephane Eranian Date: Thu Oct 3 13:58:01 2013 +0200 grab PMU perf_event type from sysfs This patch enables grabbing the attr->type from sysfs for PMU models which provide the pmu->perf_name value. Otherwise, the type default to PERF_TYPE_RAW (which works only for core PMU). this is useful for uncore PMUs as their type is usually assigned dynamically. 
Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_x86_perf_event.c b/lib/pfmlib_intel_x86_perf_event.c index a7a2d4d..460a2c8 100644 --- a/lib/pfmlib_intel_x86_perf_event.c +++ b/lib/pfmlib_intel_x86_perf_event.c @@ -99,8 +99,24 @@ pfm_intel_x86_get_perf_encoding(void *this, pfmlib_event_desc_t *e) DPRINT("%s: unsupported count=%d\n", e->count); return PFM_ERR_NOTSUPP; } - + /* default PMU type */ attr->type = PERF_TYPE_RAW; + + /* + * if PMU specifies a perf PMU name, then grab the type + * from sysfs as it is most likely dynamically assigned. + * This allows this function to use used by some uncore PMUs + */ + if (pmu->perf_name) { + int type = find_pmu_type_by_name(pmu->perf_name); + if (type == PFM_ERR_NOTSUPP) { + DPRINT("perf PMU %s, not supported by OS\n", pmu->perf_name); + } else { + DPRINT("PMU %s perf type=%d\n", pmu->name, type); + attr->type = type; + } + } + attr->config = e->codes[0]; if (e->count > 1) { commit 9fb795c7f4142bddf9d66fa577290bf4e42953d7 Author: Stephane Eranian Date: Fri Oct 11 13:35:19 2013 +0200 validate : check if PMU is supported by library On some configurations, it may be that support for a particular PMU is disabled at compile time. Yet, the x86 validation suite ignores that fact, so we may get errors because of unsupported PMU. Catch those and don't consider them as errors, just print a warning. Signed-off-by: Stephane Eranian diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 08c717a..7e93533 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -2002,6 +2002,34 @@ static const test_event_t x86_test_events[]={ }; #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) +static int +check_pmu_supported(const char *evt) +{ + pfm_pmu_info_t info; + char *p; + int i, ret; + + memset(&info, 0, sizeof(info)); + info.size = sizeof(info); + + /* look for pmu_name::.... 
*/ + p = strchr(evt, ':'); + if (!p) + return 1; + if (*(p+1) != ':') + return 1; + + pfm_for_all_pmus(i) { + ret = pfm_get_pmu_info(i, &info); + if (ret != PFM_SUCCESS) + continue; + if (!strncmp(info.name, evt, p - evt)) + return 1; + } + /* PMU not there */ + return 0; +} + static int check_test_events(FILE *fp) { const test_event_t *e; @@ -2016,6 +2044,10 @@ static int check_test_events(FILE *fp) fstr = NULL; ret = pfm_get_event_encoding(e->name, PFM_PLM0 | PFM_PLM3, &fstr, NULL, &codes, &count); if (ret != e->ret) { + if (ret == PFM_ERR_NOTFOUND && !check_pmu_supported(e->name)) { + fprintf(fp,"Line %d, Event%d %s, skipped because no PMU support\n", e->line, i, e->name); + continue; + } fprintf(fp,"Line %d, Event%d %s, ret=%s(%d) expected %s(%d)\n", e->line, i, e->name, pfm_strerror(ret), ret, pfm_strerror(e->ret), e->ret); errors++; } else { commit 7af6bc46302812d29cfbc23d24430d31f09049da Author: Stephane Eranian Date: Mon Nov 4 19:07:23 2013 +0100 add missing events/umasks for Intel Ivy Bridge ivb::FP_COMP_OPS_EXE:X87 ivb::FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE ivb::FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE ivb::FP_COMP_OPS_EXE:SSE_PACKED_SINGLE ivb::SIMD_FP_256:PACKED_SINGLE ivb::SIMD_FP_256:PACKED_DOUBLE ivb::LSD:UOPS ivb::UOPS_EXECUTED:THREAD ivb::ICACHE:IFETCH_STALLS ivb::LD_BLOCKS:NO_SR ivb::OTHER_ASSISTS:WB Added aliases: ivb::DTLB_LOAD_ACCESS -> TLB_ACCESS ivb::LONGEST_LAT_CACHE -> L3_LAT_CACHE Thanks to Vince Weaver for spotting those new updates in the Sep 2013 SDM Vol3b edition. Also added a few more IVB validation tests. Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h index e473756..3c5583e 100644 --- a/lib/events/intel_ivb_events.h +++ b/lib/events/intel_ivb_events.h @@ -406,7 +406,12 @@ static const intel_x86_umask_t ivb_icache[]={ { .uname = "MISSES", .udesc = "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. 
Includes UC accesses", .ucode = 0x200, - .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "IFETCH_STALL", + .udesc = "Number of cycles wher a code-fetch stalled due to L1 instruction cache miss or iTLB miss", + .ucode = 0x400, + .uflags= INTEL_X86_NCOMBO, }, }; @@ -853,7 +858,12 @@ static const intel_x86_umask_t ivb_ld_blocks[]={ { .uname = "STORE_FORWARD", .udesc = "Loads blocked by overlapping with store buffer that cannot be forwarded", .ucode = 0x200, - .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "NO_SR", + .udesc = "Number of times that split load operations are temporarily blocked because all resources for handlding the split accesses are in use", + .ucode = 0x800, + .uflags= INTEL_X86_NCOMBO, }, }; @@ -1167,6 +1177,11 @@ static const intel_x86_umask_t ivb_other_assists[]={ .ucode = 0x0800, .uflags= INTEL_X86_NCOMBO, }, + { .uname = "WB", + .udesc = "Number of times the microcode assist is invoked by hardware upon uop writeback", + .ucode = 0x8000, + .uflags= INTEL_X86_NCOMBO, + }, }; static const intel_x86_umask_t ivb_resource_stalls[]={ @@ -1206,11 +1221,17 @@ static const intel_x86_umask_t ivb_rs_events[]={ }; static const intel_x86_umask_t ivb_tlb_access[]={ - { .uname = "LOAD_STLB_HIT", - .udesc = "Number of load operations that missed L1TLN but hit L2TLB", + { .uname = "STLB_HIT", + .udesc = "Number of load operations that missed L1TLB but hit L2TLB", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, + { .uname = "LOAD_STLB_HIT", + .udesc = "Number of load operations that missed L1TLB but hit L2TLB", + .ucode = 0x400, + .uequiv= "STLB_HIT", + .uflags= INTEL_X86_NCOMBO, + }, }; static const intel_x86_umask_t ivb_tlb_flush[]={ @@ -1230,7 +1251,12 @@ static const intel_x86_umask_t ivb_uops_executed[]={ { .uname = "CORE", .udesc = "Counts total number of uops executed from any thread per cycle", .ucode = 0x200, - .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + 
.uflags= INTEL_X86_NCOMBO, + }, + { .uname = "THREAD", + .udesc = "Counts total number of uops executed per thread each cycle", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, }, }; @@ -1577,6 +1603,55 @@ static const intel_x86_umask_t ivb_cycle_activity[]={ }, }; +static const intel_x86_umask_t ivb_fp_comp_ops_exe[]={ + { .uname = "X87", + .udesc = "Number of X87 uops executed", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "SSE_FP_PACKED_DOUBLE", + .udesc = "Number of SSE or AVX-128 double precision FP packed uops executed", + .ucode = 0x1000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "SSE_FP_SCALAR_SINGLE", + .udesc = "Number of SSE or AVX-128 single precision FP scalar uops executed", + .ucode = 0x2000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "SSE_PACKED_SINGLE", + .udesc = "Number of SSE or AVX-128 single precision FP packed uops executed", + .ucode = 0x4000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "SSE_SCALAR_DOUBLE", + .udesc = "Number of SSE or AVX-128 double precision FP scalar uops executed", + .ucode = 0x8000, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivb_simd_fp_256[]={ + { .uname = "PACKED_SINGLE", + .udesc = "Counts 256-bit packed single-precision", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PACKED_DOUBLE", + .udesc = "Counts 256-bit packed double-precision", + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivb_lsd[]={ + { .uname = "UOPS", + .udesc = "Number of uops delivered by the Loop Stream Detector (LSD)", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + static const intel_x86_entry_t intel_ivb_pe[]={ { .name = "ARITH", .desc = "Counts arithmetic multiply operations", @@ -1942,6 +2017,16 @@ static const intel_x86_entry_t intel_ivb_pe[]={ .ngrp = 1, .umasks = ivb_l3_lat_cache, }, +{ .name = "LONGEST_LAT_CACHE", + .desc = "Core-originated cacheable demand requests to L3", + .modmsk = 
INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0x2e, + .numasks = LIBPFM_ARRAY_SIZE(ivb_l3_lat_cache), + .ngrp = 1, + .equiv = "L3_LAT_CACHE", + .umasks = ivb_l3_lat_cache, +}, { .name = "MACHINE_CLEARS", .desc = "Machine clear asserted", .modmsk = INTEL_V3_ATTRS, @@ -2107,6 +2192,15 @@ static const intel_x86_entry_t intel_ivb_pe[]={ .ngrp = 1, .umasks = ivb_rs_events, }, +{ .name = "DTLB_LOAD_ACCESS", + .desc = "TLB access", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0x5f, + .numasks = LIBPFM_ARRAY_SIZE(ivb_tlb_access), + .ngrp = 1, + .umasks = ivb_tlb_access, +}, { .name = "TLB_ACCESS", .desc = "TLB access", .modmsk = INTEL_V3_ATTRS, @@ -2114,6 +2208,7 @@ static const intel_x86_entry_t intel_ivb_pe[]={ .code = 0x5f, .numasks = LIBPFM_ARRAY_SIZE(ivb_tlb_access), .ngrp = 1, + .equiv = "DTLB_LOAD_ACCESS", .umasks = ivb_tlb_access, }, { .name = "TLB_FLUSH", @@ -2175,6 +2270,33 @@ static const intel_x86_entry_t intel_ivb_pe[]={ .ngrp = 1, .umasks = ivb_uops_retired, }, +{ .name = "FP_COMP_OPS_EXE", + .desc = "Counts number of floating point events", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0x10, + .numasks = LIBPFM_ARRAY_SIZE(ivb_fp_comp_ops_exe), + .ngrp = 1, + .umasks = ivb_fp_comp_ops_exe, +}, +{ .name = "SIMD_FP_256", + .desc = "Counts 256-bit packed floating point instructions", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0x11, + .numasks = LIBPFM_ARRAY_SIZE(ivb_simd_fp_256), + .ngrp = 1, + .umasks = ivb_simd_fp_256, +}, +{ .name = "LSD", + .desc = "Loop stream detector", + .modmsk = INTEL_V3_ATTRS, + .cntmsk = 0xff, + .code = 0xa8, + .numasks = LIBPFM_ARRAY_SIZE(ivb_lsd), + .ngrp = 1, + .umasks = ivb_lsd, +}, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V3_ATTRS, diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 7e93533..955df41 100644 --- a/tests/validate_x86.c +++ 
b/tests/validate_x86.c @@ -1062,7 +1062,21 @@ static const test_event_t x86_test_events[]={ .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x53045f, - .fstr = "ivb::TLB_ACCESS:LOAD_STLB_HIT:k=1:u=1:e=0:i=0:c=0:t=0", + .fstr = "ivb::DTLB_LOAD_ACCESS:STLB_HIT:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, + .name = "ivb::TLB_ACCESS:STLB_HIT", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x53045f, + .fstr = "ivb::DTLB_LOAD_ACCESS:STLB_HIT:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, + .name = "ivb::DTLB_LOAD_ACCESS:STLB_HIT", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x53045f, + .fstr = "ivb::DTLB_LOAD_ACCESS:STLB_HIT:k=1:u=1:e=0:i=0:c=0:t=0", }, { SRC_LINE, .name = "ivb::MOVE_ELIMINATION:INT_NOT_ELIMINATED", @@ -1086,6 +1100,31 @@ static const test_event_t x86_test_events[]={ .fstr = "ivb::RESOURCE_STALLS:RS:ROB:k=1:u=1:e=0:i=0:c=0:t=0", }, { SRC_LINE, + .name = "ivb::UOPS_EXECUTED:THREAD", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x5301b1, + .fstr = "ivb::UOPS_EXECUTED:THREAD:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, + .name = "ivb::UOPS_EXECUTED:THREAD:e:c=1", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x15701b1, + .fstr = "ivb::UOPS_EXECUTED:THREAD:k=1:u=1:e=1:i=0:c=1:t=0", + }, + { SRC_LINE, + .name = "ivb::UOPS_EXECUTED:THREAD:e", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivb::UOPS_EXECUTED:THREAD:c=1:i", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1d301b1, + .fstr = "ivb::UOPS_EXECUTED:THREAD:k=1:u=1:e=0:i=1:c=1:t=0", + }, + { SRC_LINE, .name = "ivb::CPU_CLK_UNHALTED:REF_P", .ret = PFM_SUCCESS, .count = 1, commit fe3e6e865e98cd7f1743a26896e777873ae8b682 Author: Stephane Eranian Date: Mon Nov 11 17:58:19 2013 +0100 add missing Core select umasks to AMD Fam15h event table As documented in BKDG Fam15 rev 3.08. As per encoding, the core_select umasks cannot be combined. User has to select either ANY_CORE or one specific core, e.g., CORE_1. Default is ANY_CORE for all relevant events. 
Patch adds corresponding tests to validation test suite. Signed-off-by: Stephane Eranian diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h index 5738e4c..ac2b111 100644 --- a/lib/events/amd64_events_fam15h.h +++ b/lib/events/amd64_events_fam15h.h @@ -40,6 +40,62 @@ * Processors, Rev 0.90, May 18, 2010 */ +#define CORE_SELECT(b) \ + { .uname = "CORE_0",\ + .udesc = "Measure on Core0",\ + .ucode = 0 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_1",\ + .udesc = "Measure on Core1",\ + .ucode = 1 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_2",\ + .udesc = "Measure on Core2",\ + .ucode = 2 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_3",\ + .udesc = "Measure on Core3",\ + .ucode = 3 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_4",\ + .udesc = "Measure on Core4",\ + .ucode = 4 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_5",\ + .udesc = "Measure on Core5",\ + .ucode = 5 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_6",\ + .udesc = "Measure on Core6",\ + .ucode = 6 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_7",\ + .udesc = "Measure on Core7",\ + .ucode = 7 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "ANY_CORE",\ + .udesc = "Measure on any core",\ + .ucode = 0xf << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL,\ + } + static const amd64_umask_t amd64_fam15h_dispatched_fpu_ops[]={ { .uname = "OPS_PIPE0", .udesc = "Total number uops assigned to Pipe 0", @@ -1639,20 +1695,30 @@ static const amd64_umask_t amd64_fam15h_read_request_to_l3_cache[]={ { .uname = "READ_BLOCK_EXCLUSIVE", .udesc = "Read Block Exclusive (Data cache read)", .ucode = 0x1, + .grpid = 0, }, { .uname = "READ_BLOCK_SHARED", .udesc = "Read Block Shared (Instruction cache read)", .ucode = 0x2, + .grpid = 0, }, { .uname = 
"READ_BLOCK_MODIFY", .udesc = "Read Block Modify", .ucode = 0x4, + .grpid = 0, }, - { .uname = "ALL", - .udesc = "All sub-events selected", + { .uname = "PREFETCH", + .udesc = "Count prefetches honly", + .ucode = 0x8, + .grpid = 0, + }, + { .uname = "READ_BLOCK_ANY", + .udesc = "Count any read request", .ucode = 0x7, - .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + .grpid = 0, + .uflags= AMD64_FL_DFL | AMD64_FL_NCOMBO, }, + CORE_SELECT(1), }; static const amd64_umask_t amd64_fam15h_l3_fills_caused_by_l2_evictions[]={ @@ -1682,13 +1748,7 @@ static const amd64_umask_t amd64_fam15h_l3_fills_caused_by_l2_evictions[]={ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, .grpid = 0, }, - { .uname = "ALL_CORES", - .udesc = "All core", - .ucode = 0xf0, - .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, - .grpid = 1, - }, - + CORE_SELECT(1), }; static const amd64_umask_t amd64_fam15h_l3_evictions[]={ @@ -2421,7 +2481,7 @@ static const amd64_entry_t amd64_fam15h_pe[]={ .desc = "Read Request to L3 Cache", .code = 0x4e0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), - .ngrp = 1, + .ngrp = 2, .umasks = amd64_fam15h_read_request_to_l3_cache, }, { .name = "L3_CACHE_MISSES", @@ -2449,7 +2509,7 @@ static const amd64_entry_t amd64_fam15h_pe[]={ .desc = "Non-canceled L3 Read Requests", .code = 0x4ed, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), - .ngrp = 1, + .ngrp = 2, .umasks = amd64_fam15h_read_request_to_l3_cache, }, { .name = "L3_LATENCY", diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 955df41..0fc36c7 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1917,6 +1917,42 @@ static const test_event_t x86_test_events[]={ .fstr = "amd64_fam15h_interlagos::DISPATCHED_FPU_OPS:0xff:k=0:u=1:e=0:i=0:c=0:h=0:g=0" }, { SRC_LINE, + .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:read_block_modify:core_3", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x4005334e0ull, + .fstr = 
"amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_MODIFY:CORE_3", + }, + { SRC_LINE, + .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x40053f7e0ull, + .fstr = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_ANY:ANY_CORE", + }, + { SRC_LINE, + .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_EXCLUSIVE:PREFETCH:READ_BLOCK_MODIFY:core_4", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x400534de0ull, + .fstr = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_EXCLUSIVE:READ_BLOCK_MODIFY:PREFETCH:CORE_4", + }, + { SRC_LINE, + .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:read_block_any:prefetch:core_1", + .ret = PFM_ERR_FEATCOMB, /* must use individual umasks to combine with prefetch */ + }, + { SRC_LINE, + .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:read_block_any:prefetch:core_1:core_3", + .ret = PFM_ERR_FEATCOMB, /* core umasks cannot be combined */ + }, + { SRC_LINE, + .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:prefetch:core_0", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x4005308e0ull, + .fstr = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:PREFETCH:CORE_0", + }, + { SRC_LINE, .name = "ivb_ep::mem_load_uops_llc_miss_retired:local_dram", .ret = PFM_SUCCESS, .count = 1, commit 751acd759e6185d2a378afa5203189011f108afb Author: Stephane Eranian Date: Tue Oct 8 14:12:32 2013 +0200 add Intel RAPL PMU support This patch adds support for the Intel RAPL energy consumption counters present on Intel SandyBridge, IvyBridge and Haswell processors. The PMU is detected based on OS-support, i.e., the /sys/devices/power directory must exist. The following RAPL events are defined: - RAPL_ENERGY_CORES - RAPL_ENERGY_PKG - RAPL_EENRGY_DRAM (servers only) The new PMU is called rapl. The library does not provide any unit conversion for the RAW count. 
The kernel exports the raw count as increments of 1/(2^32) Joules or about 0.23 nano-Joules. Signed-off-by: Stephane Eranian diff --git a/docs/man3/libpfm_intel_rapl.3 b/docs/man3/libpfm_intel_rapl.3 new file mode 100644 index 0000000..cf7835c --- /dev/null +++ b/docs/man3/libpfm_intel_rapl.3 @@ -0,0 +1,39 @@ +.TH LIBPFM 3 "November, 2013" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_rapl - support for Intel RAPL PMU +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: rapl +.B PMU desc: Intel RAPL (Intel SandyBridge, IvyBridge, Haswell) +.sp +.SH DESCRIPTION +The library supports the Intel Running Average Power Limit (RAPL) +energy consumption counters. This is a socket-level set of counters +which reports energy consumption in Joules. There are up to 3 counters +each measuring only one event. The following events are defined: +.TP +.B RAPL_ENERGY_CORES +On all processors, the event reports the number of Joules consumed by all cores. + On all processors, +.TP +.B RAPL_ENERGY_PKG +On all processors, the event reports the number of Joules consumed by all +the cores and Last Level cache (L3). +.TP +.B RAPL_ENERGY_DRAM +On server processors, the event reports the number of Joules consumed +by the DRAM controller. + +By construction, the events are socket-level and can only be measured in +system-wide mode. It is necessary and sufficient to measure only one CPU +per socket to get meaningful results. + +.SH MODIFIERS +The PMU does not support any modifiers. 
+.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index 89b9fd8..85c0022 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -191,6 +191,7 @@ typedef enum { PFM_PMU_INTEL_IVB_UNC_CB3, /* Intel IvyBridge C-box 3 uncore PMU */ PFM_PMU_POWER8, /* IBM POWER8 */ + PFM_PMU_INTEL_RAPL, /* Intel RAPL */ /* MUST ADD NEW PMU MODELS HERE */ diff --git a/lib/Makefile b/lib/Makefile index 98411b8..e5f5d5f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -60,6 +60,7 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ pfmlib_intel_snb.c pfmlib_intel_snb_unc.c \ pfmlib_intel_ivb.c pfmlib_intel_ivb_unc.c \ pfmlib_intel_hsw.c \ + pfmlib_intel_rapl.c \ pfmlib_intel_snbep_unc.c \ pfmlib_intel_snbep_unc_cbo.c \ pfmlib_intel_snbep_unc_ha.c \ diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index e810eca..bf28291 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -97,6 +97,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &intel_ivb_unc_cbo3_support, &intel_ivb_ep_support, &intel_hsw_support, + &intel_rapl_support, &intel_snbep_unc_cb0_support, &intel_snbep_unc_cb1_support, &intel_snbep_unc_cb2_support, diff --git a/lib/pfmlib_intel_rapl.c b/lib/pfmlib_intel_rapl.c new file mode 100644 index 0000000..25c51f6 --- /dev/null +++ b/lib/pfmlib_intel_rapl.c @@ -0,0 +1,154 @@ +/* + * pfmlib_intel_rapl.c : Intel RAPL PMU + * + * Copyright (c) 2013 Google, Inc + * Contributed by Stephane Eranian + * + * Based on: + * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * RAPL PMU (SNB, IVB, HSW) + */ + +/* private headers */ +#include "pfmlib_priv.h" +/* + * for now, we reuse the x86 table entry format and callback to avoid duplicating + * code. We may revisit this later on + */ +#include "pfmlib_intel_x86_priv.h" + +extern pfmlib_pmu_t intel_rapl_support; + +#define RAPL_COMMON_EVENTS \ + { .name = "RAPL_ENERGY_CORES",\ + .desc = "Number of Joules consumed by all cores on the package. Unit is 2^-32 Joules",\ + .cntmsk = 0x1,\ + .code = 0x1,\ + },\ + { .name = "RAPL_ENERGY_PKG",\ + .desc = "Number of Joules consumed by all cores and Last level cache on the package. 
Unit is 2^-32 Joules",\ + .cntmsk = 0x2,\ + .code = 0x2,\ + } + +static const intel_x86_entry_t intel_rapl_cln_pe[]={ + RAPL_COMMON_EVENTS +}; + +static const intel_x86_entry_t intel_rapl_srv_pe[]={ + RAPL_COMMON_EVENTS, + { .name = "RAPL_ENERGY_DRAM", + .desc = "Number of Joules consumed by the DRAM. Unit is 2^-32 Joules", + .cntmsk = 0x4, + .code = 0x3, + }, +}; + +static int +pfm_rapl_detect(void *this) +{ + int ret; + + ret = pfm_intel_x86_detect(); + if (ret != PFM_SUCCESS) + return ret; + + if (pfm_intel_x86_cfg.family != 6) + return PFM_ERR_NOTSUPP; + + switch(pfm_intel_x86_cfg.model) { + case 42: /* Sandy Bridge */ + case 58: /* Ivy Bridge */ + case 70: /* Haswell */ + /* already setup by default */ + break; + case 45: /* Sandy Bridg-EP */ + case 62: /* Ivy Bridge-EP */ + intel_rapl_support.pe = intel_rapl_srv_pe; + intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_srv_pe); + break; + default: + return PFM_ERR_NOTSUPP; + } + return PFM_SUCCESS; +} + +static int +pfm_intel_rapl_get_encoding(void *this, pfmlib_event_desc_t *e) + +{ + pfmlib_pmu_t *pmu = this; + const intel_x86_entry_t *pe; + + /* shut up the compiler */ + pmu = pmu; + + pe = this_pe(this); + + e->fstr[0] = '\0'; + + e->codes[0] = pe[e->event].code; + e->count = 1; + evt_strcat(e->fstr, "%s", pe[e->event].name); + + __pfm_vbprintf("[0x%"PRIx64" event=0x%x] %s\n", + e->codes[0], + e->codes[0], e->fstr); + + return PFM_SUCCESS; +} + +/* + * number modifiers for RAPL + * define an empty modifier to avoid firing the + * sanity pfm_intel_x86_validate_table(). We are + * using this function to avoid duplicating code. 
+ */ +static const pfmlib_attr_desc_t rapl_mods[]= +{}; + +pfmlib_pmu_t intel_rapl_support={ + .desc = "Intel RAPL", + .name = "rapl", + .perf_name = "power", + .pmu = PFM_PMU_INTEL_RAPL, + .pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_cln_pe), + .type = PFM_PMU_TYPE_UNCORE, + .num_cntrs = 0, + .num_fixed_cntrs = 3, + .max_encoding = 1, + .pe = intel_rapl_cln_pe, /* default, maybe updated */ + .pmu_detect = pfm_rapl_detect, + .atdesc = rapl_mods, + + .get_event_encoding[PFM_OS_NONE] = pfm_intel_rapl_get_encoding, + PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ + .get_event_first = pfm_intel_x86_get_event_first, + .get_event_next = pfm_intel_x86_get_event_next, + .event_is_valid = pfm_intel_x86_event_is_valid, + .validate_table = pfm_intel_x86_validate_table, + .get_event_info = pfm_intel_x86_get_event_info, + .get_event_attr_info = pfm_intel_x86_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, +}; diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index d3ed361..59e4235 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -240,6 +240,7 @@ extern pfmlib_pmu_t intel_ivb_unc_cbo2_support; extern pfmlib_pmu_t intel_ivb_unc_cbo3_support; extern pfmlib_pmu_t intel_ivb_ep_support; extern pfmlib_pmu_t intel_hsw_support; +extern pfmlib_pmu_t intel_rapl_support; extern pfmlib_pmu_t intel_snbep_unc_cb0_support; extern pfmlib_pmu_t intel_snbep_unc_cb1_support; extern pfmlib_pmu_t intel_snbep_unc_cb2_support; diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 0fc36c7..6690a96 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -2074,6 +2074,24 @@ static const test_event_t x86_test_events[]={ .name = "ivb_unc_cbo1::unc_clockticks", .ret = PFM_ERR_NOTFOUND }, + { SRC_LINE, + .name = "rapl::rapl_energy_cores", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1, + .fstr = "rapl::RAPL_ENERGY_CORES", + }, + { 
SRC_LINE, + .name = "rapl::rapl_energy_pkg", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x2, + .fstr = "rapl::RAPL_ENERGY_PKG", + }, + { SRC_LINE, + .name = "rapl::rapl_energy_cores:u", + .ret = PFM_ERR_ATTR, + }, }; #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) commit 59bc6b3c586561137d9c1172a34deac3f8887999 Author: Andreas Beckmann Date: Thu Nov 28 10:39:45 2013 +0100 fix typos in event description for Cortex A9 and P4 Event description typos fixes. Signed-off-by: Andreas Beckmann diff --git a/lib/events/arm_cortex_a8_events.h b/lib/events/arm_cortex_a8_events.h index 467813e..2b61dda 100644 --- a/lib/events/arm_cortex_a8_events.h +++ b/lib/events/arm_cortex_a8_events.h @@ -82,7 +82,7 @@ static const arm_entry_t arm_cortex_a8_pe []={ }, {.name = "PC_WRITE", .code = 0x0c, - .desc = "Software change of PC. Equivelant to branches" + .desc = "Software change of PC. Equivalent to branches" }, {.name = "PC_IMM_BRANCH", .code = 0x0d, diff --git a/lib/events/arm_cortex_a9_events.h b/lib/events/arm_cortex_a9_events.h index 4f56fac..c034bd3 100644 --- a/lib/events/arm_cortex_a9_events.h +++ b/lib/events/arm_cortex_a9_events.h @@ -82,7 +82,7 @@ static const arm_entry_t arm_cortex_a9_pe []={ }, {.name = "PC_WRITE", .code = 0x0c, - .desc = "Software change of PC. Equivelant to branches" + .desc = "Software change of PC. Equivalent to branches" }, {.name = "PC_IMM_BRANCH", .code = 0x0d, diff --git a/lib/events/intel_netburst_events.h b/lib/events/intel_netburst_events.h index 73f58da..e24f22f 100644 --- a/lib/events/intel_netburst_events.h +++ b/lib/events/intel_netburst_events.h @@ -396,7 +396,7 @@ static const netburst_entry_t netburst_events[] = { .desc = "Number of entries (clipped at 15) in the IOQ that are " "active. An allocated entry can be a sector (64 bytes) " "or a chunk of 8 bytes. This event must be programmed in " - "conjuction with IOQ_allocation. All 'TYPE_BIT*' event-masks " + "conjunction with IOQ_allocation. 
All 'TYPE_BIT*' event-masks " "together are treated as a single 5-bit value", .event_select = 0x1A, .escr_select = 0x6, commit e4ced34f7558780ebb204f4fede45f9eeebfacb7 Author: Stephane Eranian Date: Sun Sep 22 22:16:12 2013 +0200 add Intel Silvermont core PMU support Add support for Intel Silvermont Atom processors. Note that there is no support for the Average Latency measurement in this patch. Based on the Software Optimization Guide, Chapter 15. Signed-off-by: Stephane Eranian diff --git a/README b/README index 905b768..6f83e2b 100644 --- a/README +++ b/README @@ -48,6 +48,7 @@ The library supports many PMUs. The current version can handle: Intel Sandy Bridge Intel Ivy Bridge Intel Haswell + Intel Silvermont Intel Knights Corner Intel architectural perfmon v1, v2, v3 diff --git a/docs/Makefile b/docs/Makefile index 59678f4..23cd70d 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -46,6 +46,7 @@ ARCH_MAN=libpfm_intel_core.3 \ libpfm_intel_ivb.3 \ libpfm_intel_ivb_unc.3 \ libpfm_intel_hsw.3 \ + libpfm_intel_slm.3 \ libpfm_intel_snbep_unc_cbo.3 \ libpfm_intel_snbep_unc_ha.3 \ libpfm_intel_snbep_unc_imc.3 \ diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index 85c0022..b89e661 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -193,6 +193,8 @@ typedef enum { PFM_PMU_POWER8, /* IBM POWER8 */ PFM_PMU_INTEL_RAPL, /* Intel RAPL */ + PFM_PMU_INTEL_SLM, /* Intel Silvermont */ + /* MUST ADD NEW PMU MODELS HERE */ PFM_PMU_MAX /* end marker */ diff --git a/lib/Makefile b/lib/Makefile index e5f5d5f..02dcfbb 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -71,6 +71,7 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ pfmlib_intel_snbep_unc_r2pcie.c \ pfmlib_intel_snbep_unc_r3qpi.c \ pfmlib_intel_knc.c \ + pfmlib_intel_slm.c \ pfmlib_intel_netburst.c \ pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c \ pfmlib_amd64_fam11h.c pfmlib_amd64_fam12h.c \ @@ -217,7 +218,8 @@ INC_X86= pfmlib_intel_x86_priv.h \
events/intel_snbep_unc_ubo_events.h \ events/intel_snbep_unc_r2pcie_events.h \ events/intel_snbep_unc_r3qpi_events.h \ - events/intel_knc_events.h + events/intel_knc_events.h \ + events/intel_slm_events.h INC_MIPS=events/mips_74k_events.h events/mips_74k_events.h diff --git a/lib/events/intel_slm_events.h b/lib/events/intel_slm_events.h new file mode 100644 index 0000000..c540e64 --- /dev/null +++ b/lib/events/intel_slm_events.h @@ -0,0 +1,896 @@ +/* + * Copyright (c) 2013 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ * + * PMU: slm (Intel Silvermont) + */ + +static const intel_x86_umask_t slm_icache[]={ + { .uname = "ACCESSES", + .udesc = "Instruction fetches, including uncacheacble fetches", + .ucode = 0x300, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "MISSES", + .udesc = "Count all instructions fetches that miss tha icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "HIT", + .udesc = "Count all instructions fetches from the instruction cache", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t slm_uops_retired[]={ + { .uname = "ANY", + .udesc = "Micro-ops retired", + .ucode = 0x1000, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "MS", + .udesc = "Micro-ops retired that were supplied fro MSROM", + .ucode = 0x0100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "X87", + .udesc = "Micro-ops retired that used X87 hardware", + .ucode = 0x0200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "MUL", + .udesc = "Micro-ops retired that used MUL hardware", + .ucode = 0x0400, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "DIV", + .udesc = "Micro-ops retired that used DIV hardware", + .ucode = 0x0800, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "STALLED_CYCLES", + .udesc = "Cycles no micro-ops retired", + .ucode = 0x1000 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "STALLS", + .udesc = "Periods no micro-ops retired", + .ucode = 0x1000 | INTEL_X86_MOD_EDGE | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t slm_inst_retired[]={ + { .uname = "ANY_P", + .udesc = "Instructions retired using generic counter (precise event)", + .ucode = 0x0, + .uflags= INTEL_X86_PEBS | INTEL_X86_DFL, + }, + { .uname = "ANY", + 
.udesc = "Instructions retired using generic counter (precise event)", + .uequiv = "ANY_P", + .ucode = 0x0, + .uflags= INTEL_X86_PEBS, + }, +}; + +static const intel_x86_umask_t slm_l2_reject_xq[]={ + { .uname = "ALL", + .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indictes back pressure from the IDI link. The XQ may reject transactions fro mthe L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", + .ucode = 0x000, + .uflags= INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t slm_machine_clears[]={ + { .uname = "SMC", + .udesc = "Self-Modifying Code detected", + .ucode = 0x100, + .uflags= INTEL_X86_DFL, + }, + { .uname = "MEMORY_ORDERING", + .udesc = "Number of stalled cycles due to memory ordering", + .ucode = 0x200, + }, + { .uname = "FP_ASSIST", + .udesc = "Number of stalled cycle due to FPU assist", + .ucode = 0x400, + }, + { .uname = "ALL", + .udesc = "Count any the machine clears", + .ucode = 0x800, + }, + { .uname = "ANY", + .udesc = "Count any the machine clears", + .uequiv = "ALL", + .ucode = 0x800, + }, +}; + +static const intel_x86_umask_t slm_br_inst_retired[]={ + { .uname = "ANY", + .udesc = "Any retired branch instruction (Precise Event)", + .ucode = 0x0, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_PEBS, + }, + { .uname = "ALL_BRANCHES", + .udesc = "Any Retired branch instruction (Precise Event)", + .uequiv = "ANY", + .ucode = 0x0, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "JCC", + .udesc = "JCC instructions retired (Precise Event)", + .ucode = 0x7e00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "TAKEN_JCC", + .udesc = "Taken JCC instructions retired (Precise Event)", + .ucode = 0xfe00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "CALL", + .udesc = "Near call instructions retired (Precise Event)", + .ucode = 0xf900, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, 
+ { .uname = "REL_CALL", + .udesc = "Near relative call instructions retired (Precise Event)", + .ucode = 0xfd00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "IND_CALL", + .udesc = "Near indirect call instructions retired (Precise Event)", + .ucode = 0xfb00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "RETURN", + .udesc = "Near ret instructions retired (Precise Event)", + .ucode = 0xc00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "NON_RETURN_IND", + .udesc = "Number of near indirect jmp and near indirect call instructions retired (Precise Event)", + .ucode = 0xeb00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "FAR_BRANCH", + .udesc = "Far branch instructions retired (Precise Event)", + .uequiv = "FAR", + .ucode = 0xbf00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "FAR", + .udesc = "Far branch instructions retired (Precise Event)", + .ucode = 0xbf00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, +}; + +static const intel_x86_umask_t slm_baclears[]={ + { .uname = "ANY", + .udesc = "BACLEARS asserted", + .uequiv = "ALL", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "ALL", + .udesc = "BACLEARS asserted", + .ucode = 0x100, + .uflags= INTEL_X86_DFL | INTEL_X86_NCOMBO, + }, + { .uname = "INDIRECT", + .udesc = "Number of baclears for indirect branches", + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "UNCOND", + .udesc = "Number of baclears for unconditional branches", + .ucode = 0x400, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "RETURN", + .udesc = "Number of baclears for return branches", + .ucode = 0x800, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "COND", + .udesc = "Number of baclears for conditional branches", + .ucode = 0x1000, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t slm_cpu_clk_unhalted[]={ + { .uname = "CORE_P", + .udesc = "Core cycles when core is not halted", + .ucode = 0x0, + 
.uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "BUS", + .udesc = "Bus cycles when core is not halted. This event can give a measurement of the elapsed time. This events has a constant ratio with CPU_CLK_UNHALTED:REF event, which is the maximum bus to processor frequency ratio", + .uequiv = "REF_P", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "REF_P", + .udesc = "Number of reference cycles that the core is not in a halted state. The core enters the halted state when it is running the HLT instruction. In mobile systems, the core frequency may change from time to time. This event is not affected by core frequency changes but counts as if the core is running a the same maximum frequency all the time", + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t slm_mem_uop_retired[]={ + { .uname = "LD_DCU_MISS", + .udesc = "Number of load uops retired that miss in L1 data cache. Note that prefetch misses will not be counted", + .ucode = 0x100, + }, + { .uname = "LD_L2_HIT", + .udesc = "Number of load uops retired that hit L2 (Precise Event)", + .ucode = 0x200, + .uflags= INTEL_X86_PEBS, + }, + { .uname = "LD_L2_MISS", + .udesc = "Number of load uops retired that missed L2 (Precise Event)", + .ucode = 0x400, + .uflags= INTEL_X86_PEBS, + }, + { .uname = "LD_DTLB_MISS", + .udesc = "Number of load uops retired that had a DTLB miss (Precise Event)", + .ucode = 0x800, + .uflags= INTEL_X86_PEBS, + }, + { .uname = "LD_UTLB_MISS", + .udesc = "Number of load uops retired that had a UTLB miss", + .ucode = 0x1000, + }, + { .uname = "HITM", + .udesc = "Number of load uops retired that got data from the other core or from the other module and the line was modified (Precise Event)", + .ucode = 0x2000, + .uflags= INTEL_X86_PEBS, + }, + { .uname = "ANY_LD", + .udesc = "Number of load uops retired", + .ucode = 0x4000, + }, + { .uname = "ANY_ST", + .udesc = "Number of store uops retired", + .ucode = 0x8000, + }, +}; + 
+static const intel_x86_umask_t slm_page_walks[]={ + { .uname = "D_SIDE_CYCLES", + .udesc = "Number of cycles when a D-side page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks", + .ucode = 0x100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "D_SIDE_WALKS", + .udesc = "Number of D-side page walks", + .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), + .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "I_SIDE_CYCLES", + .udesc = "Number of cycles when a I-side page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks", + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "I_SIDE_WALKS", + .udesc = "Number of I-side page walks", + .ucode = 0x200 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), + .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t slm_llc_rqsts[]={ + { .uname = "MISS", + .udesc = "Number of L2 cache misses", + .ucode = 0x4100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "ANY", + .udesc = "Number of L2 cache references", + .ucode = 0x4f00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t slm_rehabq[]={ + { .uname = "LD_BLOCK_ST_FORWARD", + .udesc = "Number of retired loads that were prohibited from receiving forwarded data from the store because of address mismatch (Precise Event)", + .ucode = 0x0100, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "LD_BLOCK_STD_NOTREADY", + .udesc = "Number of times forward was technically possible but did not occur because the store data was not available at the right time", + .ucode = 0x0200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "ST_SPLITS", + .udesc = "Number of retired stores that experienced cache line boundary splits", + .ucode = 0x0400, + .uflags= INTEL_X86_NCOMBO, + }, + { 
.uname = "LD_SPLITS", + .udesc = "Number of retired loads that experienced cache line boundary splits (Precise Event)", + .ucode = 0x0800, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "LOCK", + .udesc = "Number of retired memory operations with lock semantics. These are either implicit locked instructions such as XCHG or instructions with an explicit LOCK prefix", + .ucode = 0x1000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "STA_FULL", + .udesc = "Number of retired stores that are delayed becuase there is not a store address buffer available", + .ucode = 0x2000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "ANY_LD", + .udesc = "Number of load uops reissued from RehabQ", + .ucode = 0x4000, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "ANY_ST", + .udesc = "Number of store uops reissued from RehabQ", + .ucode = 0x8000, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t slm_offcore_response[]={ + { .uname = "DMND_DATA_RD", + .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", + .ucode = 1ULL << (0 + 8), + .grpid = 0, + }, + { .uname = "DMND_RFO", + .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", + .ucode = 1ULL << (1 + 8), + .grpid = 0, + }, + { .uname = "DMND_IFETCH", + .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. 
Does not count L2 code read prefetches", + .ucode = 1ULL << (2 + 8), + .grpid = 0, + }, + { .uname = "WB", + .udesc = "Request: number of writebacks (modified to exclusive) transactions", + .ucode = 1ULL << (3 + 8), + .grpid = 0, + }, + { .uname = "PF_L2_DATA_RD", + .udesc = "Request: number of data cacheline reads generated by L2 prefetchers", + .ucode = 1ULL << (4 + 8), + .grpid = 0, + }, + { .uname = "PF_RFO", + .udesc = "Request: number of RFO requests generated by L2 prefetchers", + .ucode = 1ULL << (5 + 8), + .grpid = 0, + }, + { .uname = "PF_IFETCH", + .udesc = "Request: number of code reads generated by L2 prefetchers", + .ucode = 1ULL << (6 + 8), + .grpid = 0, + }, + { .uname = "PARTIAL_READ", + .udesc = "Request: number of demand reads of partial cachelines (including UC, WC)", + .ucode = 1ULL << (7 + 8), + .grpid = 0, + }, + { .uname = "PARTIAL_WRITE", + .udesc = "Request: number of demand RFO requests to write to partial cache lines (includes UC, WT, WP)", + .ucode = 1ULL << (8 + 8), + .grpid = 0, + }, + { .uname = "UC_IFETCH", + .udesc = "Request: number of UC instruction fetches", + .ucode = 1ULL << (9 + 8), + .grpid = 0, + }, + { .uname = "BUS_LOCKS", + .udesc = "Request: number bus lock and split lock requests", + .ucode = 1ULL << (10 + 8), + .grpid = 0, + }, + { .uname = "STRM_ST", + .udesc = "Request: number of streaming store requests", + .ucode = 1ULL << (11 + 8), + .grpid = 0, + }, + { .uname = "SW_PREFETCH", + .udesc = "Request: number of software prefetch requests", + .ucode = 1ULL << (12 + 8), + .grpid = 0, + }, + { .uname = "PF_L1_DATA_RD", + .udesc = "Request: number of data cacheline reads generated by L1 prefetchers", + .ucode = 1ULL << (13 + 8), + .grpid = 0, + }, + { .uname = "PARTIAL_STRM_ST", + .udesc = "Request: number of partial streaming store requests", + .ucode = 1ULL << (14 + 8), + .grpid = 0, + }, + { .uname = "OTHER", + .udesc = "Request: counts one any other request that crosses IDI, including I/O", + .ucode = 1ULL << 
(15+8), + .grpid = 0, + }, + { .uname = "ANY_IFETCH", + .udesc = "Request: combination of PF_IFETCH | DMND_IFETCH | UC_IFETCH", + .uequiv = "PF_IFETCH:DMND_IFETCH:UC_IFETCH", + .ucode = (1ULL << 6 | 1ULL << 2 | 1ULL << 9) << 8, + .grpid = 0, + }, + { .uname = "ANY_REQUEST", + .udesc = "Request: combination of all request umasks", + .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_IFETCH:WB:PF_L2_DATA_RD:PF_RFO:PF_IFETCH:PARTIAL_READ:PARTIAL_WRITE:UC_IFETCH:BUS_LOCKS:STRM_ST:SW_PREFETCH:PF_L1_DATA_RD:PARTIAL_STRM_ST:OTHER", + .ucode = 0xffff00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + .grpid = 0, + }, + { .uname = "ANY_DATA", + .udesc = "Request: combination of DMND_DATA | PF_L1_DATA_RD | PF_L2_DATA_RD", + .uequiv = "DMND_DATA_RD:PF_L1_DATA_RD:PF_L2_DATA_RD", + .ucode = (1ULL << 0 | 1ULL << 4 | 1ULL << 13) << 8, + .grpid = 0, + }, + { .uname = "ANY_RFO", + .udesc = "Request: combination of DMND_RFO | PF_RFO", + .uequiv = "DMND_RFO:PF_RFO", + .ucode = (1ULL << 1 | 1ULL << 5) << 8, + .grpid = 0, + }, + { .uname = "ANY_RESPONSE", + .udesc = "Response: count any response type", + .ucode = 1ULL << (16+8), + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, + .grpid = 1, + }, + { .uname = "L2_HIT", + .udesc = "Supplier: counts L2 hits in M/E/S states", + .ucode = 1ULL << (18+8), + .grpid = 1, + }, + { .uname = "SNP_NONE", + .udesc = "Snoop: counts number of times no snoop-related information is available", + .ucode = 1ULL << (31+8), + .grpid = 2, + }, + { .uname = "SNP_MISS", + .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", + .ucode = 1ULL << (33+8), + .grpid = 2, + }, + { .uname = "SNP_HIT", + .udesc = "Snoop: counts number of times a snoop hits in the other module where no modified copies were found in the L1 cache of the other core", + .ucode = 1ULL << (34+8), + .grpid = 2, + }, + { .uname = "SNP_HITM", + .udesc = "Snoop: counts number of times a snoop hits in the other module where modified copies were found 
in the L1 cache of the other core", + .ucode = 1ULL << (36+8), + .grpid = 2, + }, + { .uname = "NON_DRAM", + .udesc = "Snoop: counts number of times target was a non-DRAM system address. This includes MMIO transactions", + .ucode = 1ULL << (37+8), + .grpid = 2, + }, + { .uname = "SNP_ANY", + .udesc = "Snoop: any snoop reason", + .ucode = 0x7dULL << (31+8), + .uequiv = "SNP_NONE:SNP_MISS:SNP_HIT:SNP_HITM:NON_DRAM", + .uflags= INTEL_X86_DFL, + .grpid = 2, + }, +}; + +static const intel_x86_umask_t slm_br_misp_inst_retired[]={ + { .uname = "ALL_BRANCHES", + .udesc = "All mispredicted branches (Precise Event)", + .uequiv = "ANY", + .ucode = 0x0000, /* architectural encoding */ + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "ANY", + .udesc = "All mispredicted branches (Precise Event)", + .ucode = 0x0000, /* architectural encoding */ + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, + }, + { .uname = "JCC", + .udesc = "Number of mispredicted conditional branch instructions retired (Precise Event)", + .ucode = 0x7e00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "NON_RETURN_IND", + .udesc = "Number of mispredicted non-return branch instructions retired (Precise Event)", + .ucode = 0xeb00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "FAR", + .udesc = "Number of mispredicted far branch instructions retired (Precise Event)", + .ucode = 0xbf00, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "RETURN", + .udesc = "Number of mispredicted return branch instructions retired (Precise Event)", + .ucode = 0xf700, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "CALL", + .udesc = "Number of mispredicted call branch instructions retired (Precise Event)", + .ucode = 0xf900, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "IND_CALL", + .udesc = "Number of mispredicted indirect call branch instructions retired (Precise Event)", + .ucode = 0xfb00, + .uflags= 
INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "REL_CALL", + .udesc = "Number of mispredicted relative call branch instructions retired (Precise Event)", + .ucode = 0xfd00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "TAKEN_JCC", + .udesc = "Number of mispredicted taken conditional branch instructions retired (Precise Event)", + .ucode = 0xfe00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, +}; + +static const intel_x86_umask_t slm_no_alloc_cycles[]={ + { .uname = "ANY", + .udesc = "Number of cycles when the front-end does not provide any instructions to be allocated for any reason", + .ucode = 0x3f00, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "NOT_DELIVERED", + .udesc = "Number of cycles when the front-end does not provide any instructions to be allocated but the back-end is not stalled", + .ucode = 0x5000, + .uflags= INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t slm_rs_full_stall[]={ + { .uname = "MEC", + .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for the MEC cluster is full", + .ucode = 0x0100, + }, + { .uname = "IEC_PORT0", + .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 0 integer cluster is full", + .ucode = 0x0200, + }, + { .uname = "IEC_PORT1", + .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 1 integer cluster is full", + .ucode = 0x0400, + }, + { .uname = "FPC_PORT0", + .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 0 floating-pointer cluster is full", + .ucode = 0x0800, + }, + { .uname = "FPC_PORT1", + .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for port 1 floating-pointer cluster is full", + .ucode = 0x1000, + }, + { .uname = "ANY", + .udesc = "Number of cycles when the allocation pipeline is stalled due any one of the RS being full", + .ucode = 0x1f00, + .uflags= INTEL_X86_NCOMBO | 
INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t slm_cycles_div_busy[]={ + { .uname = "ANY", + .udesc = "Number of cycles the divider is busy", + .ucode = 0x0100, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t slm_ms_decoded[]={ + { .uname = "ENTRY", + .udesc = "Number of times the MSROM starts a flow of uops", + .ucode = 0x0100, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t slm_decode_restriction[]={ + { .uname = "PREDECODE_WRONG", + .udesc = "Number of times the prediction (from the predecode cache) for intruction length is incorrect", + .ucode = 0x0100, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + +static const intel_x86_entry_t intel_slm_pe[]={ +{ .name = "UNHALTED_CORE_CYCLES", + .desc = "Unhalted core cycles", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x200000003ull, + .code = 0x3c, +}, +{ .name = "UNHALTED_REFERENCE_CYCLES", + .desc = "Unhalted reference cycle", + .modmsk = INTEL_FIXED3_ATTRS, + .cntmsk = 0x400000000ull, + .code = 0x0300, /* pseudo encoding */ + .flags = INTEL_X86_FIXED, +}, +{ .name = "INSTRUCTION_RETIRED", + .desc = "Instructions retired", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x100000003ull, + .code = 0xc0, +}, +{ .name = "INSTRUCTIONS_RETIRED", + .desc = "This is an alias for INSTRUCTION_RETIRED", + .modmsk = INTEL_V2_ATTRS, + .equiv = "INSTRUCTION_RETIRED", + .cntmsk = 0x10003, + .code = 0xc0, +}, +{ .name = "LLC_REFERENCES", + .desc = "Last level of cache references", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x4f2e, +}, +{ .name = "LAST_LEVEL_CACHE_REFERENCES", + .desc = "This is an alias for LLC_REFERENCES", + .modmsk = INTEL_V2_ATTRS, + .equiv = "LLC_REFERENCES", + .cntmsk = 0x3, + .code = 0x4f2e, +}, +{ .name = "LLC_MISSES", + .desc = "Last level of cache misses", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x412e, +}, +{ .name = "LAST_LEVEL_CACHE_MISSES", + .desc = "This is an alias for LLC_MISSES", + 
.modmsk = INTEL_V2_ATTRS, + .equiv = "LLC_MISSES", + .cntmsk = 0x3, + .code = 0x412e, +}, +{ .name = "BRANCH_INSTRUCTIONS_RETIRED", + .desc = "Branch instructions retired", + .modmsk = INTEL_V2_ATTRS, + .equiv = "BR_INST_RETIRED:ANY", + .cntmsk = 0x3, + .code = 0xc4, +}, +{ .name = "MISPREDICTED_BRANCH_RETIRED", + .desc = "Mispredicted branch instruction retired", + .equiv = "BR_MISP_INST_RETIRED", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xc5, + .flags= INTEL_X86_PEBS, +}, +/* begin model specific events */ +{ .name = "DECODE_RESTRICTION", + .desc = "Instruction length prediction delay", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xe9, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(slm_decode_restriction), + .umasks = slm_decode_restriction, +}, +{ .name = "L2_REJECT_XQ", + .desc = "Rejected L2 requests to XQ", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x30, + .numasks = LIBPFM_ARRAY_SIZE(slm_l2_reject_xq), + .ngrp = 1, + .umasks = slm_l2_reject_xq, +}, +{ .name = "ICACHE", + .desc = "Instruction fetches", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x80, + .numasks = LIBPFM_ARRAY_SIZE(slm_icache), + .ngrp = 1, + .umasks = slm_icache, +}, +{ .name = "UOPS_RETIRED", + .desc = "Micro-ops retired", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xc2, + .numasks = LIBPFM_ARRAY_SIZE(slm_uops_retired), + .ngrp = 1, + .umasks = slm_uops_retired, +}, +{ .name = "INST_RETIRED", + .desc = "Instructions retired", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xc0, + .flags= INTEL_X86_PEBS, + .numasks = LIBPFM_ARRAY_SIZE(slm_inst_retired), + .ngrp = 1, + .umasks = slm_inst_retired, +}, +{ .name = "CYCLES_DIV_BUSY", + .desc = "Cycles the divider is busy", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xcd, + .numasks = LIBPFM_ARRAY_SIZE(slm_cycles_div_busy), + .ngrp = 1, + .umasks = slm_cycles_div_busy, +}, +{ .name = "RS_FULL_STALL", + .desc = "RS full", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + 
.code = 0xcb, + .numasks = LIBPFM_ARRAY_SIZE(slm_rs_full_stall), + .ngrp = 1, + .umasks = slm_rs_full_stall, +}, +{ .name = "LLC_RQSTS", + .desc = "L2 cache requests", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x2e, + .numasks = LIBPFM_ARRAY_SIZE(slm_llc_rqsts), + .ngrp = 1, + .umasks = slm_llc_rqsts, +}, +{ .name = "MACHINE_CLEARS", + .desc = "Self-Modifying Code detected", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xc3, + .numasks = LIBPFM_ARRAY_SIZE(slm_machine_clears), + .ngrp = 1, + .umasks = slm_machine_clears, +}, +{ .name = "BR_INST_RETIRED", + .desc = "Retired branch instructions", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xc4, + .numasks = LIBPFM_ARRAY_SIZE(slm_br_inst_retired), + .flags= INTEL_X86_PEBS, + .ngrp = 1, + .umasks = slm_br_inst_retired, +}, +{ .name = "BR_MISP_INST_RETIRED", + .desc = "Mispredicted retired branch instructions", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xc5, + .flags= INTEL_X86_PEBS, + .numasks = LIBPFM_ARRAY_SIZE(slm_br_misp_inst_retired), + .ngrp = 1, + .umasks = slm_br_misp_inst_retired, +}, +{ .name = "MS_DECODED", + .desc = "MS decoder", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xe7, + .numasks = LIBPFM_ARRAY_SIZE(slm_ms_decoded), + .ngrp = 1, + .umasks = slm_ms_decoded, +}, +{ .name = "BACLEARS", + .desc = "Branch address calculator", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xe6, + .numasks = LIBPFM_ARRAY_SIZE(slm_baclears), + .ngrp = 1, + .umasks = slm_baclears, +}, +{ .name = "NO_ALLOC_CYCLES", + .desc = "Front-end allocation", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0xca, + .numasks = LIBPFM_ARRAY_SIZE(slm_no_alloc_cycles), + .ngrp = 1, + .umasks = slm_no_alloc_cycles, +}, +{ .name = "CPU_CLK_UNHALTED", + .desc = "Core cycles when core is not halted", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x3c, + .numasks = LIBPFM_ARRAY_SIZE(slm_cpu_clk_unhalted), + .ngrp = 1, + .umasks = slm_cpu_clk_unhalted, +}, +{ 
.name = "MEM_UOP_RETIRED", + .desc = "Retired loads micro-ops", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x4, + .flags= INTEL_X86_PEBS, + .numasks = LIBPFM_ARRAY_SIZE(slm_mem_uop_retired), + .ngrp = 1, + .umasks = slm_mem_uop_retired, +}, +{ .name = "PAGE_WALKS", + .desc = "Number of page-walks executed", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x5, + .numasks = LIBPFM_ARRAY_SIZE(slm_page_walks), + .ngrp = 1, + .umasks = slm_page_walks, +}, +{ .name = "CORE_REJECT", + .desc = "Demand and L1 prefetcher requests rejected by L2", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x31, +}, +{ .name = "REHABQ", + .desc = "Memory reference queue", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x3, + .code = 0x03, + .flags= INTEL_X86_PEBS, + .numasks = LIBPFM_ARRAY_SIZE(slm_rehabq), + .ngrp = 1, + .umasks = slm_rehabq, +}, +{ .name = "OFFCORE_RESPONSE_0", + .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xf, + .code = 0x01b7, + .flags= INTEL_X86_NHM_OFFCORE, + .numasks = LIBPFM_ARRAY_SIZE(slm_offcore_response), + .ngrp = 3, + .umasks = slm_offcore_response, +}, +{ .name = "OFFCORE_RESPONSE_1", + .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", + .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xf, + .code = 0x02b7, + .flags= INTEL_X86_NHM_OFFCORE, + .numasks = LIBPFM_ARRAY_SIZE(slm_offcore_response), + .ngrp = 3, + .umasks = slm_offcore_response, /* identical to actual umasks list for this event */ +}, +}; diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index bf28291..0ed9441 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -119,6 +119,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &intel_snbep_unc_r3qpi0_support, &intel_snbep_unc_r3qpi1_support, &intel_knc_support, + &intel_slm_support, &intel_x86_arch_support, /* must 
always be last for x86 */ #endif @@ -826,6 +827,7 @@ pfmlib_parse_event_attr(char *str, pfmlib_event_desc_t *d) goto found_attr; } } + DPRINT("cannot find attribute %s\n", s); return PFM_ERR_ATTR; found_attr: type = ainfo->type; diff --git a/lib/pfmlib_intel_slm.c b/lib/pfmlib_intel_slm.c new file mode 100644 index 0000000..ba61450 --- /dev/null +++ b/lib/pfmlib_intel_slm.c @@ -0,0 +1,89 @@ +/* + * pfmlib_intel_slm.c : Intel Silvermont core PMU + * + * Copyright (c) 2013 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Based on Intel Software Optimization Guide June 2013 + */ + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "events/intel_slm_events.h" + +static int +pfm_intel_slm_detect(void *this) +{ + int ret; + + ret = pfm_intel_x86_detect(); + if (ret != PFM_SUCCESS) + return ret; + if (pfm_intel_x86_cfg.family != 6) + return PFM_ERR_NOTSUPP; + + switch(pfm_intel_x86_cfg.model) { + case 55: /* Silvermont */ + case 77: /* Silvermont Avoton */ + break; + default: + return PFM_ERR_NOTSUPP; + } + return PFM_SUCCESS; +} + +static int +pfm_intel_slm_init(void *this) +{ + pfm_intel_x86_cfg.arch_version = 2; + return PFM_SUCCESS; +} + +pfmlib_pmu_t intel_slm_support={ + .desc = "Intel Silvermont", + .name = "slm", + .pmu = PFM_PMU_INTEL_SLM, + .pme_count = LIBPFM_ARRAY_SIZE(intel_slm_pe), + .type = PFM_PMU_TYPE_CORE, + .num_cntrs = 4, + .num_fixed_cntrs = 3, + .max_encoding = 2, + .pe = intel_slm_pe, + .atdesc = intel_x86_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK + | INTEL_X86_PMU_FL_ECMASK, + .supported_plm = INTEL_X86_PLM, + + .pmu_detect = pfm_intel_slm_detect, + .pmu_init = pfm_intel_slm_init, + + .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, + PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), + + .get_event_first = pfm_intel_x86_get_event_first, + .get_event_next = pfm_intel_x86_get_event_next, + .event_is_valid = pfm_intel_x86_event_is_valid, + .validate_table = pfm_intel_x86_validate_table, + .get_event_info = pfm_intel_x86_get_event_info, + .get_event_attr_info = pfm_intel_x86_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, +}; diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c index 34c843d..baa9f63 100644 --- a/lib/pfmlib_intel_x86.c +++ b/lib/pfmlib_intel_x86.c @@ -599,6 +599,7 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) e->codes[0] = reg.val; +DPRINT("sel_edge=%d cnt=%d\n", 
reg.sel_edge, reg.sel_cnt_mask); /* * on recent processors (except Atom), edge requires cmask >=1 */ diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index 59e4235..75a2d30 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -262,6 +262,7 @@ extern pfmlib_pmu_t intel_snbep_unc_r2pcie_support; extern pfmlib_pmu_t intel_snbep_unc_r3qpi0_support; extern pfmlib_pmu_t intel_snbep_unc_r3qpi1_support; extern pfmlib_pmu_t intel_knc_support; +extern pfmlib_pmu_t intel_slm_support; extern pfmlib_pmu_t power4_support; extern pfmlib_pmu_t ppc970_support; extern pfmlib_pmu_t ppc970mp_support; diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 6690a96..cb781b9 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -2092,6 +2092,95 @@ static const test_event_t x86_test_events[]={ .name = "rapl::rapl_energy_cores:u", .ret = PFM_ERR_ATTR, }, + { SRC_LINE, + .name = "slm::offcore_response_0:snp_hitm", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, + .codes[1]=0x100001ffffull, + .fstr = "slm::OFFCORE_RESPONSE_0:ANY_REQUEST:ANY_RESPONSE:SNP_HITM:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::offcore_response_0:any_data", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, + .codes[1]=0x12011, + .fstr = "slm::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_L2_DATA_RD:PF_L1_DATA_RD:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::offcore_response_0:uc_ifetch", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, + .codes[1]=0x10200, + .fstr = "slm::OFFCORE_RESPONSE_0:UC_IFETCH:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::offcore_response_0:any_ifetch", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, + .codes[1]=0x10244, + .fstr = "slm::OFFCORE_RESPONSE_0:DMND_IFETCH:PF_IFETCH:UC_IFETCH:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::offcore_response_1:snp_hitm", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5302b7, + .codes[1]=0x100001ffffull, + 
.fstr = "slm::OFFCORE_RESPONSE_1:ANY_REQUEST:ANY_RESPONSE:SNP_HITM:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::offcore_response_1:any_data", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5302b7, + .codes[1]=0x12011, + .fstr = "slm::OFFCORE_RESPONSE_1:DMND_DATA_RD:PF_L2_DATA_RD:PF_L1_DATA_RD:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::offcore_response_1:uc_ifetch", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5302b7, + .codes[1]=0x10200, + .fstr = "slm::OFFCORE_RESPONSE_1:UC_IFETCH:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::offcore_response_1:any_ifetch", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5302b7, + .codes[1]=0x10244, + .fstr = "slm::OFFCORE_RESPONSE_1:DMND_IFETCH:PF_IFETCH:UC_IFETCH:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::decode_restriction:predecode_wrong", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x5301e9, + .fstr = "slm::DECODE_RESTRICTION:PREDECODE_WRONG:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::rs_full_stall:fpc_port0", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x5308cb, + .fstr = "slm::RS_FULL_STALL:FPC_PORT0:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::no_alloc_cycles:any", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x533fca, + .fstr = "slm::NO_ALLOC_CYCLES:ANY:k=1:u=1:e=0:i=0:c=0", + }, + { SRC_LINE, + .name = "slm::no_alloc_cycles:any:t=1", + .ret = PFM_ERR_ATTR + }, }; #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) commit 93f4b19b49ee849cd3f822e87be12238305c40ba Author: Stephane Eranian Date: Sun Dec 1 10:27:53 2013 +0100 add AMD Fam15h Norhtbridge PMU support This patch adds proper support for AMD Fam15h uncore PMU (Northbridge). The Northbridge (NB) events were in the core PMU event list before. But on Fam15h, the NB PMU is completely distinct and thus must be implemented in a separate table. 
Furthermore, the NB perf_event support is also independent from core PMU and uses dynamic PMU registration. This patch handles this correctly too. The test suite is updated to take those changes into consideration. Signed-off-by: Stephane Eranian diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index b89e661..ac9ed03 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -194,6 +194,7 @@ typedef enum { PFM_PMU_INTEL_RAPL, /* Intel RAPL */ PFM_PMU_INTEL_SLM, /* Intel Silvermont */ + PFM_PMU_AMD64_FAM15H_NB, /* AMD AMD64 Fam15h NorthBridge */ /* MUST ADD NEW PMU MODELS HERE */ diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h index ac2b111..7872468 100644 --- a/lib/events/amd64_events_fam15h.h +++ b/lib/events/amd64_events_fam15h.h @@ -2295,228 +2295,4 @@ static const amd64_entry_t amd64_fam15h_pe[]={ .modmsk = AMD64_FAM15H_ATTRS, .code = 0x1d8, }, -{ .name = "DRAM_ACCESSES", - .desc = "DRAM Accesses", - .code = 0xe0, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_dram_accesses), - .ngrp = 1, - .umasks = amd64_fam15h_dram_accesses, -}, -{ .name = "DRAM_CONTROLLER_PAGE_TABLE_OVERFLOWS", - .desc = "DRAM Controller Page Table Overflows", - .code = 0xe1, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_dram_controller_page_table_overflows), - .ngrp = 1, - .umasks = amd64_fam15h_dram_controller_page_table_overflows, -}, -{ .name = "MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED", - .desc = "Memory Controller DRAM Command Slots Missed", - .code = 0xe2, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_dram_command_slots_missed), - .ngrp = 1, - .umasks = amd64_fam15h_memory_controller_dram_command_slots_missed, -}, -{ .name = "MEMORY_CONTROLLER_TURNAROUNDS", - .desc = "Memory Controller Turnarounds", - .code = 0xe3, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_turnarounds), - .ngrp = 1, - .umasks = amd64_fam15h_memory_controller_turnarounds, -}, -{ .name = 
"MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION", - .desc = "Memory Controller Bypass Counter Saturation", - .code = 0xe4, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_bypass_counter_saturation), - .ngrp = 1, - .umasks = amd64_fam15h_memory_controller_bypass_counter_saturation, -}, -{ .name = "THERMAL_STATUS", - .desc = "Thermal Status", - .code = 0xe8, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_thermal_status), - .ngrp = 1, - .umasks = amd64_fam15h_thermal_status, -}, -{ .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", - .desc = "CPU/IO Requests to Memory/IO", - .code = 0xe9, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_io_requests_to_memory_io), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_io_requests_to_memory_io, -}, -{ .name = "CACHE_BLOCK_COMMANDS", - .desc = "Cache Block Commands", - .code = 0xea, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cache_block_commands), - .ngrp = 1, - .umasks = amd64_fam15h_cache_block_commands, -}, -{ .name = "SIZED_COMMANDS", - .desc = "Sized Commands", - .code = 0xeb, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_sized_commands), - .ngrp = 1, - .umasks = amd64_fam15h_sized_commands, -}, -{ .name = "PROBE_RESPONSES_AND_UPSTREAM_REQUESTS", - .desc = "Probe Responses and Upstream Requests", - .code = 0xec, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_probe_responses_and_upstream_requests), - .ngrp = 1, - .umasks = amd64_fam15h_probe_responses_and_upstream_requests, -}, -{ .name = "GART_EVENTS", - .desc = "GART Events", - .code = 0xee, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_gart_events), - .ngrp = 1, - .umasks = amd64_fam15h_gart_events, -}, -{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_0", - .desc = "Link Transmit Bandwidth Link 0", - .code = 0xf6, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), - .ngrp = 2, - .umasks = amd64_fam15h_link_transmit_bandwidth, -}, -{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_1", - .desc = "Link Transmit Bandwidth Link 1", - .code = 0xf7, - .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), - .ngrp = 2, - .umasks = amd64_fam15h_link_transmit_bandwidth, -}, -{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_2", - .desc = "Link Transmit Bandwidth Link 2", - .code = 0xf8, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), - .ngrp = 2, - .umasks = amd64_fam15h_link_transmit_bandwidth, -}, -{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_3", - .desc = "Link Transmit Bandwidth Link 3", - .code = 0x1f9, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_link_transmit_bandwidth), - .ngrp = 2, - .umasks = amd64_fam15h_link_transmit_bandwidth, -}, -{ .name = "CPU_TO_DRAM_REQUESTS_TO_TARGET_NODE", - .desc = "CPU to DRAM Requests to Target Node", - .code = 0x1e0, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_to_dram_requests_to_target_node), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_to_dram_requests_to_target_node, -}, -{ .name = "IO_TO_DRAM_REQUESTS_TO_TARGET_NODE", - .desc = "IO to DRAM Requests to Target Node", - .code = 0x1e1, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_io_to_dram_requests_to_target_node), - .ngrp = 1, - .umasks = amd64_fam15h_io_to_dram_requests_to_target_node, -}, -{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_0_3", - .desc = "CPU Read Command Latency to Target Node 0-3", - .code = 0x1e2, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_0_3), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_0_3, -}, -{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_0_3", - .desc = "CPU Read Command Requests to Target Node 0-3", - .code = 0x1e3, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_0_3), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_0_3, -}, -{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_4_7", - .desc = "CPU Read Command Latency to Target Node 4-7", - .code = 0x1e4, - .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_4_7), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_4_7, -}, -{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_4_7", - .desc = "CPU Read Command Requests to Target Node 4-7", - .code = 0x1e5, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_read_command_requests_to_target_node_4_7), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_read_command_requests_to_target_node_4_7, -}, -{ .name = "CPU_COMMAND_LATENCY_TO_TARGET_NODE", - .desc = "CPU Command Latency to Target Node", - .code = 0x1e6, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_command_requests_to_target_node), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_command_requests_to_target_node, -}, -{ .name = "CPU_REQUESTS_TO_TARGET_NODE", - .desc = "CPU Requests to Target Node", - .code = 0x1e7, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cpu_command_requests_to_target_node), - .ngrp = 1, - .umasks = amd64_fam15h_cpu_command_requests_to_target_node, -}, -{ .name = "REQUEST_CACHE_STATUS_0", - .desc = "Request Cache Status 0", - .code = 0x1ea, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_request_cache_status_0), - .ngrp = 1, - .umasks = amd64_fam15h_request_cache_status_0, -}, -{ .name = "REQUEST_CACHE_STATUS_1", - .desc = "Request Cache Status 1", - .code = 0x1eb, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_request_cache_status_1), - .ngrp = 1, - .umasks = amd64_fam15h_request_cache_status_1, -}, -{ .name = "MEMORY_CONTROLLER_REQUESTS", - .desc = "Memory Controller Requests", - .code = 0x1f0, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_controller_requests), - .ngrp = 1, - .umasks = amd64_fam15h_memory_controller_requests, -}, -{ .name = "READ_REQUEST_TO_L3_CACHE", - .desc = "Read Request to L3 Cache", - .code = 0x4e0, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), - .ngrp = 2, - .umasks = amd64_fam15h_read_request_to_l3_cache, -}, -{ .name = "L3_CACHE_MISSES", - .desc = "L3 Cache Misses", - 
.code = 0x4e1, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), - .ngrp = 2, - .umasks = amd64_fam15h_read_request_to_l3_cache, -}, -{ .name = "L3_FILLS_CAUSED_BY_L2_EVICTIONS", - .desc = "L3 Fills caused by L2 Evictions", - .code = 0x4e2, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l3_fills_caused_by_l2_evictions), - .ngrp = 2, - .umasks = amd64_fam15h_l3_fills_caused_by_l2_evictions, -}, -{ .name = "L3_EVICTIONS", - .desc = "L3 Evictions", - .code = 0x4e3, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l3_evictions), - .ngrp = 1, - .umasks = amd64_fam15h_l3_evictions, -}, -{ .name = "NON_CANCELED_L3_READ_REQUESTS", - .desc = "Non-canceled L3 Read Requests", - .code = 0x4ed, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_read_request_to_l3_cache), - .ngrp = 2, - .umasks = amd64_fam15h_read_request_to_l3_cache, -}, -{ .name = "L3_LATENCY", - .desc = "L3 Latency", - .code = 0x4ef, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l3_latency), - .ngrp = 1, - .umasks = amd64_fam15h_l3_latency, -}, }; diff --git a/lib/events/amd64_events_fam15h_nb.h b/lib/events/amd64_events_fam15h_nb.h new file mode 100644 index 0000000..5969eb6 --- /dev/null +++ b/lib/events/amd64_events_fam15h_nb.h @@ -0,0 +1,2022 @@ +/* + * Copyright (c) 2013 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + * + * This file has been automatically generated. + * + * PMU: amd64_fam15h_nb_nb (AMD64 Fam15h Interlagos NorthBridge) + * + * Based on libpfm patch by Robert Richter : + * Family 15h Microarchitecture performance monitor events + * + * History: + * + * Nov 30 2013 -- Stephane Eranian , eranian@gmail.com: + * Split core and Northbridge events as PMU is distinct + * + * Apr 29 2011 -- Robert Richter, robert.richter@amd.com: + * Source: BKDG for AMD Family 15h Models 00h-0Fh Processors, + * 42301, Rev 1.15, April 18, 2011 + * + * Dec 09 2010 -- Robert Richter, robert.richter@amd.com: + * Source: BIOS and Kernel Developer's Guide for the AMD Family 15h + * Processors, Rev 0.90, May 18, 2010 + */ + +#define CORE_SELECT(b) \ + { .uname = "CORE_0",\ + .udesc = "Measure on Core0",\ + .ucode = 0 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_1",\ + .udesc = "Measure on Core1",\ + .ucode = 1 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_2",\ + .udesc = "Measure on Core2",\ + .ucode = 2 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_3",\ + .udesc = "Measure on Core3",\ + .ucode = 3 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_4",\ + .udesc = "Measure on Core4",\ + .ucode = 4 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_5",\ + .udesc = 
"Measure on Core5",\ + .ucode = 5 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_6",\ + .udesc = "Measure on Core6",\ + .ucode = 6 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "CORE_7",\ + .udesc = "Measure on Core7",\ + .ucode = 7 << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO,\ + },\ + { .uname = "ANY_CORE",\ + .udesc = "Measure on any core",\ + .ucode = 0xf << 4,\ + .grpid = b,\ + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL,\ + } + +static const amd64_umask_t amd64_fam15h_nb_dispatched_fpu_ops[]={ + { .uname = "OPS_PIPE0", + .udesc = "Total number uops assigned to Pipe 0", + .ucode = 0x1, + }, + { .uname = "OPS_PIPE1", + .udesc = "Total number uops assigned to Pipe 1", + .ucode = 0x2, + }, + { .uname = "OPS_PIPE2", + .udesc = "Total number uops assigned to Pipe 2", + .ucode = 0x4, + }, + { .uname = "OPS_PIPE3", + .udesc = "Total number uops assigned to Pipe 3", + .ucode = 0x8, + }, + { .uname = "OPS_DUAL_PIPE0", + .udesc = "Total number dual-pipe uops assigned to Pipe 0", + .ucode = 0x10, + }, + { .uname = "OPS_DUAL_PIPE1", + .udesc = "Total number dual-pipe uops assigned to Pipe 1", + .ucode = 0x20, + }, + { .uname = "OPS_DUAL_PIPE2", + .udesc = "Total number dual-pipe uops assigned to Pipe 2", + .ucode = 0x40, + }, + { .uname = "OPS_DUAL_PIPE3", + .udesc = "Total number dual-pipe uops assigned to Pipe 3", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_retired_sse_ops[]={ + { .uname = "SINGLE_ADD_SUB_OPS", + .udesc = "Single-precision add/subtract FLOPS", + .ucode = 0x1, + }, + { .uname = "SINGLE_MUL_OPS", + .udesc = "Single-precision multiply FLOPS", + .ucode = 0x2, + }, + { .uname = "SINGLE_DIV_OPS", + .udesc = "Single-precision divide/square root FLOPS", + .ucode = 0x4, + }, + { .uname = "SINGLE_MUL_ADD_OPS", + .udesc = "Single precision multiply-add FLOPS. 
Multiply-add counts as 2 FLOPS", + .ucode = 0x8, + }, + { .uname = "DOUBLE_ADD_SUB_OPS", + .udesc = "Double precision add/subtract FLOPS", + .ucode = 0x10, + }, + { .uname = "DOUBLE_MUL_OPS", + .udesc = "Double precision multiply FLOPS", + .ucode = 0x20, + }, + { .uname = "DOUBLE_DIV_OPS", + .udesc = "Double precision divide/square root FLOPS", + .ucode = 0x40, + }, + { .uname = "DOUBLE_MUL_ADD_OPS", + .udesc = "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_move_scalar_optimization[]={ + { .uname = "SSE_MOVE_OPS", + .udesc = "Number of SSE Move Ops", + .ucode = 0x1, + }, + { .uname = "SSE_MOVE_OPS_ELIM", + .udesc = "Number of SSE Move Ops eliminated", + .ucode = 0x2, + }, + { .uname = "OPT_CAND", + .udesc = "Number of Ops that are candidates for optimization (Z-bit set or pass)", + .ucode = 0x4, + }, + { .uname = "SCALAR_OPS_OPTIMIZED", + .udesc = "Number of Scalar ops optimized", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xf, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_retired_serializing_ops[]={ + { .uname = "SSE_RETIRED", + .udesc = "SSE bottom-executing uops retired", + .ucode = 0x1, + }, + { .uname = "SSE_MISPREDICTED", + .udesc = "SSE control word mispredict traps due to mispredictions", + .ucode = 0x2, + }, + { .uname = "X87_RETIRED", + .udesc = "X87 bottom-executing uops retired", + .ucode = 0x4, + }, + { .uname = "X87_MISPREDICTED", + .udesc = "X87 control word mispredict traps due to mispredictions", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xf, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_segment_register_loads[]={ + { .uname = "ES", + 
.udesc = "ES", + .ucode = 0x1, + }, + { .uname = "CS", + .udesc = "CS", + .ucode = 0x2, + }, + { .uname = "SS", + .udesc = "SS", + .ucode = 0x4, + }, + { .uname = "DS", + .udesc = "DS", + .ucode = 0x8, + }, + { .uname = "FS", + .udesc = "FS", + .ucode = 0x10, + }, + { .uname = "GS", + .udesc = "GS", + .ucode = 0x20, + }, + { .uname = "HS", + .udesc = "HS", + .ucode = 0x40, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_load_q_store_q_full[]={ + { .uname = "LOAD_QUEUE", + .udesc = "The number of cycles that the load buffer is full", + .ucode = 0x1, + }, + { .uname = "STORE_QUEUE", + .udesc = "The number of cycles that the store buffer is full", + .ucode = 0x2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_locked_ops[]={ + { .uname = "EXECUTED", + .udesc = "Number of locked instructions executed", + .ucode = 0x1, + }, + { .uname = "CYCLES_NON_SPECULATIVE_PHASE", + .udesc = "Number of cycles spent in non-speculative phase, excluding cache miss penalty", + .ucode = 0x4, + }, + { .uname = "CYCLES_WAITING", + .udesc = "Number of cycles spent in non-speculative phase, including the cache miss penalty", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xd, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_cancelled_store_to_load[]={ + { .uname = "SIZE_ADDRESS_MISMATCHES", + .udesc = "Store is smaller than load or different starting byte but partial overlap", + .ucode = 0x1, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x1, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_data_cache_misses[]={ + { .uname = "DC_MISS_STREAMING_STORE", + .udesc = "First 
data cache miss or streaming store to a 64B cache line", + .ucode = 0x1, + }, + { .uname = "STREAMING_STORE", + .udesc = "First streaming store to a 64B cache line", + .ucode = 0x2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_data_cache_refills_from_l2_or_northbridge[]={ + { .uname = "GOOD", + .udesc = "Fill with good data. (Final valid status is valid)", + .ucode = 0x1, + }, + { .uname = "INVALID", + .udesc = "Early valid status turned out to be invalid", + .ucode = 0x2, + }, + { .uname = "POISON", + .udesc = "Fill with poison data", + .ucode = 0x4, + }, + { .uname = "READ_ERROR", + .udesc = "Fill with read data error", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xf, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_unified_tlb_hit[]={ + { .uname = "4K_DATA", + .udesc = "4 KB unified TLB hit for data", + .ucode = 0x1, + }, + { .uname = "2M_DATA", + .udesc = "2 MB unified TLB hit for data", + .ucode = 0x2, + }, + { .uname = "1G_DATA", + .udesc = "1 GB unified TLB hit for data", + .ucode = 0x4, + }, + { .uname = "4K_INST", + .udesc = "4 KB unified TLB hit for instruction", + .ucode = 0x10, + }, + { .uname = "2M_INST", + .udesc = "2 MB unified TLB hit for instruction", + .ucode = 0x20, + }, + { .uname = "1G_INST", + .udesc = "1 GB unified TLB hit for instruction", + .ucode = 0x40, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x77, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_unified_tlb_miss[]={ + { .uname = "4K_DATA", + .udesc = "4 KB unified TLB miss for data", + .ucode = 0x1, + }, + { .uname = "2M_DATA", + .udesc = "2 MB unified TLB miss for data", + .ucode = 0x2, + }, + { .uname = "1GB_DATA", + .udesc = "1 GB unified TLB miss for data", + .ucode = 0x4, + }, + 
{ .uname = "4K_INST", + .udesc = "4 KB unified TLB miss for instruction", + .ucode = 0x10, + }, + { .uname = "2M_INST", + .udesc = "2 MB unified TLB miss for instruction", + .ucode = 0x20, + }, + { .uname = "1G_INST", + .udesc = "1 GB unified TLB miss for instruction", + .ucode = 0x40, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x77, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_prefetch_instructions_dispatched[]={ + { .uname = "LOAD", + .udesc = "Load (Prefetch, PrefetchT0/T1/T2)", + .ucode = 0x1, + }, + { .uname = "STORE", + .udesc = "Store (PrefetchW)", + .ucode = 0x2, + }, + { .uname = "NTA", + .udesc = "NTA (PrefetchNTA)", + .ucode = 0x4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_ineffective_sw_prefetches[]={ + { .uname = "SW_PREFETCH_HIT_IN_L1", + .udesc = "Software prefetch hit in the L1", + .ucode = 0x1, + }, + { .uname = "SW_PREFETCH_HIT_IN_L2", + .udesc = "Software prefetch hit in the L2", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x9, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_memory_requests[]={ + { .uname = "NON_CACHEABLE", + .udesc = "Requests to non-cacheable (UC) memory", + .ucode = 0x1, + }, + { .uname = "WRITE_COMBINING", + .udesc = "Requests to non-cacheable (WC, but not WC+/SS) memory", + .ucode = 0x2, + }, + { .uname = "STREAMING_STORE", + .udesc = "Requests to non-cacheable (WC+/SS, but not WC) memory", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x83, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_data_prefetcher[]={ + { .uname = "ATTEMPTED", + .udesc = "Prefetch attempts", + .ucode = 0x2, + }, + { .uname = "ALL", + .udesc = "All 
sub-events selected", + .ucode = 0x2, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_mab_reqs[]={ + { .uname = "BUFFER_BIT_0", + .udesc = "Buffer entry index bit 0", + .ucode = 0x1, + }, + { .uname = "BUFFER_BIT_1", + .udesc = "Buffer entry index bit 1", + .ucode = 0x2, + }, + { .uname = "BUFFER_BIT_2", + .udesc = "Buffer entry index bit 2", + .ucode = 0x4, + }, + { .uname = "BUFFER_BIT_3", + .udesc = "Buffer entry index bit 3", + .ucode = 0x8, + }, + { .uname = "BUFFER_BIT_4", + .udesc = "Buffer entry index bit 4", + .ucode = 0x10, + }, + { .uname = "BUFFER_BIT_5", + .udesc = "Buffer entry index bit 5", + .ucode = 0x20, + }, + { .uname = "BUFFER_BIT_6", + .udesc = "Buffer entry index bit 6", + .ucode = 0x40, + }, + { .uname = "BUFFER_BIT_7", + .udesc = "Buffer entry index bit 7", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_system_read_responses[]={ + { .uname = "EXCLUSIVE", + .udesc = "Exclusive", + .ucode = 0x1, + }, + { .uname = "MODIFIED", + .udesc = "Modified (D18F0x68[ATMModeEn]==0), Modified written (D18F0x68[ATMModeEn]==1)", + .ucode = 0x2, + }, + { .uname = "SHARED", + .udesc = "Shared", + .ucode = 0x4, + }, + { .uname = "OWNED", + .udesc = "Owned", + .ucode = 0x8, + }, + { .uname = "DATA_ERROR", + .udesc = "Data Error", + .ucode = 0x10, + }, + { .uname = "MODIFIED_UNWRITTEN", + .udesc = "Modified unwritten", + .ucode = 0x20, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_octword_write_transfers[]={ + { .uname = "OCTWORD_WRITE_TRANSFER", + .udesc = "OW write transfer", + .ucode = 0x1, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x1, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const 
amd64_umask_t amd64_fam15h_nb_requests_to_l2[]={ + { .uname = "INSTRUCTIONS", + .udesc = "IC fill", + .ucode = 0x1, + }, + { .uname = "DATA", + .udesc = "DC fill", + .ucode = 0x2, + }, + { .uname = "TLB_WALK", + .udesc = "TLB fill (page table walks)", + .ucode = 0x4, + }, + { .uname = "SNOOP", + .udesc = "NB probe request", + .ucode = 0x8, + }, + { .uname = "CANCELLED", + .udesc = "Canceled request", + .ucode = 0x10, + }, + { .uname = "PREFETCHER", + .udesc = "L2 cache prefetcher request", + .ucode = 0x40, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x5f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_l2_cache_miss[]={ + { .uname = "INSTRUCTIONS", + .udesc = "IC fill", + .ucode = 0x1, + }, + { .uname = "DATA", + .udesc = "DC fill (includes possible replays, whereas PMCx041 does not)", + .ucode = 0x2, + }, + { .uname = "TLB_WALK", + .udesc = "TLB page table walk", + .ucode = 0x4, + }, + { .uname = "PREFETCHER", + .udesc = "L2 Cache Prefetcher request", + .ucode = 0x10, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x17, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_l2_cache_fill_writeback[]={ + { .uname = "L2_FILLS", + .udesc = "L2 fills from system", + .ucode = 0x1, + }, + { .uname = "L2_WRITEBACKS", + .udesc = "L2 Writebacks to system (Clean and Dirty)", + .ucode = 0x2, + }, + { .uname = "L2_WRITEBACKS_CLEAN", + .udesc = "L2 Clean Writebacks to system", + .ucode = 0x4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_page_splintering[]={ + { .uname = "GUEST_LARGER", + .udesc = "Guest page size is larger than host page size when nested paging is enabled", + .ucode = 0x1, + }, + { .uname = "MTRR_MISMATCH", + .udesc = "Splintering due to MTRRs, IORRs, APIC, TOMs or other special 
address region", + .ucode = 0x2, + }, + { .uname = "HOST_LARGER", + .udesc = "Host page size is larger than the guest page size", + .ucode = 0x4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_l1_itlb_miss_and_l2_itlb_miss[]={ + { .uname = "4K_PAGE_FETCHES", + .udesc = "Instruction fetches to a 4 KB page", + .ucode = 0x1, + }, + { .uname = "2M_PAGE_FETCHES", + .udesc = "Instruction fetches to a 2 MB page", + .ucode = 0x2, + }, + { .uname = "1G_PAGE_FETCHES", + .udesc = "Instruction fetches to a 1 GB page", + .ucode = 0x4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_instruction_cache_invalidated[]={ + { .uname = "NON_SMC_PROBE_MISS", + .udesc = "Non-SMC invalidating probe that missed on in-flight instructions", + .ucode = 0x1, + }, + { .uname = "NON_SMC_PROBE_HIT", + .udesc = "Non-SMC invalidating probe that hit on in-flight instructions", + .ucode = 0x2, + }, + { .uname = "SMC_PROBE_MISS", + .udesc = "SMC invalidating probe that missed on in-flight instructions", + .ucode = 0x4, + }, + { .uname = "SMC_PROBE_HIT", + .udesc = "SMC invalidating probe that hit on in-flight instructions", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xf, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_retired_mmx_fp_instructions[]={ + { .uname = "X87", + .udesc = "X87 instructions", + .ucode = 0x1, + }, + { .uname = "MMX", + .udesc = "MMX(tm) instructions", + .ucode = 0x2, + }, + { .uname = "SSE", + .udesc = "SSE instructions (SSE,SSE2,SSE3,SSSE3,SSE4A,SSE4.1,SSE4.2,AVX,XOP,FMA4)", + .ucode = 0x4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static 
const amd64_umask_t amd64_fam15h_nb_fpu_exceptions[]={ + { .uname = "TOTAL_FAULTS", + .udesc = "Total microfaults", + .ucode = 0x1, + }, + { .uname = "TOTAL_TRAPS", + .udesc = "Total microtraps", + .ucode = 0x2, + }, + { .uname = "INT2EXT_FAULTS", + .udesc = "Int2Ext faults", + .ucode = 0x4, + }, + { .uname = "EXT2INT_FAULTS", + .udesc = "Ext2Int faults", + .ucode = 0x8, + }, + { .uname = "BYPASS_FAULTS", + .udesc = "Bypass faults", + .ucode = 0x10, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x1f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_ibs_ops_tagged[]={ + { .uname = "TAGGED", + .udesc = "Number of ops tagged by IBS", + .ucode = 0x1, + }, + { .uname = "RETIRED", + .udesc = "Number of ops tagged by IBS that retired", + .ucode = 0x2, + }, + { .uname = "IGNORED", + .udesc = "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired", + .ucode = 0x4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_ls_dispatch[]={ + { .uname = "LOADS", + .udesc = "Loads", + .ucode = 0x1, + }, + { .uname = "STORES", + .udesc = "Stores", + .ucode = 0x2, + }, + { .uname = "LOAD_OP_STORES", + .udesc = "Load-op-Stores", + .ucode = 0x4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_l2_prefetcher_trigger_events[]={ + { .uname = "LOAD_L1_MISS_SEEN_BY_PREFETCHER", + .udesc = "Load L1 miss seen by prefetcher", + .ucode = 0x1, + }, + { .uname = "STORE_L1_MISS_SEEN_BY_PREFETCHER", + .udesc = "Store L1 miss seen by prefetcher", + .ucode = 0x2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t 
amd64_fam15h_nb_dram_accesses[]={ + { .uname = "DCT0_PAGE_HIT", + .udesc = "DCT0 Page hit", + .ucode = 0x1, + }, + { .uname = "DCT0_PAGE_MISS", + .udesc = "DCT0 Page Miss", + .ucode = 0x2, + }, + { .uname = "DCT0_PAGE_CONFLICT", + .udesc = "DCT0 Page Conflict", + .ucode = 0x4, + }, + { .uname = "DCT1_PAGE_HIT", + .udesc = "DCT1 Page hit", + .ucode = 0x8, + }, + { .uname = "DCT1_PAGE_MISS", + .udesc = "DCT1 Page Miss", + .ucode = 0x10, + }, + { .uname = "DCT1_PAGE_CONFLICT", + .udesc = "DCT1 Page Conflict", + .ucode = 0x20, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_dram_controller_page_table_overflows[]={ + { .uname = "DCT0_PAGE_TABLE_OVERFLOW", + .udesc = "DCT0 Page Table Overflow", + .ucode = 0x1, + }, + { .uname = "DCT1_PAGE_TABLE_OVERFLOW", + .udesc = "DCT1 Page Table Overflow", + .ucode = 0x2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3, + .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_memory_controller_dram_command_slots_missed[]={ + { .uname = "DCT0_COMMAND_SLOTS_MISSED", + .udesc = "DCT0 Command Slots Missed (in MemClks)", + .ucode = 0x1, + }, + { .uname = "DCT1_COMMAND_SLOTS_MISSED", + .udesc = "DCT1 Command Slots Missed (in MemClks)", + .ucode = 0x2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3, + .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_memory_controller_turnarounds[]={ + { .uname = "DCT0_DIMM_TURNAROUND", + .udesc = "DCT0 DIMM (chip select) turnaround", + .ucode = 0x1, + }, + { .uname = "DCT0_READ_WRITE_TURNAROUND", + .udesc = "DCT0 Read to write turnaround", + .ucode = 0x2, + }, + { .uname = "DCT0_WRITE_READ_TURNAROUND", + .udesc = "DCT0 Write to read turnaround", + .ucode = 0x4, + }, + { .uname = "DCT1_DIMM_TURNAROUND", + .udesc = "DCT1 DIMM (chip 
select) turnaround", + .ucode = 0x8, + }, + { .uname = "DCT1_READ_WRITE_TURNAROUND", + .udesc = "DCT1 Read to write turnaround", + .ucode = 0x10, + }, + { .uname = "DCT1_WRITE_READ_TURNAROUND", + .udesc = "DCT1 Write to read turnaround", + .ucode = 0x20, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3f, + .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_memory_controller_bypass_counter_saturation[]={ + { .uname = "MEMORY_CONTROLLER_HIGH_PRIORITY_BYPASS", + .udesc = "Memory controller high priority bypass", + .ucode = 0x1, + }, + { .uname = "MEMORY_CONTROLLER_MEDIUM_PRIORITY_BYPASS", + .udesc = "Memory controller medium priority bypass", + .ucode = 0x2, + }, + { .uname = "DCT0_DCQ_BYPASS", + .udesc = "DCT0 DCQ bypass", + .ucode = 0x4, + }, + { .uname = "DCT1_DCQ_BYPASS", + .udesc = "DCT1 DCQ bypass", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xf, + .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_thermal_status[]={ + { .uname = "NUM_HTC_TRIP_POINT_CROSSED", + .udesc = "Number of times the HTC trip point is crossed", + .ucode = 0x4, + }, + { .uname = "NUM_CLOCKS_HTC_PSTATE_INACTIVE", + .udesc = "Number of clocks HTC P-state is inactive", + .ucode = 0x20, + }, + { .uname = "NUM_CLOCKS_HTC_PSTATE_ACTIVE", + .udesc = "Number of clocks HTC P-state is active", + .ucode = 0x40, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x64, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_cpu_io_requests_to_memory_io[]={ + { .uname = "REMOTE_IO_TO_LOCAL_IO", + .udesc = "Remote IO to Local IO", + .ucode = 0x61, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "REMOTE_CPU_TO_LOCAL_IO", + .udesc = "Remote CPU to Local IO", + .ucode = 0x64, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_IO_TO_REMOTE_IO", + .udesc = "Local IO to Remote IO", 
+ .ucode = 0x91, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_IO_TO_REMOTE_MEM", + .udesc = "Local IO to Remote Mem", + .ucode = 0x92, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_CPU_TO_REMOTE_IO", + .udesc = "Local CPU to Remote IO", + .ucode = 0x94, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_CPU_TO_REMOTE_MEM", + .udesc = "Local CPU to Remote Mem", + .ucode = 0x98, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_IO_TO_LOCAL_IO", + .udesc = "Local IO to Local IO", + .ucode = 0xa1, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_IO_TO_LOCAL_MEM", + .udesc = "Local IO to Local Mem", + .ucode = 0xa2, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_CPU_TO_LOCAL_IO", + .udesc = "Local CPU to Local IO", + .ucode = 0xa4, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "LOCAL_CPU_TO_LOCAL_MEM", + .udesc = "Local CPU to Local Mem", + .ucode = 0xa8, + .uflags= AMD64_FL_NCOMBO, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_cache_block_commands[]={ + { .uname = "VICTIM_BLOCK", + .udesc = "Victim Block (Writeback)", + .ucode = 0x1, + }, + { .uname = "READ_BLOCK", + .udesc = "Read Block (Dcache load miss refill)", + .ucode = 0x4, + }, + { .uname = "READ_BLOCK_SHARED", + .udesc = "Read Block Shared (Icache refill)", + .ucode = 0x8, + }, + { .uname = "READ_BLOCK_MODIFIED", + .udesc = "Read Block Modified (Dcache store miss refill)", + .ucode = 0x10, + }, + { .uname = "CHANGE_TO_DIRTY", + .udesc = "Change-to-Dirty (first store to clean block already in cache)", + .ucode = 0x20, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3d, + .uflags = AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_sized_commands[]={ + { .uname = "NON-POSTED_SZWR_BYTE", + .udesc = "Non-Posted SzWr Byte (1-32 bytes). Typical Usage: Legacy or mapped IO, typically 1-4 bytes.", + .ucode = 0x1, + }, + { .uname = "NON-POSTED_SZWR_DW", + .udesc = "Non-Posted SzWr DW (1-16 dwords). 
Typical Usage: Legacy or mapped IO, typically 1", + .ucode = 0x2, + }, + { .uname = "POSTED_SZWR_BYTE", + .udesc = "Posted SzWr Byte (1-32 bytes). Typical Usage: Subcache-line DMA writes, size varies; also", + .ucode = 0x4, + }, + { .uname = "POSTED_SZWR_DW", + .udesc = "Posted SzWr DW (1-16 dwords). Typical Usage: Block-oriented DMA writes, often cache-line", + .ucode = 0x8, + }, + { .uname = "SZRD_BYTE", + .udesc = "SzRd Byte (4 bytes). Typical Usage: Legacy or mapped IO.", + .ucode = 0x10, + }, + { .uname = "SZRD_DW", + .udesc = "SzRd DW (1-16 dwords). Typical Usage: Block-oriented DMA reads, typically cache-line size.", + .ucode = 0x20, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_probe_responses_and_upstream_requests[]={ + { .uname = "PROBE_MISS", + .udesc = "Probe miss", + .ucode = 0x1, + }, + { .uname = "PROBE_HIT_CLEAN", + .udesc = "Probe hit clean", + .ucode = 0x2, + }, + { .uname = "PROBE_HIT_DIRTY_WITHOUT_MEMORY_CANCEL", + .udesc = "Probe hit dirty without memory cancel (probed by Sized Write or Change2Dirty)", + .ucode = 0x4, + }, + { .uname = "PROBE_HIT_DIRTY_WITH_MEMORY_CANCEL", + .udesc = "Probe hit dirty with memory cancel (probed by DMA read or cache refill request)", + .ucode = 0x8, + }, + { .uname = "UPSTREAM_DISPLAY_REFRESH_ISOC_READS", + .udesc = "Upstream display refresh/ISOC reads", + .ucode = 0x10, + }, + { .uname = "UPSTREAM_NON-DISPLAY_REFRESH_READS", + .udesc = "Upstream non-display refresh reads", + .ucode = 0x20, + }, + { .uname = "UPSTREAM_ISOC_WRITES", + .udesc = "Upstream ISOC writes", + .ucode = 0x40, + }, + { .uname = "UPSTREAM_NON-ISOC_WRITES", + .udesc = "Upstream non-ISOC writes", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_gart_events[]={ + { .uname 
= "GART_APERTURE_HIT_ON_ACCESS_FROM_CPU", + .udesc = "GART aperture hit on access from CPU", + .ucode = 0x1, + }, + { .uname = "GART_APERTURE_HIT_ON_ACCESS_FROM_IO", + .udesc = "GART aperture hit on access from IO", + .ucode = 0x2, + }, + { .uname = "GART_MISS", + .udesc = "GART miss", + .ucode = 0x4, + }, + { .uname = "GART_REQUEST_HIT_TABLE_WALK_IN_PROGRESS", + .udesc = "GART Request hit table walk in progress", + .ucode = 0x8, + }, + { .uname = "GART_MULTIPLE_TABLE_WALK_IN_PROGRESS", + .udesc = "GART multiple table walk in progress", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x8f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_link_transmit_bandwidth[]={ + { .uname = "COMMAND_DW_SENT", + .udesc = "Command DW sent", + .ucode = 0x1, + .grpid = 0, + }, + { .uname = "DATA_DW_SENT", + .udesc = "Data DW sent", + .ucode = 0x2, + .grpid = 0, + }, + { .uname = "BUFFER_RELEASE_DW_SENT", + .udesc = "Buffer release DW sent", + .ucode = 0x4, + .grpid = 0, + }, + { .uname = "NOP_DW_SENT", + .udesc = "NOP DW sent (idle)", + .ucode = 0x8, + .grpid = 0, + }, + { .uname = "ADDRESS_DW_SENT", + .udesc = "Address (including extensions) DW sent", + .ucode = 0x10, + .grpid = 0, + }, + { .uname = "PER_PACKET_CRC_SENT", + .udesc = "Per packet CRC sent", + .ucode = 0x20, + .grpid = 0, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x3f, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + .grpid = 0, + }, + { .uname = "SUBLINK_1", + .udesc = "When links are unganged, enable this umask to select sublink 1", + .ucode = 0x80, + .grpid = 1, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "SUBLINK_0", + .udesc = "When links are unganged, enable this umask to select sublink 0 (default when links ganged)", + .ucode = 0x00, + .grpid = 1, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, + + +}; + +static const amd64_umask_t amd64_fam15h_nb_cpu_to_dram_requests_to_target_node[]={ + { 
.uname = "LOCAL_TO_NODE_0", + .udesc = "From Local node to Node 0", + .ucode = 0x1, + }, + { .uname = "LOCAL_TO_NODE_1", + .udesc = "From Local node to Node 1", + .ucode = 0x2, + }, + { .uname = "LOCAL_TO_NODE_2", + .udesc = "From Local node to Node 2", + .ucode = 0x4, + }, + { .uname = "LOCAL_TO_NODE_3", + .udesc = "From Local node to Node 3", + .ucode = 0x8, + }, + { .uname = "LOCAL_TO_NODE_4", + .udesc = "From Local node to Node 4", + .ucode = 0x10, + }, + { .uname = "LOCAL_TO_NODE_5", + .udesc = "From Local node to Node 5", + .ucode = 0x20, + }, + { .uname = "LOCAL_TO_NODE_6", + .udesc = "From Local node to Node 6", + .ucode = 0x40, + }, + { .uname = "LOCAL_TO_NODE_7", + .udesc = "From Local node to Node 7", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_io_to_dram_requests_to_target_node[]={ + { .uname = "LOCAL_TO_NODE_0", + .udesc = "From Local node to Node 0", + .ucode = 0x1, + }, + { .uname = "LOCAL_TO_NODE_1", + .udesc = "From Local node to Node 1", + .ucode = 0x2, + }, + { .uname = "LOCAL_TO_NODE_2", + .udesc = "From Local node to Node 2", + .ucode = 0x4, + }, + { .uname = "LOCAL_TO_NODE_3", + .udesc = "From Local node to Node 3", + .ucode = 0x8, + }, + { .uname = "LOCAL_TO_NODE_4", + .udesc = "From Local node to Node 4", + .ucode = 0x10, + }, + { .uname = "LOCAL_TO_NODE_5", + .udesc = "From Local node to Node 5", + .ucode = 0x20, + }, + { .uname = "LOCAL_TO_NODE_6", + .udesc = "From Local node to Node 6", + .ucode = 0x40, + }, + { .uname = "LOCAL_TO_NODE_7", + .udesc = "From Local node to Node 7", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3[]={ + { .uname = "READ_BLOCK_LOCAL_TO_NODE_0", + .udesc = "Read block From 
Local node to Node 0", + .ucode = 0x11, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_0", + .udesc = "Read block shared From Local node to Node 0", + .ucode = 0x12, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_0", + .udesc = "Read block modified From Local node to Node 0", + .ucode = 0x14, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_0", + .udesc = "Change-to-Dirty From Local node to Node 0", + .ucode = 0x18, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_LOCAL_TO_NODE_1", + .udesc = "Read block From Local node to Node 1", + .ucode = 0x21, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_1", + .udesc = "Read block shared From Local node to Node 1", + .ucode = 0x22, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_1", + .udesc = "Read block modified From Local node to Node 1", + .ucode = 0x24, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_1", + .udesc = "Change-to-Dirty From Local node to Node 1", + .ucode = 0x28, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_LOCAL_TO_NODE_2", + .udesc = "Read block From Local node to Node 2", + .ucode = 0x41, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_2", + .udesc = "Read block shared From Local node to Node 2", + .ucode = 0x42, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_2", + .udesc = "Read block modified From Local node to Node 2", + .ucode = 0x44, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_2", + .udesc = "Change-to-Dirty From Local node to Node 2", + .ucode = 0x48, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_LOCAL_TO_NODE_3", + .udesc = "Read block From Local node to Node 3", + .ucode = 0x81, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_3", + .udesc = "Read block 
shared From Local node to Node 3", + .ucode = 0x82, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_3", + .udesc = "Read block modified From Local node to Node 3", + .ucode = 0x84, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_3", + .udesc = "Change-to-Dirty From Local node to Node 3", + .ucode = 0x88, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7[]={ + { .uname = "READ_BLOCK_LOCAL_TO_NODE_4", + .udesc = "Read block From Local node to Node 4", + .ucode = 0x11, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_4", + .udesc = "Read block shared From Local node to Node 4", + .ucode = 0x12, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_4", + .udesc = "Read block modified From Local node to Node 4", + .ucode = 0x14, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_4", + .udesc = "Change-to-Dirty From Local node to Node 4", + .ucode = 0x18, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_LOCAL_TO_NODE_5", + .udesc = "Read block From Local node to Node 5", + .ucode = 0x21, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_5", + .udesc = "Read block shared From Local node to Node 5", + .ucode = 0x22, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_5", + .udesc = "Read block modified From Local node to Node 5", + .ucode = 0x24, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_5", + .udesc = "Change-to-Dirty From Local node to Node 5", + .ucode = 0x28, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_LOCAL_TO_NODE_6", + .udesc = "Read block From Local node to Node 6", + .ucode = 0x41, + .uflags= AMD64_FL_NCOMBO, + 
}, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_6", + .udesc = "Read block shared From Local node to Node 6", + .ucode = 0x42, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_6", + .udesc = "Read block modified From Local node to Node 6", + .ucode = 0x44, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_6", + .udesc = "Change-to-Dirty From Local node to Node 6", + .ucode = 0x48, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_LOCAL_TO_NODE_7", + .udesc = "Read block From Local node to Node 7", + .ucode = 0x81, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_SHARED_LOCAL_TO_NODE_7", + .udesc = "Read block shared From Local node to Node 7", + .ucode = 0x82, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_BLOCK_MODIFIED_LOCAL_TO_NODE_7", + .udesc = "Read block modified From Local node to Node 7", + .ucode = 0x84, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "CHANGE_TO_DIRTY_LOCAL_TO_NODE_7", + .udesc = "Change-to-Dirty From Local node to Node 7", + .ucode = 0x88, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_cpu_command_requests_to_target_node[]={ + { .uname = "READ_SIZED_LOCAL_TO_NODE_0", + .udesc = "Read Sized From Local node to Node 0", + .ucode = 0x11, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_0", + .udesc = "Write Sized From Local node to Node 0", + .ucode = 0x12, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_0", + .udesc = "Victim Block From Local node to Node 0", + .ucode = 0x14, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_SIZED_LOCAL_TO_NODE_1", + .udesc = "Read Sized From Local node to Node 1", + .ucode = 0x21, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_1", + .udesc = "Write Sized From Local node to Node 1", + .ucode = 
0x22, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_1", + .udesc = "Victim Block From Local node to Node 1", + .ucode = 0x24, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_SIZED_LOCAL_TO_NODE_2", + .udesc = "Read Sized From Local node to Node 2", + .ucode = 0x41, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_2", + .udesc = "Write Sized From Local node to Node 2", + .ucode = 0x42, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_2", + .udesc = "Victim Block From Local node to Node 2", + .ucode = 0x44, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_SIZED_LOCAL_TO_NODE_3", + .udesc = "Read Sized From Local node to Node 3", + .ucode = 0x81, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_3", + .udesc = "Write Sized From Local node to Node 3", + .ucode = 0x82, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_3", + .udesc = "Victim Block From Local node to Node 3", + .ucode = 0x84, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_SIZED_LOCAL_TO_NODE_4", + .udesc = "Read Sized From Local node to Node 4", + .ucode = 0x19, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_4", + .udesc = "Write Sized From Local node to Node 4", + .ucode = 0x1a, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_4", + .udesc = "Victim Block From Local node to Node 4", + .ucode = 0x1c, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_SIZED_LOCAL_TO_NODE_5", + .udesc = "Read Sized From Local node to Node 5", + .ucode = 0x29, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_5", + .udesc = "Write Sized From Local node to Node 5", + .ucode = 0x2a, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_5", + .udesc = "Victim Block From Local node to Node 5", + .ucode = 0x2c, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_SIZED_LOCAL_TO_NODE_6", + .udesc = "Read 
Sized From Local node to Node 6", + .ucode = 0x49, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_6", + .udesc = "Write Sized From Local node to Node 6", + .ucode = 0x4a, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_6", + .udesc = "Victim Block From Local node to Node 6", + .ucode = 0x4c, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "READ_SIZED_LOCAL_TO_NODE_7", + .udesc = "Read Sized From Local node to Node 7", + .ucode = 0x89, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "WRITE_SIZED_LOCAL_TO_NODE_7", + .udesc = "Write Sized From Local node to Node 7", + .ucode = 0x8a, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "VICTIM_BLOCK_LOCAL_TO_NODE_7", + .udesc = "Victim Block From Local node to Node 7", + .ucode = 0x8c, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "ALL_LOCAL_TO_NODE_0_3", + .udesc = "All From Local node to Node 0-3", + .ucode = 0xf7, + .uflags= AMD64_FL_NCOMBO, + }, + { .uname = "ALL_LOCAL_TO_NODE_4_7", + .udesc = "All From Local node to Node 4-7", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_request_cache_status_0[]={ + { .uname = "PROBE_HIT_S", + .udesc = "Probe Hit S", + .ucode = 0x1, + }, + { .uname = "PROBE_HIT_E", + .udesc = "Probe Hit E", + .ucode = 0x2, + }, + { .uname = "PROBE_HIT_MUW_OR_O", + .udesc = "Probe Hit MuW or O", + .ucode = 0x4, + }, + { .uname = "PROBE_HIT_M", + .udesc = "Probe Hit M", + .ucode = 0x8, + }, + { .uname = "PROBE_MISS", + .udesc = "Probe Miss", + .ucode = 0x10, + }, + { .uname = "DIRECTED_PROBE", + .udesc = "Directed Probe", + .ucode = 0x20, + }, + { .uname = "TRACK_CACHE_STAT_FOR_RDBLK", + .udesc = "Track Cache Stat for RdBlk", + .ucode = 0x40, + }, + { .uname = "TRACK_CACHE_STAT_FOR_RDBLKS", + .udesc = "Track Cache Stat for RdBlkS", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static 
const amd64_umask_t amd64_fam15h_nb_request_cache_status_1[]={ + { .uname = "PROBE_HIT_S", + .udesc = "Probe Hit S", + .ucode = 0x1, + }, + { .uname = "PROBE_HIT_E", + .udesc = "Probe Hit E", + .ucode = 0x2, + }, + { .uname = "PROBE_HIT_MUW_OR_O", + .udesc = "Probe Hit MuW or O", + .ucode = 0x4, + }, + { .uname = "PROBE_HIT_M", + .udesc = "Probe Hit M", + .ucode = 0x8, + }, + { .uname = "PROBE_MISS", + .udesc = "Probe Miss", + .ucode = 0x10, + }, + { .uname = "DIRECTED_PROBE", + .udesc = "Directed Probe", + .ucode = 0x20, + }, + { .uname = "TRACK_CACHE_STAT_FOR_CHGTODIRTY", + .udesc = "Track Cache Stat for ChgToDirty", + .ucode = 0x40, + }, + { .uname = "TRACK_CACHE_STAT_FOR_RDBLKM", + .udesc = "Track Cache Stat for RdBlkM", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_memory_controller_requests[]={ + { .uname = "WRITE_REQUESTS_TO_DCT", + .udesc = "Write requests sent to the DCT", + .ucode = 0x1, + }, + { .uname = "READ_REQUESTS_TO_DCT", + .udesc = "Read requests (including prefetch requests) sent to the DCT", + .ucode = 0x2, + }, + { .uname = "PREFETCH_REQUESTS_TO_DCT", + .udesc = "Prefetch requests sent to the DCT", + .ucode = 0x4, + }, + { .uname = "32_BYTES_SIZED_WRITES", + .udesc = "32 Bytes Sized Writes", + .ucode = 0x8, + }, + { .uname = "64_BYTES_SIZED_WRITES", + .udesc = "64 Bytes Sized Writes", + .ucode = 0x10, + }, + { .uname = "32_BYTES_SIZED_READS", + .udesc = "32 Bytes Sized Reads", + .ucode = 0x20, + }, + { .uname = "64_BYTE_SIZED_READS", + .udesc = "64 Byte Sized Reads", + .ucode = 0x40, + }, + { .uname = "READ_REQUESTS_TO_DCT_WHILE_WRITES_PENDING", + .udesc = "Read requests sent to the DCT while writes requests are pending in the DCT", + .ucode = 0x80, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const 
amd64_umask_t amd64_fam15h_nb_read_request_to_l3_cache[]={ /* unit masks shared by READ_REQUEST_TO_L3_CACHE (0x4e0) and L3_CACHE_MISSES (0x4e1) */ + { .uname = "READ_BLOCK_EXCLUSIVE", + .udesc = "Read Block Exclusive (Data cache read)", + .ucode = 0x1, + .grpid = 0, + }, + { .uname = "READ_BLOCK_SHARED", + .udesc = "Read Block Shared (Instruction cache read)", + .ucode = 0x2, + .grpid = 0, + }, + { .uname = "READ_BLOCK_MODIFY", + .udesc = "Read Block Modify", + .ucode = 0x4, + .grpid = 0, + }, + { .uname = "PREFETCH", + .udesc = "Count prefetches only", + .ucode = 0x8, + .grpid = 0, + }, + { .uname = "READ_BLOCK_ANY", + .udesc = "Count any read request", + .ucode = 0x7, /* 0x1|0x2|0x4: the three read-block masks; PREFETCH (0x8) is not included */ + .grpid = 0, + .uflags= AMD64_FL_DFL | AMD64_FL_NCOMBO, + }, + CORE_SELECT(1), /* presumably expands to the group-1 entries (both events declare .ngrp = 2) - macro not visible here, confirm */ +}; + +static const amd64_umask_t amd64_fam15h_nb_l3_fills_caused_by_l2_evictions[]={ /* unit masks of L3_FILLS_CAUSED_BY_L2_EVICTIONS (event code 0x4e2) */ + { .uname = "SHARED", + .udesc = "Shared", + .ucode = 0x1, + .grpid = 0, + }, + { .uname = "EXCLUSIVE", + .udesc = "Exclusive", + .ucode = 0x2, + .grpid = 0, + }, + { .uname = "OWNED", + .udesc = "Owned", + .ucode = 0x4, + .grpid = 0, + }, + { .uname = "MODIFIED", + .udesc = "Modified", + .ucode = 0x8, + .grpid = 0, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xff, /* NOTE(review): the four masks of this table OR to 0xf (cf. amd64_fam15h_nb_l3_evictions); 0xff also sets bits 4-7 - confirm this is intended alongside CORE_SELECT */ + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + .grpid = 0, + }, + CORE_SELECT(1), + }; + +static const amd64_umask_t amd64_fam15h_nb_l3_evictions[]={ /* unit masks of L3_EVICTIONS (event code 0x4e3) */ + { .uname = "SHARED", + .udesc = "Shared", + .ucode = 0x1, + }, + { .uname = "EXCLUSIVE", + .udesc = "Exclusive", + .ucode = 0x2, + }, + { .uname = "OWNED", + .udesc = "Owned", + .ucode = 0x4, + }, + { .uname = "MODIFIED", + .udesc = "Modified", + .ucode = 0x8, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xf, /* OR of the four single-bit masks of this table */ + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_umask_t amd64_fam15h_nb_l3_latency[]={ /* unit masks: request cycle count, request count, and ALL */ + { .uname = "L3_REQUEST_CYCLE", + .udesc = "L3 Request cycle count.", + .ucode = 0x1, + }, + { .uname = "L3_REQUEST", + .udesc = "L3 request count.", + .ucode = 0x2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode
= 0x3, + .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, + }, +}; + +static const amd64_entry_t amd64_fam15h_nb_pe[]={ +{ .name = "DRAM_ACCESSES", + .desc = "DRAM Accesses", + .code = 0xe0, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_dram_accesses), + .ngrp = 1, + .umasks = amd64_fam15h_nb_dram_accesses, +}, +{ .name = "DRAM_CONTROLLER_PAGE_TABLE_OVERFLOWS", + .desc = "DRAM Controller Page Table Overflows", + .code = 0xe1, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_dram_controller_page_table_overflows), + .ngrp = 1, + .umasks = amd64_fam15h_nb_dram_controller_page_table_overflows, +}, +{ .name = "MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED", + .desc = "Memory Controller DRAM Command Slots Missed", + .code = 0xe2, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_dram_command_slots_missed), + .ngrp = 1, + .umasks = amd64_fam15h_nb_memory_controller_dram_command_slots_missed, +}, +{ .name = "MEMORY_CONTROLLER_TURNAROUNDS", + .desc = "Memory Controller Turnarounds", + .code = 0xe3, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_turnarounds), + .ngrp = 1, + .umasks = amd64_fam15h_nb_memory_controller_turnarounds, +}, +{ .name = "MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION", + .desc = "Memory Controller Bypass Counter Saturation", + .code = 0xe4, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_bypass_counter_saturation), + .ngrp = 1, + .umasks = amd64_fam15h_nb_memory_controller_bypass_counter_saturation, +}, +{ .name = "THERMAL_STATUS", + .desc = "Thermal Status", + .code = 0xe8, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_thermal_status), + .ngrp = 1, + .umasks = amd64_fam15h_nb_thermal_status, +}, +{ .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", + .desc = "CPU/IO Requests to Memory/IO", + .code = 0xe9, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_io_requests_to_memory_io), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_io_requests_to_memory_io, +}, +{ .name = "CACHE_BLOCK_COMMANDS", + .desc = "Cache Block Commands", 
+ .code = 0xea, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cache_block_commands), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cache_block_commands, +}, +{ .name = "SIZED_COMMANDS", + .desc = "Sized Commands", + .code = 0xeb, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_sized_commands), + .ngrp = 1, + .umasks = amd64_fam15h_nb_sized_commands, +}, +{ .name = "PROBE_RESPONSES_AND_UPSTREAM_REQUESTS", + .desc = "Probe Responses and Upstream Requests", + .code = 0xec, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_probe_responses_and_upstream_requests), + .ngrp = 1, + .umasks = amd64_fam15h_nb_probe_responses_and_upstream_requests, +}, +{ .name = "GART_EVENTS", + .desc = "GART Events", + .code = 0xee, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_gart_events), + .ngrp = 1, + .umasks = amd64_fam15h_nb_gart_events, +}, +{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_0", + .desc = "Link Transmit Bandwidth Link 0", + .code = 0xf6, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), + .ngrp = 2, + .umasks = amd64_fam15h_nb_link_transmit_bandwidth, +}, +{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_1", + .desc = "Link Transmit Bandwidth Link 1", + .code = 0xf7, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), + .ngrp = 2, + .umasks = amd64_fam15h_nb_link_transmit_bandwidth, +}, +{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_2", + .desc = "Link Transmit Bandwidth Link 2", + .code = 0xf8, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), + .ngrp = 2, + .umasks = amd64_fam15h_nb_link_transmit_bandwidth, +}, +{ .name = "LINK_TRANSMIT_BANDWIDTH_LINK_3", + .desc = "Link Transmit Bandwidth Link 3", + .code = 0x1f9, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_link_transmit_bandwidth), + .ngrp = 2, + .umasks = amd64_fam15h_nb_link_transmit_bandwidth, +}, +{ .name = "CPU_TO_DRAM_REQUESTS_TO_TARGET_NODE", + .desc = "CPU to DRAM Requests to Target Node", + .code = 0x1e0, + .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_to_dram_requests_to_target_node), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_to_dram_requests_to_target_node, +}, +{ .name = "IO_TO_DRAM_REQUESTS_TO_TARGET_NODE", + .desc = "IO to DRAM Requests to Target Node", + .code = 0x1e1, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_io_to_dram_requests_to_target_node), + .ngrp = 1, + .umasks = amd64_fam15h_nb_io_to_dram_requests_to_target_node, +}, +{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_0_3", + .desc = "CPU Read Command Latency to Target Node 0-3", + .code = 0x1e2, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3, +}, +{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_0_3", + .desc = "CPU Read Command Requests to Target Node 0-3", + .code = 0x1e3, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_0_3, +}, +{ .name = "CPU_READ_COMMAND_LATENCY_TO_TARGET_NODE_4_7", + .desc = "CPU Read Command Latency to Target Node 4-7", + .code = 0x1e4, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7, +}, +{ .name = "CPU_READ_COMMAND_REQUESTS_TO_TARGET_NODE_4_7", + .desc = "CPU Read Command Requests to Target Node 4-7", + .code = 0x1e5, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_read_command_requests_to_target_node_4_7, +}, +{ .name = "CPU_COMMAND_LATENCY_TO_TARGET_NODE", + .desc = "CPU Command Latency to Target Node", + .code = 0x1e6, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_command_requests_to_target_node), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_command_requests_to_target_node, +}, +{ .name = 
"CPU_REQUESTS_TO_TARGET_NODE", + .desc = "CPU Requests to Target Node", + .code = 0x1e7, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_cpu_command_requests_to_target_node), + .ngrp = 1, + .umasks = amd64_fam15h_nb_cpu_command_requests_to_target_node, +}, +{ .name = "REQUEST_CACHE_STATUS_0", + .desc = "Request Cache Status 0", + .code = 0x1ea, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_request_cache_status_0), + .ngrp = 1, + .umasks = amd64_fam15h_nb_request_cache_status_0, +}, +{ .name = "REQUEST_CACHE_STATUS_1", + .desc = "Request Cache Status 1", + .code = 0x1eb, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_request_cache_status_1), + .ngrp = 1, + .umasks = amd64_fam15h_nb_request_cache_status_1, +}, +{ .name = "MEMORY_CONTROLLER_REQUESTS", + .desc = "Memory Controller Requests", + .code = 0x1f0, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_memory_controller_requests), + .ngrp = 1, + .umasks = amd64_fam15h_nb_memory_controller_requests, +}, +{ .name = "READ_REQUEST_TO_L3_CACHE", + .desc = "Read Request to L3 Cache", + .code = 0x4e0, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_read_request_to_l3_cache), + .ngrp = 2, + .umasks = amd64_fam15h_nb_read_request_to_l3_cache, +}, +{ .name = "L3_CACHE_MISSES", + .desc = "L3 Cache Misses", + .code = 0x4e1, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_read_request_to_l3_cache), + .ngrp = 2, + .umasks = amd64_fam15h_nb_read_request_to_l3_cache, +}, +{ .name = "L3_FILLS_CAUSED_BY_L2_EVICTIONS", + .desc = "L3 Fills caused by L2 Evictions", + .code = 0x4e2, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_l3_fills_caused_by_l2_evictions), + .ngrp = 2, + .umasks = amd64_fam15h_nb_l3_fills_caused_by_l2_evictions, +}, +{ .name = "L3_EVICTIONS", + .desc = "L3 Evictions", + .code = 0x4e3, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_l3_evictions), + .ngrp = 1, + .umasks = amd64_fam15h_nb_l3_evictions, +}, +{ .name = "NON_CANCELED_L3_READ_REQUESTS", + .desc = "Non-canceled L3 Read Requests", + .code = 0x4ed, + .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_read_request_to_l3_cache), + .ngrp = 2, + .umasks = amd64_fam15h_nb_read_request_to_l3_cache, +}, +{ .name = "L3_LATENCY", + .desc = "L3 Latency", + .code = 0x4ef, + .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_l3_latency), + .ngrp = 1, + .umasks = amd64_fam15h_nb_l3_latency, +}, +}; diff --git a/lib/pfmlib_amd64_fam15h.c b/lib/pfmlib_amd64_fam15h.c index 9170868..b752d87 100644 --- a/lib/pfmlib_amd64_fam15h.c +++ b/lib/pfmlib_amd64_fam15h.c @@ -27,43 +27,72 @@ #include "pfmlib_priv.h" #include "pfmlib_amd64_priv.h" #include "events/amd64_events_fam15h.h" +#include "events/amd64_events_fam15h_nb.h" -#define DEFINE_FAM15H_REV(d, n, r, pmuid) \ -static int \ -pfm_amd64_fam15h_##n##_detect(void *this) \ -{ \ - int ret; \ - ret = pfm_amd64_detect(this); \ - if (ret != PFM_SUCCESS) \ - return ret; \ - ret = pfm_amd64_cfg.revision; \ - return ret == pmuid ? PFM_SUCCESS : PFM_ERR_NOTSUPP; \ -} \ -pfmlib_pmu_t amd64_fam15h_##n##_support={ \ - .desc = "AMD64 Fam15h "#d, \ - .name = "amd64_fam15h_"#n, \ - .pmu = pmuid, \ - .pmu_rev = r, \ - .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam15h_pe),\ - .type = PFM_PMU_TYPE_CORE, \ - .supported_plm = AMD64_FAM10H_PLM, \ - .num_cntrs = 6, \ - .max_encoding = 1, \ - .pe = amd64_fam15h_pe, \ - .atdesc = amd64_mods, \ - .flags = PFMLIB_PMU_FL_RAW_UMASK, \ - \ - .pmu_detect = pfm_amd64_fam15h_##n##_detect,\ - .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding,\ - PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), \ - .get_event_first = pfm_amd64_get_event_first, \ - .get_event_next = pfm_amd64_get_event_next, \ - .event_is_valid = pfm_amd64_event_is_valid, \ - .validate_table = pfm_amd64_validate_table, \ - .get_event_info = pfm_amd64_get_event_info, \ - .get_event_attr_info = pfm_amd64_get_event_attr_info,\ - PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs),\ - .get_event_nattrs = pfm_amd64_get_event_nattrs, \ +static int +pfm_amd64_fam15h_detect(void *this) +{ + int ret; + + ret = 
pfm_amd64_detect(this); + if (ret != PFM_SUCCESS) + return ret; + + if (pfm_amd64_cfg.revision == PFM_PMU_AMD64_FAM15H_INTERLAGOS) + return PFM_SUCCESS; + + return PFM_ERR_NOTSUPP; } -DEFINE_FAM15H_REV(Interlagos, interlagos, 0, PFM_PMU_AMD64_FAM15H_INTERLAGOS); +pfmlib_pmu_t amd64_fam15h_interlagos_support={ + .desc = "AMD64 Fam15h Interlagos", + .name = "amd64_fam15h_interlagos", + .pmu = PFM_PMU_AMD64_FAM15H_INTERLAGOS, + .pmu_rev = 0, + .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam15h_pe), + .type = PFM_PMU_TYPE_CORE, + .supported_plm = AMD64_FAM10H_PLM, + .num_cntrs = 6, + .max_encoding = 1, + .pe = amd64_fam15h_pe, + .atdesc = amd64_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK, + .pmu_detect = pfm_amd64_fam15h_detect, + .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, + PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), + .get_event_first = pfm_amd64_get_event_first, + .get_event_next = pfm_amd64_get_event_next, + .event_is_valid = pfm_amd64_event_is_valid, + .validate_table = pfm_amd64_validate_table, + .get_event_info = pfm_amd64_get_event_info, + .get_event_attr_info = pfm_amd64_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), + .get_event_nattrs = pfm_amd64_get_event_nattrs, +}; + +pfmlib_pmu_t amd64_fam15h_nb_support={ + .desc = "AMD64 Fam15h NorthBridge", + .name = "amd64_fam15h_nb", + .pmu = PFM_PMU_AMD64_FAM15H_NB, + .perf_name = "amd_nb", + .pmu_rev = 0, + .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_pe), + .type = PFM_PMU_TYPE_UNCORE, + .supported_plm = 0, /* no plm support */ + .num_cntrs = 4, + .max_encoding = 1, + .pe = amd64_fam15h_nb_pe, + .atdesc = amd64_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK, + .pmu_detect = pfm_amd64_fam15h_detect, + .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, + PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), + .get_event_first = pfm_amd64_get_event_first, + .get_event_next = pfm_amd64_get_event_next, + .event_is_valid = pfm_amd64_event_is_valid, + .validate_table = 
pfm_amd64_validate_table, + .get_event_info = pfm_amd64_get_event_info, + .get_event_attr_info = pfm_amd64_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_amd64_nb_perf_validate_pattrs), + .get_event_nattrs = pfm_amd64_get_event_nattrs, +}; diff --git a/lib/pfmlib_amd64_perf_event.c b/lib/pfmlib_amd64_perf_event.c index 68cf2b9..e210328 100644 --- a/lib/pfmlib_amd64_perf_event.c +++ b/lib/pfmlib_amd64_perf_event.c @@ -24,12 +24,38 @@ #include #include #include +#include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_amd64_priv.h" /* architecture private */ #include "pfmlib_perf_event_priv.h" +static int +find_pmu_type_by_name(const char *name) +{ + char filename[PATH_MAX]; + FILE *fp; + int ret, type; + + if (!name) + return PFM_ERR_NOTSUPP; + + sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); + + fp = fopen(filename, "r"); + if (!fp) + return PFM_ERR_NOTSUPP; + + ret = fscanf(fp, "%d", &type); + if (ret != 1) + type = PFM_ERR_NOTSUPP; + + fclose(fp); + + return type; +} + int pfm_amd64_get_perf_encoding(void *this, pfmlib_event_desc_t *e) { @@ -52,8 +78,20 @@ pfm_amd64_get_perf_encoding(void *this, pfmlib_event_desc_t *e) return PFM_ERR_NOTSUPP; } - /* all events treated as raw for now */ - attr->type = PERF_TYPE_RAW; + ret = PERF_TYPE_RAW; + + /* + * if specific perf PMU is provided then try to locate it + * otherwise assume core PMU and thus type RAW + */ + if (pmu->perf_name) { + /* grab PMU type from sysfs */ + ret = find_pmu_type_by_name(pmu->perf_name); + if (ret < 0) + return ret; + } + DPRINT("amd64_get_perf_encoding: PMU type=%d\n", ret); + attr->type = ret; attr->config = e->codes[0]; return PFM_SUCCESS; @@ -80,8 +118,8 @@ pfm_amd64_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) if (e->pattrs[i].ctrl == PFM_ATTR_CTRL_PMU) { if (e->pattrs[i].idx == AMD64_ATTR_U - || e->pattrs[i].idx == AMD64_ATTR_K - || e->pattrs[i].idx == AMD64_ATTR_H) + || e->pattrs[i].idx == AMD64_ATTR_K + || 
e->pattrs[i].idx == AMD64_ATTR_H) compact = 1; } @@ -102,3 +140,30 @@ pfm_amd64_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) } } } + +void +pfm_amd64_nb_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) +{ + int i, compact; + + for (i=0; i < e->npattrs; i++) { + compact = 0; + + /* umasks never conflict */ + if (e->pattrs[i].type == PFM_ATTR_UMASK) + continue; + + /* + * no perf_events attr is supported by AMD64 Northbridge PMU + * sampling is not supported + */ + if (e->pattrs[i].ctrl == PFM_ATTR_CTRL_PERF_EVENT) { + compact = 1; + } + + if (compact) { + pfmlib_compact_pattrs(e, i); + i--; + } + } +} diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h index e940758..14c9526 100644 --- a/lib/pfmlib_amd64_priv.h +++ b/lib/pfmlib_amd64_priv.h @@ -212,4 +212,5 @@ extern int pfm_amd64_get_num_events(void *this); extern int pfm_amd64_get_perf_encoding(void *this, pfmlib_event_desc_t *e); extern void pfm_amd64_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e); +extern void pfm_amd64_nb_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e); #endif /* __PFMLIB_AMD64_PRIV_H__ */ diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index 0ed9441..4f4092f 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -76,6 +76,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &amd64_fam12h_llano_support, &amd64_fam14h_bobcat_support, &amd64_fam15h_interlagos_support, + &amd64_fam15h_nb_support, &intel_core_support, &intel_atom_support, &intel_nhm_support, diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index 75a2d30..0b46a86 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -213,6 +213,7 @@ extern pfmlib_pmu_t amd64_fam11h_turion_support; extern pfmlib_pmu_t amd64_fam12h_llano_support; extern pfmlib_pmu_t amd64_fam14h_bobcat_support; extern pfmlib_pmu_t amd64_fam15h_interlagos_support; +extern pfmlib_pmu_t amd64_fam15h_nb_support; extern pfmlib_pmu_t intel_p6_support; extern pfmlib_pmu_t intel_ppro_support; extern pfmlib_pmu_t 
intel_pii_support; diff --git a/tests/validate_x86.c b/tests/validate_x86.c index cb781b9..8dc222b 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1879,31 +1879,35 @@ static const test_event_t x86_test_events[]={ }, { SRC_LINE, .name = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:NOP_DW_SENT", + .ret = PFM_ERR_NOTFOUND, /* event in Northbridge PMU */ + }, + { SRC_LINE, + .name = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:NOP_DW_SENT", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x5308f6, - .fstr = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:NOP_DW_SENT:SUBLINK_0", + .fstr = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:NOP_DW_SENT:SUBLINK_0", }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL", + .name = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x533ff6, - .fstr = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL:SUBLINK_0", + .fstr = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL:SUBLINK_0", }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL:SUBLINK_1", + .name = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL:SUBLINK_1", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x53bff6, - .fstr = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL:SUBLINK_1", + .fstr = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:ALL:SUBLINK_1", }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:COMMAND_DW_SENT:DATA_DW_SENT", + .name = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:COMMAND_DW_SENT:DATA_DW_SENT", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x5303f6, - .fstr = "amd64_fam15h_interlagos::LINK_TRANSMIT_BANDWIDTH_LINK_0:COMMAND_DW_SENT:DATA_DW_SENT:SUBLINK_0", + .fstr = "amd64_fam15h_nb::LINK_TRANSMIT_BANDWIDTH_LINK_0:COMMAND_DW_SENT:DATA_DW_SENT:SUBLINK_0", }, { SRC_LINE, .name = "amd64_fam15h_interlagos::DISPATCHED_FPU_OPS:0x4ff:u", @@ 
-1917,40 +1921,40 @@ static const test_event_t x86_test_events[]={ .fstr = "amd64_fam15h_interlagos::DISPATCHED_FPU_OPS:0xff:k=0:u=1:e=0:i=0:c=0:h=0:g=0" }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:read_block_modify:core_3", + .name = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:read_block_modify:core_3", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x4005334e0ull, - .fstr = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_MODIFY:CORE_3", + .fstr = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_MODIFY:CORE_3", }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE", + .name = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x40053f7e0ull, - .fstr = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_ANY:ANY_CORE", + .fstr = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_ANY:ANY_CORE", }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_EXCLUSIVE:PREFETCH:READ_BLOCK_MODIFY:core_4", + .name = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_EXCLUSIVE:PREFETCH:READ_BLOCK_MODIFY:core_4", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x400534de0ull, - .fstr = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_EXCLUSIVE:READ_BLOCK_MODIFY:PREFETCH:CORE_4", + .fstr = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:READ_BLOCK_EXCLUSIVE:READ_BLOCK_MODIFY:PREFETCH:CORE_4", }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:read_block_any:prefetch:core_1", + .name = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:read_block_any:prefetch:core_1", .ret = PFM_ERR_FEATCOMB, /* must use individual umasks to combine with prefetch */ }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:read_block_any:prefetch:core_1:core_3", + .name = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:read_block_any:prefetch:core_1:core_3", .ret = PFM_ERR_FEATCOMB, /* core umasks cannot be 
combined */ }, { SRC_LINE, - .name = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:prefetch:core_0", + .name = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:prefetch:core_0", .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x4005308e0ull, - .fstr = "amd64_fam15h_interlagos::READ_REQUEST_TO_L3_CACHE:PREFETCH:CORE_0", + .fstr = "amd64_fam15h_nb::READ_REQUEST_TO_L3_CACHE:PREFETCH:CORE_0", }, { SRC_LINE, .name = "ivb_ep::mem_load_uops_llc_miss_retired:local_dram", commit 9007b7acf1aa3d8b6bd344c14f6cf6c566170842 Author: Stephane Eranian Date: Sun Dec 1 10:42:24 2013 +0100 Add man page for AMD64 Fam15h core and uncore PMU Man page was missing. Signed-off-by: Stephane Eranian diff --git a/README b/README index 8aa8086..a765cb4 100644 --- a/README +++ b/README @@ -37,7 +37,7 @@ The library supports many PMUs. The current version can handle: AMD64 Fam11h (Turion) AMD64 Fam12h (Llano) AMD64 Fam14h (Bobcat) - AMD64 Fam15h (Bulldozer) + AMD64 Fam15h (Bulldozer) (core and uncore) - For Intel X86: Intel P6 (Pentium II, Pentium Pro, Pentium III, Pentium M) diff --git a/docs/Makefile b/docs/Makefile index 8f28726..f24999f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -36,6 +36,7 @@ ARCH_MAN=libpfm_intel_core.3 \ libpfm_amd64_k7.3 \ libpfm_amd64_k8.3 \ libpfm_amd64_fam10h.3 \ + libpfm_amd64_fam15h.3 \ libpfm_intel_atom.3 \ libpfm_intel_nhm.3 \ libpfm_intel_nhm_unc.3 \ diff --git a/docs/man3/libpfm_amd64_fam15h.3 b/docs/man3/libpfm_amd64_fam15h.3 new file mode 100644 index 0000000..84ae455 --- /dev/null +++ b/docs/man3/libpfm_amd64_fam15h.3 @@ -0,0 +1,55 @@ +.TH LIBPFM 3 "Nov, 2013" "" "Linux Programmer's Manual" +.SH NAME +libpfm_amd64_fam15h - support for AMD64 Family 15h processors +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: amd64_fam15h_interlagos +.B PMU desc: AMD64 Fam15h Interlagos +.B PMU name: amd64_fam15h_nb +.B PMU desc: AMD64 Fam15h NorthBridge +.sp +.SH DESCRIPTION +The library supports AMD Family 15h processors core PMU in both 32 and 64-bit modes. 
The +uncore PMU (Northbridge) is also supported as a separate PMU model. + +.SH MODIFIERS +The following modifiers are supported on the AMD64 Family 15h core PMU: +.TP +.B u +Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. +This is a boolean modifier. +.TP +.B k +Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. +This is a boolean modifier. +.TP +.B h +Measure while executing in host mode (when using virtualization). This corresponds to \fBPFM_PLMH\fR. +This modifier is available starting with Fam10h. This is a boolean modifier. +.TP +.B g +Measure while executing in guest mode (when using virtualization). This modifier is available +starting with Fam10h. This is a boolean modifier. +.TP +.B i +Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR +occurring. This is a boolean modifier. +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. +.TP +.B c +Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles +in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer +modifier with values in the range [0:255]. +.PP +The uncore (NorthBridge) PMU \fBdoes not support\fR any modifiers. 
+ +.SH AUTHORS +.nf +Stephane Eranian +Robert Richter +.fi +.PP commit b81ff2dc94f7e9acee2c4ba306d13e343cce4a18 Author: Stephane Eranian Date: Sun Dec 1 17:12:49 2013 +0100 fix compiler warning on pfm_intel_rapl_get_encoding() Avoid clang warning Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_rapl.c b/lib/pfmlib_intel_rapl.c index 25c51f6..4570d65 100644 --- a/lib/pfmlib_intel_rapl.c +++ b/lib/pfmlib_intel_rapl.c @@ -96,12 +96,8 @@ static int pfm_intel_rapl_get_encoding(void *this, pfmlib_event_desc_t *e) { - pfmlib_pmu_t *pmu = this; const intel_x86_entry_t *pe; - /* shut up the compiler */ - pmu = pmu; - pe = this_pe(this); e->fstr[0] = '\0'; commit b35f8115e173f4c1cf5d58b6e6b62f0ad91cb2dd Author: Stephane Eranian Date: Tue Dec 3 06:48:24 2013 +0100 Add missing Intel Silvermont libpfm4 man page. Adds the man page for Intel Silvermont core PMU. Reported-by: Steve Kaufman Signed-off-by: Stephane Eranian diff --git a/docs/man3/libpfm_intel_slm.3 b/docs/man3/libpfm_intel_slm.3 new file mode 100644 index 0000000..1e1153f --- /dev/null +++ b/docs/man3/libpfm_intel_slm.3 @@ -0,0 +1,84 @@ +.TH LIBPFM 3 "November, 2013" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_slm - support for Intel Silvermont core PMU +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: slm +.B PMU desc: Intel Silvermont +.sp +.SH DESCRIPTION +The library supports the Intel Silvermont core PMU. + +.SH MODIFIERS +The following modifiers are supported on Intel Silvermont processors: +.TP +.B u +Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. +This is a boolean modifier. +.TP +.B k +Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. +This is a boolean modifier. +.TP +.B i +Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR +occurring. 
This is a boolean modifier. +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event +to at least one occurrence. This modifier must be combined with a counter mask modifier (c) with a value greater than or equal to one. +This is a boolean modifier. +.TP +.B c +Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles +in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer +modifier with values in the range [0:255]. + +.SH OFFCORE_RESPONSE events +Intel Silvermont provides two offcore_response events: +\fBOFFCORE_RESPONSE_0\fR and \fBOFFCORE_RESPONSE_1\fR. + +Those events need special treatment in the performance monitoring infrastructure +because each event uses an extra register to store some settings. Thus, in +case multiple offcore_response events are monitored simultaneously, the kernel needs +to manage the sharing of that extra register. + +The offcore_response events are exposed as normal events by the library. The extra +settings are exposed as regular umasks. The library takes care of encoding the +events according to the underlying kernel interface. + +On Intel Silvermont, the umasks are divided into three categories: request, supplier +and snoop. At least one umask is required for each category. The categories +are shown in the umask descriptions. For any category the user leaves unspecified, +the library supplies a default umask. + +There is also the special response umask called \fBANY_RESPONSE\fR. When this umask +is used then it overrides any supplier and snoop umasks. In other words, users can +specify either \fBANY_RESPONSE\fR \fBOR\fR any combination of supplier and snoop umasks. + +In case no supplier or snoop is specified, the library defaults to using +\fBANY_RESPONSE\fR. 
+ +For instance, the following are valid event selections: +.TP +.B OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE +.TP +.B OFFCORE_RESPONSE_0:ANY_REQUEST +.TP +.B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY + +.P +But the following are illegal: + +.TP +.B OFFCORE_RESPONSE_0:ANY_RFO:NON_DRAM:ANY_RESPONSE +.TP +.B OFFCORE_RESPONSE_0:ANY_RFO:L2_HIT:SNOOP_ANY:ANY_RESPONSE + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP commit 4b0a11762f62f7763be31ade2d70ff0ae5f80fd4 Author: Vince Weaver Date: Fri Dec 6 10:17:18 2013 -0500 add missing Intel Haswell model numbers This patch adds support for a few more Haswell models: 63, 69, 70, 71. Signed-off-by: Vince Weaver diff --git a/lib/pfmlib_intel_hsw.c b/lib/pfmlib_intel_hsw.c index 7d01ff1..f4975f6 100644 --- a/lib/pfmlib_intel_hsw.c +++ b/lib/pfmlib_intel_hsw.c @@ -40,6 +40,10 @@ pfm_hsw_detect(void *this) switch (pfm_intel_x86_cfg.model) { case 60: /* Haswell */ + case 63: /* Haswell */ + case 69: /* Haswell */ + case 70: /* Haswell */ + case 71: /* Haswell */ break; default: return PFM_ERR_NOTSUPP; commit ec046652845877d46cc8c62d86f47325380fbaa1 Author: Andreas Beckmann Date: Thu Dec 12 22:46:19 2013 +0100 fix typos in IVB event descriptions Signed-off-by: Andreas Beckmann diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h index 3c5583e..407059b 100644 --- a/lib/events/intel_ivb_events.h +++ b/lib/events/intel_ivb_events.h @@ -1173,7 +1173,7 @@ static const intel_x86_umask_t ivb_other_assists[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "AVX_STORE", - .udesc = "Number of assists associated with 25-bit AVX stores", + .udesc = "Number of assists associated with 256-bit AVX stores", .ucode = 0x0800, .uflags= INTEL_X86_NCOMBO, }, @@ -1277,7 +1277,7 @@ static const intel_x86_umask_t ivb_uops_dispatched_port[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT_3", - .udesc = "Cycles in which a uop is disptached on port 3", + .udesc = "Cycles in which a uop is dispatched on port 3", .ucode = 0x3000, .uflags= 
INTEL_X86_NCOMBO, }, commit a2eb1c8257b07ebc2f5e7a1ace8d005d0a7a08f0 Author: Steve Kaufmann Date: Mon Jan 13 14:19:16 2014 +0100 fix spelling mistakes in event descriptions Applied spell-checker on event descriptions. Signed-off-by: Steve Kaufmann diff --git a/lib/events/amd64_events_fam14h.h b/lib/events/amd64_events_fam14h.h index e975521..0cf11a9 100644 --- a/lib/events/amd64_events_fam14h.h +++ b/lib/events/amd64_events_fam14h.h @@ -121,7 +121,7 @@ static const amd64_umask_t amd64_fam14h_retired_serializing_ops[]={ static const amd64_umask_t amd64_fam14h_retired_x87_fpu_ops[]={ { .uname = "ADD_SUB_OPS", - .udesc = "Add/substract ops", + .udesc = "Add/subtract ops", .ucode = 0x1, }, { .uname = "MULT_OPS", @@ -1180,7 +1180,7 @@ static const amd64_entry_t amd64_fam14h_pe[]={ .umasks = amd64_fam14h_l1_dtlb_hit, }, { .name = "DCACHE_SW_PREFETCHES", - .desc = "Number of software prefetches that do not cuase an actual data cache refill", + .desc = "Number of software prefetches that do not cause an actual data cache refill", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x52, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam14h_dcache_sw_prefetches), diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h index 7872468..0b8c17b 100644 --- a/lib/events/amd64_events_fam15h.h +++ b/lib/events/amd64_events_fam15h.h @@ -1708,7 +1708,7 @@ static const amd64_umask_t amd64_fam15h_read_request_to_l3_cache[]={ .grpid = 0, }, { .uname = "PREFETCH", - .udesc = "Count prefetches honly", + .udesc = "Count prefetches only", .ucode = 0x8, .grpid = 0, }, diff --git a/lib/events/amd64_events_fam15h_nb.h b/lib/events/amd64_events_fam15h_nb.h index 5969eb6..82799f0 100644 --- a/lib/events/amd64_events_fam15h_nb.h +++ b/lib/events/amd64_events_fam15h_nb.h @@ -1711,7 +1711,7 @@ static const amd64_umask_t amd64_fam15h_nb_read_request_to_l3_cache[]={ .grpid = 0, }, { .uname = "PREFETCH", - .udesc = "Count prefetches honly", + .udesc = "Count prefetches only", .ucode = 0x8, .grpid = 0, 
}, diff --git a/lib/events/arm_1176_events.h b/lib/events/arm_1176_events.h index 35a43fa..d31d810 100644 --- a/lib/events/arm_1176_events.h +++ b/lib/events/arm_1176_events.h @@ -121,7 +121,7 @@ static const arm_entry_t arm_1176_pe []={ }, {.name = "PROC_RET_EXEC_PRED", .code = 0x25, - .desc = "Proceudre return instruction executed and address predicted" + .desc = "Procedure return instruction executed and address predicted" }, {.name = "PROC_RET_EXEC_PRED_INCORRECT", .code = 0x26, diff --git a/lib/events/arm_cortex_a8_events.h b/lib/events/arm_cortex_a8_events.h index 2b61dda..f5c99e5 100644 --- a/lib/events/arm_cortex_a8_events.h +++ b/lib/events/arm_cortex_a8_events.h @@ -86,7 +86,7 @@ static const arm_entry_t arm_cortex_a8_pe []={ }, {.name = "PC_IMM_BRANCH", .code = 0x0d, - .desc = "Immedidate branches architecturally executed" + .desc = "Immediate branches architecturally executed" }, {.name = "PC_PROC_RETURN", .code = 0x0e, @@ -194,7 +194,7 @@ static const arm_entry_t arm_cortex_a8_pe []={ }, {.name = "OP_EXECUTED", .code = 0x55, - .desc = "Operations excuted (includes sub-ops in multi-cycle instructions)" + .desc = "Operations executed (includes sub-ops in multi-cycle instructions)" }, {.name = "CYCLES_INST_STALL", .code = 0x56, diff --git a/lib/events/arm_cortex_a9_events.h b/lib/events/arm_cortex_a9_events.h index c034bd3..ef5b337 100644 --- a/lib/events/arm_cortex_a9_events.h +++ b/lib/events/arm_cortex_a9_events.h @@ -86,7 +86,7 @@ static const arm_entry_t arm_cortex_a9_pe []={ }, {.name = "PC_IMM_BRANCH", .code = 0x0d, - .desc = "Immedidate branches architecturally executed" + .desc = "Immediate branches architecturally executed" }, {.name = "UNALIGNED_ACCESS", .code = 0x0f, diff --git a/lib/events/intel_atom_events.h b/lib/events/intel_atom_events.h index 0b72ad3..3b564be 100644 --- a/lib/events/intel_atom_events.h +++ b/lib/events/intel_atom_events.h @@ -85,7 +85,7 @@ static const intel_x86_umask_t atom_icache[]={ .uflags= INTEL_X86_NCOMBO, }, { 
.uname = "MISSES", - .udesc = "Count all instructions fetches that miss tha icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", + .udesc = "Count all instructions fetches that miss the icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", .ucode = 0x200, }, }; diff --git a/lib/events/intel_coreduo_events.h b/lib/events/intel_coreduo_events.h index 16336f9..9e0cb5e 100644 --- a/lib/events/intel_coreduo_events.h +++ b/lib/events/intel_coreduo_events.h @@ -315,12 +315,12 @@ static const intel_x86_umask_t coreduo_sse_instructions_retired[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "PACKED_DOUBLE", - .udesc = "Number of SSE/SSE2 packed double percision instructions retired", + .udesc = "Number of SSE/SSE2 packed double precision instructions retired", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DOUBLE", - .udesc = "Number of SSE/SSE2 scalar double percision instructions retired", + .udesc = "Number of SSE/SSE2 scalar double precision instructions retired", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO, }, @@ -579,7 +579,7 @@ static const intel_x86_entry_t intel_coreduo_pe[]={ .umasks = coreduo_l2_lines_in, /* identical to actual umasks list for this event */ }, { .name = "L2_IFETCH", - .desc = "L2 instruction fetches from nstruction fetch unit (includes speculative fetches) ", + .desc = "L2 instruction fetches from instruction fetch unit (includes speculative fetches) ", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x28, diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index ccd4a2b..0491fdd 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -26,7 +26,7 @@ static const intel_x86_umask_t hsw_baclears[]={ { .uname = "ANY", - .udesc = "NUmber of front-end re-steers due to BPU 
misprediction", + .udesc = "Number of front-end re-steers due to BPU misprediction", .ucode = 0x1f00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, @@ -1511,7 +1511,7 @@ static const intel_x86_umask_t hsw_hle_retired[]={ .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC4", - .udesc = "Number of times an HLE execution aborted due to incomptaible memory type", + .udesc = "Number of times an HLE execution aborted due to incompatible memory type", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, @@ -1554,7 +1554,7 @@ static const intel_x86_umask_t hsw_rtm_retired[]={ .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC4", - .udesc = "Number of times an RTM execution aborted due to incomptaible memory type", + .udesc = "Number of times an RTM execution aborted due to incompatible memory type", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, @@ -1779,7 +1779,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ .code = 0xc5, }, { .name = "BACLEARS", - .desc = "Branch resteered", + .desc = "Branch re-steered", .code = 0xe6, .cntmsk = 0xff, .ngrp = 1, @@ -1934,7 +1934,7 @@ static const intel_x86_entry_t intel_hsw_pe[]={ .umasks = hsw_inst_retired }, { .name = "INT_MISC", - .desc = "Miscelleanous interruptions", + .desc = "Miscellaneous interruptions", .code = 0xd, .cntmsk = 0xff, .ngrp = 1, diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h index 407059b..28e0216 100644 --- a/lib/events/intel_ivb_events.h +++ b/lib/events/intel_ivb_events.h @@ -510,7 +510,7 @@ static const intel_x86_umask_t ivb_idq[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "MS_DSB_UOPS_OCCUR", - .udesc = "Occurences of DSB MS going active", + .udesc = "Occurrences of DSB MS going active", .uequiv = "MS_DSB_UOPS:c=1:e=1", .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, @@ -861,7 +861,7 @@ static const intel_x86_umask_t ivb_ld_blocks[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "NO_SR", - .udesc = "Number of times that split 
load operations are temporarily blocked because all resources for handlding the split accesses are in use", + .udesc = "Number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, @@ -1553,7 +1553,7 @@ static const intel_x86_umask_t ivb_offcore_response[]={ static const intel_x86_umask_t ivb_baclears[]={ { .uname = "ANY", - .udesc = "Counts the number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", + .udesc = "Counts the number of times the front end is re-steered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", .ucode = 0x1f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, @@ -1663,7 +1663,7 @@ static const intel_x86_entry_t intel_ivb_pe[]={ .umasks = ivb_arith, }, { .name = "BACLEARS", - .desc = "Branch resteered", + .desc = "Branch re-steered", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xff, .code = 0xe6, @@ -1909,7 +1909,7 @@ static const intel_x86_entry_t intel_ivb_pe[]={ .umasks = ivb_l2_l1d_wb_rqsts, }, { .name = "L2_LINES_IN", - .desc = "L2 lines alloacated", + .desc = "L2 lines allocated", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xff, .code = 0xf1, diff --git a/lib/events/intel_netburst_events.h b/lib/events/intel_netburst_events.h index e24f22f..bf08b02 100644 --- a/lib/events/intel_netburst_events.h +++ b/lib/events/intel_netburst_events.h @@ -1495,7 +1495,7 @@ static const netburst_entry_t netburst_events[] = { /* 44 */ {.name = "machine_clear", - .desc = "Number of occurances when the entire " + .desc = "Number of occurrences when the entire " "pipeline of the machine is cleared", .event_select = 0x2, .escr_select = 0x5, @@ -1506,7 +1506,7 @@ static const netburst_entry_t netburst_events[] = { .desc = "Counts for a portion of the many cycles 
while the " "machine is cleared for any cause. Use edge-" "triggering for this bit only to get a count of " - "occurances versus a duration", + "occurrences versus a duration", .bit = 0, }, {.name = "MOCLEAR", diff --git a/lib/events/intel_nhm_events.h b/lib/events/intel_nhm_events.h index e47068f..265002d 100644 --- a/lib/events/intel_nhm_events.h +++ b/lib/events/intel_nhm_events.h @@ -61,7 +61,7 @@ static const intel_x86_umask_t nhm_baclear[]={ static const intel_x86_umask_t nhm_bpu_clears[]={ { .uname = "EARLY", - .udesc = "Early Branch Prediciton Unit clears", + .udesc = "Early Branch Prediction Unit clears", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, @@ -332,7 +332,7 @@ static const intel_x86_umask_t nhm_fp_assist[]={ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "INPUT", - .udesc = "Floating poiint assists for invalid input value (Precise Event)", + .udesc = "Floating point assists for invalid input value (Precise Event)", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, @@ -740,7 +740,7 @@ static const intel_x86_umask_t nhm_l2_hw_prefetch[]={ static const intel_x86_umask_t nhm_l2_lines_in[]={ { .uname = "ANY", - .udesc = "L2 lines alloacated", + .udesc = "L2 lines allocated", .ucode = 0x700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, @@ -1976,7 +1976,7 @@ static const intel_x86_entry_t intel_nhm_pe[]={ .umasks = nhm_fp_assist, }, { .name = "FP_COMP_OPS_EXE", - .desc = "Floating poing computational micro-ops", + .desc = "Floating point computational micro-ops", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x10, @@ -2179,7 +2179,7 @@ static const intel_x86_entry_t intel_nhm_pe[]={ .umasks = nhm_l2_hw_prefetch, }, { .name = "L2_LINES_IN", - .desc = "L2 lines alloacated", + .desc = "L2 lines allocated", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf1, @@ -2348,7 +2348,7 @@ static const intel_x86_entry_t intel_nhm_pe[]={ .code = 0x1b2, }, { .name = "PARTIAL_ADDRESS_ALIAS", - .desc = "False dependencies 
due to partial address froming", + .desc = "False dependencies due to partial address forming", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x107, diff --git a/lib/events/intel_nhm_unc_events.h b/lib/events/intel_nhm_unc_events.h index 513a730..6421a6b 100644 --- a/lib/events/intel_nhm_unc_events.h +++ b/lib/events/intel_nhm_unc_events.h @@ -29,15 +29,15 @@ static const intel_x86_umask_t nhm_unc_unc_dram_open[]={ { .uname = "CH0", - .udesc = "DRAM Channel 0 open comamnds issued for read or write", + .udesc = "DRAM Channel 0 open commands issued for read or write", .ucode = 0x100, }, { .uname = "CH1", - .udesc = "DRAM Channel 1 open comamnds issued for read or write", + .udesc = "DRAM Channel 1 open commands issued for read or write", .ucode = 0x200, }, { .uname = "CH2", - .udesc = "DRAM Channel 2 open comamnds issued for read or write", + .udesc = "DRAM Channel 2 open commands issued for read or write", .ucode = 0x400, }, }; @@ -878,7 +878,7 @@ static const intel_x86_entry_t intel_nhm_unc_pe[]={ .flags = INTEL_X86_FIXED, }, { .name = "UNC_DRAM_OPEN", - .desc = "DRAM open comamnds issued for read or write", + .desc = "DRAM open commands issued for read or write", .modmsk = NHM_UNC_ATTRS, .cntmsk = 0x1fe00000, .code = 0x60, diff --git a/lib/events/intel_p6_events.h b/lib/events/intel_p6_events.h index ba9512d..f8a83a8 100644 --- a/lib/events/intel_p6_events.h +++ b/lib/events/intel_p6_events.h @@ -168,7 +168,7 @@ static const intel_x86_entry_t intel_p6_pe[]={ .code = 0xc0, }, { .name = "DATA_MEM_REFS", - .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. 
Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", + .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, @@ -237,7 +237,7 @@ static const intel_x86_entry_t intel_p6_pe[]={ .umasks = p6_l2_ifetch, }, { .name = "L2_ST", - .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. 
It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, diff --git a/lib/events/intel_pii_events.h b/lib/events/intel_pii_events.h index 4bff1ec..5846c64 100644 --- a/lib/events/intel_pii_events.h +++ b/lib/events/intel_pii_events.h @@ -132,7 +132,7 @@ static const intel_x86_entry_t intel_pii_pe[]={ .code = 0xc0, }, { .name = "DATA_MEM_REFS", - .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", + .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). 
Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, @@ -201,7 +201,7 @@ static const intel_x86_entry_t intel_pii_pe[]={ .umasks = pii_l2_ifetch, }, { .name = "L2_ST", - .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, diff --git a/lib/events/intel_pm_events.h b/lib/events/intel_pm_events.h index 4fa8795..e32a883 100644 --- a/lib/events/intel_pm_events.h +++ b/lib/events/intel_pm_events.h @@ -235,7 +235,7 @@ static const intel_x86_entry_t intel_pm_pe[]={ .code = 0xc0, }, { .name = "DATA_MEM_REFS", - .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. 
Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", + .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, @@ -304,7 +304,7 @@ static const intel_x86_entry_t intel_pm_pe[]={ .umasks = pm_l2_ifetch, }, { .name = "L2_ST", - .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. 
It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, diff --git a/lib/events/intel_ppro_events.h b/lib/events/intel_ppro_events.h index 63ad7b7..0555652 100644 --- a/lib/events/intel_ppro_events.h +++ b/lib/events/intel_ppro_events.h @@ -73,7 +73,7 @@ static const intel_x86_entry_t intel_ppro_pe[]={ .code = 0xc0, }, { .name = "DATA_MEM_REFS", - .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performe, is only counted once). Does ot include I/O accesses or other non-memory accesses", + .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). 
Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, @@ -142,7 +142,7 @@ static const intel_x86_entry_t intel_ppro_pe[]={ .umasks = ppro_l2_ifetch, }, { .name = "L2_ST", - .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indictes that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified reqyests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", + .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, diff --git a/lib/events/intel_slm_events.h b/lib/events/intel_slm_events.h index c540e64..558dbf8 100644 --- a/lib/events/intel_slm_events.h +++ b/lib/events/intel_slm_events.h @@ -32,7 +32,7 @@ static const intel_x86_umask_t slm_icache[]={ .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "MISSES", - .udesc = "Count all instructions fetches that miss tha icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", + .udesc = "Count all instructions fetches that miss the icache or produce memory requests. This includes uncacheache fetches. 
Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, @@ -97,7 +97,7 @@ static const intel_x86_umask_t slm_inst_retired[]={ static const intel_x86_umask_t slm_l2_reject_xq[]={ { .uname = "ALL", - .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indictes back pressure from the IDI link. The XQ may reject transactions fro mthe L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", + .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the IDI link. The XQ may reject transactions fro mthe L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", .ucode = 0x000, .uflags= INTEL_X86_DFL, }, @@ -345,7 +345,7 @@ static const intel_x86_umask_t slm_rehabq[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "STA_FULL", - .udesc = "Number of retired stores that are delayed becuase there is not a store address buffer available", + .udesc = "Number of retired stores that are delayed because there is not a store address buffer available", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, @@ -625,7 +625,7 @@ static const intel_x86_umask_t slm_ms_decoded[]={ static const intel_x86_umask_t slm_decode_restriction[]={ { .uname = "PREDECODE_WRONG", - .udesc = "Number of times the prediction (from the predecode cache) for intruction length is incorrect", + .udesc = "Number of times the prediction (from the predecode cache) for instruction length is incorrect", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, diff --git a/lib/events/intel_snb_events.h b/lib/events/intel_snb_events.h index 68e8d2d..11937ea 100644 --- a/lib/events/intel_snb_events.h +++ b/lib/events/intel_snb_events.h @@ -571,7 +571,7 @@ static const intel_x86_umask_t snb_idq[]={ .uflags= INTEL_X86_NCOMBO, }, 
{ .uname = "MS_DSB_UOPS_OCCUR", - .udesc = "Occurences of DSB MS going active", + .udesc = "Occurrences of DSB MS going active", .uequiv = "MS_DSB_UOPS:c=1:e=1", .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, @@ -1408,17 +1408,17 @@ static const intel_x86_umask_t snb_uops_dispatched_port[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT_3_LD", - .udesc = "Cycles in which a load uop is disptached on port 3", + .udesc = "Cycles in which a load uop is dispatched on port 3", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT_3_STA", - .udesc = "Cycles in which a store uop is disptached on port 3", + .udesc = "Cycles in which a store uop is dispatched on port 3", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT_3", - .udesc = "Cycles in which a uop is disptached on port 3", + .udesc = "Cycles in which a uop is dispatched on port 3", .ucode = 0x3000, .uflags= INTEL_X86_NCOMBO, }, @@ -1683,7 +1683,7 @@ static const intel_x86_umask_t snb_offcore_response[]={ static const intel_x86_umask_t snb_baclears[]={ { .uname = "ANY", - .udesc = "Counts the number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", + .udesc = "Counts the number of times the front end is re-steered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", .ucode = 0x1f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, @@ -1742,7 +1742,7 @@ static const intel_x86_entry_t intel_snb_pe[]={ .umasks = snb_arith, }, { .name = "BACLEARS", - .desc = "Branch resteered", + .desc = "Branch re-steered", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xff, .code = 0xe6, @@ -2033,7 +2033,7 @@ static const intel_x86_entry_t intel_snb_pe[]={ .umasks = snb_l2_l1d_wb_rqsts, }, { .name = "L2_LINES_IN", - .desc = "L2 lines alloacated", + .desc = "L2 lines allocated", 
.modmsk = INTEL_V3_ATTRS, .cntmsk = 0xff, .code = 0xf1, diff --git a/lib/events/intel_snbep_events.h b/lib/events/intel_snbep_events.h index 0df2b5c..17b51c6 100644 --- a/lib/events/intel_snbep_events.h +++ b/lib/events/intel_snbep_events.h @@ -571,7 +571,7 @@ static const intel_x86_umask_t snbep_idq[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "MS_DSB_UOPS_OCCUR", - .udesc = "Occurences of DSB MS going active", + .udesc = "Occurrences of DSB MS going active", .uequiv = "MS_DSB_UOPS:c=1:e=1", .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, @@ -1421,17 +1421,17 @@ static const intel_x86_umask_t snbep_uops_dispatched_port[]={ .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT_3_LD", - .udesc = "Cycles in which a load uop is disptached on port 3", + .udesc = "Cycles in which a load uop is dispatched on port 3", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT_3_STA", - .udesc = "Cycles in which a store uop is disptached on port 3", + .udesc = "Cycles in which a store uop is dispatched on port 3", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT_3", - .udesc = "Cycles in which a uop is disptached on port 3", + .udesc = "Cycles in which a uop is dispatched on port 3", .ucode = 0x3000, .uflags= INTEL_X86_NCOMBO, }, @@ -1695,7 +1695,7 @@ static const intel_x86_umask_t snbep_offcore_response[]={ static const intel_x86_umask_t snbep_baclears[]={ { .uname = "ANY", - .udesc = "Counts the number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", + .udesc = "Counts the number of times the front end is re-steered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end", .ucode = 0x1f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, @@ -1754,7 +1754,7 @@ static const intel_x86_entry_t intel_snbep_pe[]={ .umasks = 
snbep_arith, }, { .name = "BACLEARS", - .desc = "Branch resteered", + .desc = "Branch re-steered", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xff, .code = 0xe6, @@ -2045,7 +2045,7 @@ static const intel_x86_entry_t intel_snbep_pe[]={ .umasks = snbep_l2_l1d_wb_rqsts, }, { .name = "L2_LINES_IN", - .desc = "L2 lines alloacated", + .desc = "L2 lines allocated", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xff, .code = 0xf1, diff --git a/lib/events/intel_snbep_unc_cbo_events.h b/lib/events/intel_snbep_unc_cbo_events.h index 125c5ad..b2f0878 100644 --- a/lib/events/intel_snbep_unc_cbo_events.h +++ b/lib/events/intel_snbep_unc_cbo_events.h @@ -641,7 +641,7 @@ static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ .umasks = snbep_unc_c_llc_victims, }, { .name = "UNC_C_MISC", - .desc = "Miscelleanous C-Box events", + .desc = "Miscellaneous C-Box events", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x39, @@ -659,7 +659,7 @@ static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ .umasks = snbep_unc_c_ring_ad_used, }, { .name = "UNC_C_RING_AK_USED", - .desc = "Acknowledgement ring in use. Counts number of cycles ring is being used at this ring stop", + .desc = "Acknowledgment ring in use. 
Counts number of cycles ring is being used at this ring stop", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0xc, .code = 0x1c, diff --git a/lib/events/intel_wsm_events.h b/lib/events/intel_wsm_events.h index 07dffb0..d6f59cb 100644 --- a/lib/events/intel_wsm_events.h +++ b/lib/events/intel_wsm_events.h @@ -299,7 +299,7 @@ static const intel_x86_umask_t wsm_dtlb_load_misses[]={ static const intel_x86_umask_t wsm_l2_lines_in[]={ { .uname = "ANY", - .udesc = "L2 lines alloacated", + .udesc = "L2 lines allocated", .ucode = 0x700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, @@ -1993,7 +1993,7 @@ static const intel_x86_entry_t intel_wsm_pe[]={ .umasks = wsm_dtlb_load_misses, }, { .name = "L2_LINES_IN", - .desc = "L2 lines alloacated", + .desc = "L2 lines allocated", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf1, diff --git a/lib/events/intel_wsm_unc_events.h b/lib/events/intel_wsm_unc_events.h index fa76e26..66fd961 100644 --- a/lib/events/intel_wsm_unc_events.h +++ b/lib/events/intel_wsm_unc_events.h @@ -29,15 +29,15 @@ static const intel_x86_umask_t wsm_unc_unc_dram_open[]={ { .uname = "CH0", - .udesc = "DRAM Channel 0 open comamnds issued for read or write", + .udesc = "DRAM Channel 0 open commands issued for read or write", .ucode = 0x100, }, { .uname = "CH1", - .udesc = "DRAM Channel 1 open comamnds issued for read or write", + .udesc = "DRAM Channel 1 open commands issued for read or write", .ucode = 0x200, }, { .uname = "CH2", - .udesc = "DRAM Channel 2 open comamnds issued for read or write", + .udesc = "DRAM Channel 2 open commands issued for read or write", .ucode = 0x400, }, }; @@ -925,7 +925,7 @@ static const intel_x86_entry_t intel_wsm_unc_pe[]={ .flags = INTEL_X86_FIXED, }, { .name = "UNC_DRAM_OPEN", - .desc = "DRAM open comamnds issued for read or write", + .desc = "DRAM open commands issued for read or write", .modmsk = NHM_UNC_ATTRS, .cntmsk = 0x1fe00000, .code = 0x60, @@ -1334,7 +1334,7 @@ static const intel_x86_entry_t intel_wsm_unc_pe[]={ 
.umasks = wsm_unc_unc_thermal_throttling_temp, /* identical to actual umasks list for this event */ }, { .name = "UNC_PROCHOT_ASSERTION", - .desc = "Number of system ssertions of PROCHOT indicating the entire processor has exceeded the thermal limit", + .desc = "Number of system assertions of PROCHOT indicating the entire processor has exceeded the thermal limit", .modmsk = NHM_UNC_ATTRS, .cntmsk = 0x1fe00000, .code = 0x82, diff --git a/lib/events/intel_x86_arch_events.h b/lib/events/intel_x86_arch_events.h index 0b9ff6a..c8c862b 100644 --- a/lib/events/intel_x86_arch_events.h +++ b/lib/events/intel_x86_arch_events.h @@ -42,7 +42,7 @@ static intel_x86_entry_t intel_x86_arch_pe[]={ {.name = "UNHALTED_REFERENCE_CYCLES", .code = 0x013c, .cntmsk = 0x400000000ull, /* temporary */ - .desc = "count reference clock cycles while the clock signal on the specific core is running. The reference clock operates at a fixed frequency, irrespective of core freqeuncy changes due to performance state transitions", + .desc = "count reference clock cycles while the clock signal on the specific core is running. The reference clock operates at a fixed frequency, irrespective of core frequency changes due to performance state transitions", }, {.name = "LLC_REFERENCES", .code = 0x4f2e, diff --git a/lib/events/mips_74k_events.h b/lib/events/mips_74k_events.h index 399da6e..523627b 100644 --- a/lib/events/mips_74k_events.h +++ b/lib/events/mips_74k_events.h @@ -224,7 +224,7 @@ static const mips_entry_t mips_74k_pe []={ { .name = "DCACHE_MISSES", .code = 0x98, - .desc = "D-cache misses. This count is per instruction at grad- uation and includes load, store, prefetch, synci and address based cacheops", + .desc = "D-cache misses. 
This count is per instruction at graduation and includes load, store, prefetch, synci and address based cacheops", }, { .name = "JTLB_DATA_ACCESSES", @@ -244,7 +244,7 @@ static const mips_entry_t mips_74k_pe []={ { .name = "DCACHE_VTAG_MISMATCH", .code = 0x9a, - .desc = "The 74K core's D-cache has an auxiliary virtual tag, used to pick the right line early. When (occasionally) the physical tag match and virtual tag match do not line up, it is treated as a cache miss - in processing the miss the virtual tag is correcyed for future accesses. This event counts those bogus misses", + .desc = "The 74K core's D-cache has an auxiliary virtual tag, used to pick the right line early. When (occasionally) the physical tag match and virtual tag match do not line up, it is treated as a cache miss - in processing the miss the virtual tag is corrected for future accesses. This event counts those bogus misses", }, { .name = "L2_CACHE_WRITEBACKS", @@ -349,7 +349,7 @@ static const mips_entry_t mips_74k_pe []={ { .name = "INTEGER_INSNS", .code = 0x28, - .desc = "Integer instructions graduated (includes nop, ssnop, ehb as well as all arithmetic, locial, shift and extract type operations)", + .desc = "Integer instructions graduated (includes nop, ssnop, ehb as well as all arithmetic, logical, shift and extract type operations)", }, { .name = "FPU_INSNS", diff --git a/lib/events/sparc_ultra3_events.h b/lib/events/sparc_ultra3_events.h index a074e3a..c19097d 100644 --- a/lib/events/sparc_ultra3_events.h +++ b/lib/events/sparc_ultra3_events.h @@ -20,7 +20,7 @@ static const sparc_entry_t ultra3_pe[] = { }, { .name = "IC_ref", - .desc = "I-cache refrences", + .desc = "I-cache references", .ctrl = PME_CTRL_S0, .code = 0x8, }, @@ -84,13 +84,13 @@ static const sparc_entry_t ultra3_pe[] = { }, { .name = "Rstall_storeQ", - .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", + .desc = "R-stage stall for a 
store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", .ctrl = PME_CTRL_S0, .code = 0x5, }, { .name = "Rstall_IU_use", - .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0x6, }, @@ -204,7 +204,7 @@ static const sparc_entry_t ultra3_pe[] = { }, { .name = "Rstall_FP_use", - .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S1, .code = 0xb, }, diff --git a/lib/events/sparc_ultra3i_events.h b/lib/events/sparc_ultra3i_events.h index a8ce584..f81213e 100644 --- a/lib/events/sparc_ultra3i_events.h +++ b/lib/events/sparc_ultra3i_events.h @@ -20,7 +20,7 @@ static const sparc_entry_t ultra3i_pe[] = { }, { .name = "IC_ref", - .desc = "I-cache refrences", + .desc = "I-cache references", .ctrl = PME_CTRL_S0, .code = 0x8, }, @@ -84,13 +84,13 @@ static const sparc_entry_t ultra3i_pe[] = { }, { .name = "Rstall_storeQ", - .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", + .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", .ctrl = PME_CTRL_S0, .code = 0x5, }, { .name = "Rstall_IU_use", - .desc = "R-stage stall for an event that the next instruction to be executed depends 
on the result of a preceeding integer instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0x6, }, @@ -204,7 +204,7 @@ static const sparc_entry_t ultra3i_pe[] = { }, { .name = "Rstall_FP_use", - .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S1, .code = 0xb, }, @@ -383,7 +383,7 @@ static const sparc_entry_t ultra3i_pe[] = { /* PIC1 events specific to UltraSPARC-III+/IIIi */ { .name = "Re_DC_missovhd", - .desc = "Used to measure D-cache stall counts seperatedly for L2-cache hits and misses. This counter is used with the recirculation and cache access events to seperately calculate the D-cache loads that hit and miss the L2-cache", + .desc = "Used to measure D-cache stall counts separately for L2-cache hits and misses. 
This counter is used with the recirculation and cache access events to separately calculate the D-cache loads that hit and miss the L2-cache", .ctrl = PME_CTRL_S1, .code = 0x4, }, diff --git a/lib/events/sparc_ultra3plus_events.h b/lib/events/sparc_ultra3plus_events.h index 04de4a6..b1dc8ca 100644 --- a/lib/events/sparc_ultra3plus_events.h +++ b/lib/events/sparc_ultra3plus_events.h @@ -20,7 +20,7 @@ static const sparc_entry_t ultra3plus_pe[] = { }, { .name = "IC_ref", - .desc = "I-cache refrences", + .desc = "I-cache references", .ctrl = PME_CTRL_S0, .code = 0x8, }, @@ -84,13 +84,13 @@ static const sparc_entry_t ultra3plus_pe[] = { }, { .name = "Rstall_storeQ", - .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", + .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", .ctrl = PME_CTRL_S0, .code = 0x5, }, { .name = "Rstall_IU_use", - .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0x6, }, @@ -204,7 +204,7 @@ static const sparc_entry_t ultra3plus_pe[] = { }, { .name = "Rstall_FP_use", - .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S1, .code = 0xb, }, @@ -421,7 +421,7 @@ static const sparc_entry_t ultra3plus_pe[] = 
{ /* PIC1 events specific to UltraSPARC-III+/IIIi processors */ { .name = "Re_DC_missovhd", - .desc = "Used to measure D-cache stall counts seperatedly for L2-cache hits and misses. This counter is used with the recirculation and cache access events to seperately calculate the D-cache loads that hit and miss the L2-cache", + .desc = "Used to measure D-cache stall counts separately for L2-cache hits and misses. This counter is used with the recirculation and cache access events to separately calculate the D-cache loads that hit and miss the L2-cache", .ctrl = PME_CTRL_S1, .code = 0x4, }, diff --git a/lib/events/sparc_ultra4plus_events.h b/lib/events/sparc_ultra4plus_events.h index 47512da..ca473a4 100644 --- a/lib/events/sparc_ultra4plus_events.h +++ b/lib/events/sparc_ultra4plus_events.h @@ -32,13 +32,13 @@ static const sparc_entry_t ultra4plus_pe[] = { }, { .name = "Rstall_storeQ", - .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", + .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", .ctrl = PME_CTRL_S0, .code = 0x5, }, { .name = "Rstall_IU_use", - .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0x6, }, @@ -50,7 +50,7 @@ static const sparc_entry_t ultra4plus_pe[] = { }, { .name = "IC_ref", - .desc = "I-cache refrences", + .desc = "I-cache references", .ctrl = PME_CTRL_S0, .code = 0x8, }, @@ -62,7 +62,7 @@ static const sparc_entry_t ultra4plus_pe[] = { }, { .name = "Rstall_FP_use", - .desc = "R-stage stall for an event that the next instruction 
to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", + .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0xa, }, @@ -179,7 +179,7 @@ static const sparc_entry_t ultra4plus_pe[] = { }, { .name = "IPB_to_IC_fill", - .desc = "I-cache filles from the instruction prefetch buffer", + .desc = "I-cache fills from the instruction prefetch buffer", .ctrl = PME_CTRL_S0, .code = 0x1e, }, @@ -577,7 +577,7 @@ static const sparc_entry_t ultra4plus_pe[] = { }, { .name = "L3_miss", - .desc = "Number of L3 cache misses sent out to SIU from this core by cacheable I-cache, D-cache, P-cache, and W-cache (exclusing block stores) requests", + .desc = "Number of L3 cache misses sent out to SIU from this core by cacheable I-cache, D-cache, P-cache, and W-cache (excluding block stores) requests", .ctrl = PME_CTRL_S1, .code = 0x31, }, commit e799ae1bafabe88d7a63787edd42953850cba676 Author: Steve Kaufmann Date: Mon Jan 13 14:21:09 2014 +0100 fix PMU name description typo in comment Signed-off-by: Steve Kaufmann diff --git a/lib/events/intel_snbep_events.h b/lib/events/intel_snbep_events.h index 17b51c6..a9d88f5 100644 --- a/lib/events/intel_snbep_events.h +++ b/lib/events/intel_snbep_events.h @@ -24,7 +24,7 @@ * * This file has been automatically generated. * - * PMU: snb (Intel Sandy Bridge EP) + * PMU: snb_ep (Intel Sandy Bridge EP) */ static const intel_x86_umask_t snbep_agu_bypass_cancel[]={ commit bca43a50f9249df79db82e149de2a8d846eb6d45 Author: Stephane Eranian Date: Mon Jan 13 16:21:44 2014 +0100 add support for RAPL GPU energy counter This patch adds a new RAPL event to measure builtin GPU energy consumption on Intel SNB/IVB/HSW client processors. Requires a kernel with RAPL support in perf_events such as Linux v3.13 or later. 
New event: rapl::rapl_energy_gpu Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_rapl.c b/lib/pfmlib_intel_rapl.c index 4570d65..1215731 100644 --- a/lib/pfmlib_intel_rapl.c +++ b/lib/pfmlib_intel_rapl.c @@ -51,7 +51,12 @@ extern pfmlib_pmu_t intel_rapl_support; } static const intel_x86_entry_t intel_rapl_cln_pe[]={ - RAPL_COMMON_EVENTS + RAPL_COMMON_EVENTS, + { .name = "RAPL_ENERGY_GPU", + .desc = "Number of Joules consumed by the builtin GPU. Unit is 2^-32 Joules", + .cntmsk = 0x8, + .code = 0x4, + } }; static const intel_x86_entry_t intel_rapl_srv_pe[]={ commit 137067de9afc9c5bbb17d0057756c106f7898029 Author: Stephane Eranian Date: Thu Feb 6 12:08:18 2014 +0100 add INTEL_X86_GRP_DFL_NONE umask flag When this flag is set for a umask, it means the umask group (grpid) does not require at least one umask to be set. In other words, umasks in the group need to be separated from the others, but it is okay if none is specified. Normally, at least one umask per group is required. 
Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_snbep_unc.c b/lib/pfmlib_intel_snbep_unc.c index 9813e1c..7233e77 100644 --- a/lib/pfmlib_intel_snbep_unc.c +++ b/lib/pfmlib_intel_snbep_unc.c @@ -149,9 +149,19 @@ snbep_unc_add_defaults(void *this, pfmlib_event_desc_t *e, continue; } + if (intel_x86_uflag(this, e->event, idx, INTEL_X86_GRP_DFL_NONE)) { + skip = 1; + continue; + } + /* umask is default for group */ if (intel_x86_uflag(this, e->event, idx, INTEL_X86_DFL)) { - DPRINT("added default %s for group %d j=%d idx=%d\n", ent->umasks[idx].uname, i, j, idx); + DPRINT("added default %s for group %d j=%d idx=%d ucode=0x%"PRIx64"\n", + ent->umasks[idx].uname, + i, + j, + idx, + ent->umasks[idx].ucode); /* * default could be an alias, but * ucode must reflect actual code @@ -181,7 +191,7 @@ snbep_unc_add_defaults(void *this, pfmlib_event_desc_t *e, return PFM_ERR_UMASK; } } - DPRINT("max_grpid=%d nattrs=%d k=%d\n", max_grpid, e->nattrs, k); + DPRINT("max_grpid=%d nattrs=%d k=%d umask=0x%"PRIx64"\n", max_grpid, e->nattrs, k, *umask); done: e->nattrs = k; return PFM_SUCCESS; diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c index baa9f63..cfa1730 100644 --- a/lib/pfmlib_intel_x86.c +++ b/lib/pfmlib_intel_x86.c @@ -215,9 +215,19 @@ pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e, continue; } + if (intel_x86_uflag(this, e->event, idx, INTEL_X86_GRP_DFL_NONE)) { + skip = 1; + continue; + } + /* umask is default for group */ if (intel_x86_uflag(this, e->event, idx, INTEL_X86_DFL)) { - DPRINT("added default %s for group %d j=%d idx=%d\n", ent->umasks[idx].uname, i, j, idx); + DPRINT("added default %s for group %d j=%d idx=%d ucode=0x%"PRIx64"\n", + ent->umasks[idx].uname, + i, + j, + idx, + ent->umasks[idx].ucode); /* * default could be an alias, but * ucode must reflect actual code @@ -246,7 +256,7 @@ pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e, return PFM_ERR_UMASK; } } - DPRINT("max_grpid=%d nattrs=%d k=%d\n", 
max_grpid, e->nattrs, k); + DPRINT("max_grpid=%d nattrs=%d k=%d umask=0x%"PRIx64"\n", max_grpid, e->nattrs, k, *umask); done: e->nattrs = k; return PFM_SUCCESS; diff --git a/lib/pfmlib_intel_x86_priv.h b/lib/pfmlib_intel_x86_priv.h index 0f0c1f5..95a7565 100644 --- a/lib/pfmlib_intel_x86_priv.h +++ b/lib/pfmlib_intel_x86_priv.h @@ -86,6 +86,7 @@ typedef struct { #define INTEL_X86_NO_AUTOENCODE 0x100 /* does not support auto encoding validation */ #define INTEL_X86_CODE_OVERRIDE 0x200 /* umask overrides event code */ #define INTEL_X86_LDLAT 0x400 /* needs load latency modifier (ldlat) */ +#define INTEL_X86_GRP_DFL_NONE 0x800 /* ok if umask group defaults to no umask */ typedef union pfm_intel_x86_reg { unsigned long long val; /* complete register value */ commit 02e07d2585015d5c2eecdc47a35b07fdc21035cb Author: Stephane Eranian Date: Thu Feb 6 12:30:33 2014 +0100 fix SNBEP uncore default umask settings The routine snbep_unc_add_defaults() is already shifting the raw umask code by 8, so do not shift it by 8 a second time in the encoding routine, otherwise it will be zero. Also combine the default with what is already set for the other umask groups. Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_intel_snbep_unc.c b/lib/pfmlib_intel_snbep_unc.c index 7233e77..7ca86ca 100644 --- a/lib/pfmlib_intel_snbep_unc.c +++ b/lib/pfmlib_intel_snbep_unc.c @@ -424,8 +424,7 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) ret = snbep_unc_add_defaults(this, e, ugrpmsk, &um, &filter, max_grpid); if (ret != PFM_SUCCESS) return ret; - um >>= 8; - umask2 = um; + umask2 |= um; } /* commit 4f070bd5a858c9ce57528018c50798d08811166b Author: Stephane Eranian Date: Thu Feb 6 12:32:15 2014 +0100 fix SNBEP-UNC CBOX UNC_C_LLC_LOOKUP umasks groups NID must qualify another umask. So put them in separate groups. Make DATA_READS the default umask for first group. Update validation test suite accordingly. 
Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_snbep_unc_cbo_events.h b/lib/events/intel_snbep_unc_cbo_events.h index b2f0878..40d3e37 100644 --- a/lib/events/intel_snbep_unc_cbo_events.h +++ b/lib/events/intel_snbep_unc_cbo_events.h @@ -165,7 +165,7 @@ static const intel_x86_umask_t snbep_unc_c_llc_lookup[]={ { .uname = "DATA_READ", .udesc = "Data read requests", .grpid = 0, - .uflags = INTEL_X86_NCOMBO, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .ucode = 0x300, }, { .uname = "WRITE", @@ -182,12 +182,12 @@ static const intel_x86_umask_t snbep_unc_c_llc_lookup[]={ }, { .uname = "NID", .udesc = "Match a given RTID destination NID", - .uflags = INTEL_X86_NCOMBO, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_GRP_DFL_NONE, .umodmsk_req = _SNBEP_UNC_ATTR_NF, - .grpid = 0, + .grpid = 1, .ucode = 0x4100, }, - CBO_FILT_MESIFS(1), + CBO_FILT_MESIFS(2), }; static const intel_x86_umask_t snbep_unc_c_llc_victims[]={ @@ -626,7 +626,7 @@ static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ .modmsk = SNBEP_UNC_CBO_NID_ATTRS, .cntmsk = 0x3, .code = 0x34, - .ngrp = 2, + .ngrp = 3, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_llc_lookup), .umasks = snbep_unc_c_llc_lookup, diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 8dc222b..d0a6666 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1269,17 +1269,17 @@ static const test_event_t x86_test_events[]={ .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:nf=3", .ret = PFM_SUCCESS, .count = 2, - .codes[0] = 0x4134, + .codes[0] = 0x4334, .codes[1] = 0x7c0c00, - .fstr = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:STATE_MESIF:e=0:i=0:t=0:tf=0:nf=3", + .fstr = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:NID:STATE_MESIF:e=0:i=0:t=0:tf=0:nf=3", }, { SRC_LINE, .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:STATE_M:nf=3", .ret = PFM_SUCCESS, .count = 2, - .codes[0] = 0x4134, + .codes[0] = 0x4334, .codes[1] = 0x200c00, - .fstr = 
"snbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:STATE_M:e=0:i=0:t=0:tf=0:nf=3", + .fstr = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:NID:STATE_M:e=0:i=0:t=0:tf=0:nf=3", }, { SRC_LINE, .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:nf=3:tid=1", commit 9b6576c74879450618ac8cb65342578401dbf937 Author: Stephane Eranian Date: Tue Feb 18 18:56:35 2014 +0100 add a match_event() pmu callback This patch adds the match_event() pmu callback. This is an optional callback. It can be used by PMU models to implement specific event match algorithm. It operates only on the event name, not the attributes. Signed-off-by: Stephane Eranian diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index 861f475..b9d1d67 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -1026,10 +1026,17 @@ pfmlib_release_event(pfmlib_event_desc_t *e) } static int +match_event(void *this, pfmlib_event_desc_t *d, const char *e, const char *s) +{ + return strcasecmp(e, s); +} + +static int pfmlib_parse_equiv_event(const char *event, pfmlib_event_desc_t *d) { pfmlib_pmu_t *pmu = d->pmu; pfm_event_info_t einfo; + int (*match)(void *this, pfmlib_event_desc_t *d, const char *e, const char *s); char *str, *s, *p; int i; int ret; @@ -1045,11 +1052,13 @@ pfmlib_parse_equiv_event(const char *event, pfmlib_event_desc_t *d) if (p) *p++ = '\0'; + match = pmu->match_event ? 
pmu->match_event : match_event; + pfmlib_for_each_pmu_event(pmu, i) { ret = pmu->get_event_info(pmu, i, &einfo); if (ret != PFM_SUCCESS) goto error; - if (!strcasecmp(einfo.name, s)) + if (!match(pmu, d, einfo.name, s)) goto found; } free(str); @@ -1085,6 +1094,7 @@ pfmlib_parse_event(const char *event, pfmlib_event_desc_t *d) pfm_event_info_t einfo; char *str, *s, *p; pfmlib_pmu_t *pmu; + int (*match)(void *this, pfmlib_event_desc_t *d, const char *e, const char *s); const char *pname = NULL; int i, j, ret; @@ -1138,6 +1148,8 @@ pfmlib_parse_event(const char *event, pfmlib_event_desc_t *d) */ if (pname && !pfmlib_pmu_active(pmu) && !pfm_cfg.inactive) continue; + + match = pmu->match_event ? pmu->match_event : match_event; /* * for each event */ @@ -1145,7 +1157,7 @@ pfmlib_parse_event(const char *event, pfmlib_event_desc_t *d) ret = pmu->get_event_info(pmu, i, &einfo); if (ret != PFM_SUCCESS) goto error; - if (!strcasecmp(einfo.name, s)) + if (!match(pmu, d, einfo.name, s)) goto found; } } diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index 287222d..aa7f819 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -132,8 +132,12 @@ typedef struct pfmlib_pmu { void (*validate_pattrs[PFM_OS_MAX])(void *this, pfmlib_event_desc_t *e); int (*os_detect[PFM_OS_MAX])(void *this); int (*validate_table)(void *this, FILE *fp); - int (*get_num_events)(void *this); /* optional */ - void (*display_reg)(void *this, pfmlib_event_desc_t *e, void *val); /* optional */ + /* + * optional callbacks + */ + int (*get_num_events)(void *this); + void (*display_reg)(void *this, pfmlib_event_desc_t *e, void *val); + int (*match_event)(void *this, pfmlib_event_desc_t *d, const char *e, const char *s); } pfmlib_pmu_t; typedef struct { commit 13dab55b0c963a027dbe33c7434efe07433346f5 Author: Stephane Eranian Date: Tue Feb 18 17:33:22 2014 +0100 add perf_event RAW event syntax support This support is integrated via a pseudo PMU called perf_raw. 
It provides no events, but instead just the necessary callbacks to match event codes specified in hex using the following syntax: rX. Where X is a 64-bit hexadecimal value. It can be followed by the usual attributes supported by perf_events: $ perf_examples/task -e r01c4:u,cycles foo This pseudo PMU is only available on Linux. Signed-off-by: Stephane Eranian diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index 5434a96..76bca1b 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -197,6 +197,8 @@ typedef enum { PFM_PMU_INTEL_SLM, /* Intel Silvermont */ PFM_PMU_AMD64_FAM15H_NB, /* AMD AMD64 Fam15h NorthBridge */ + PFM_PMU_PERF_EVENT_RAW, /* perf_events RAW event syntax */ + /* MUST ADD NEW PMU MODELS HERE */ PFM_PMU_MAX /* end marker */ diff --git a/lib/Makefile b/lib/Makefile index 02dcfbb..e4530bf 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,7 +30,7 @@ include $(TOPDIR)/rules.mk SRCS=pfmlib_common.c ifeq ($(SYS),Linux) -SRCS += pfmlib_perf_event_pmu.c pfmlib_perf_event.c +SRCS += pfmlib_perf_event_pmu.c pfmlib_perf_event.c pfmlib_perf_event_raw.c endif CFLAGS+=-D_REENTRANT -I. diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index b9d1d67..bd2f49f 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -170,6 +170,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= #endif #ifdef __linux__ &perf_event_support, + &perf_event_raw_support, #endif }; #define PFMLIB_NUM_PMUS (int)(sizeof(pfmlib_pmus)/sizeof(pfmlib_pmu_t *)) diff --git a/lib/pfmlib_perf_event_raw.c b/lib/pfmlib_perf_event_raw.c new file mode 100644 index 0000000..e10d215 --- /dev/null +++ b/lib/pfmlib_perf_event_raw.c @@ -0,0 +1,175 @@ +/* + * pfmlib_perf_event_raw.c: support for raw event syntax + * + * Copyright (c) 2014 Google, Inc.
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include + +#include "pfmlib_priv.h" +#include "pfmlib_perf_event_priv.h" + +static int +pfm_perf_raw_detect(void *this) +{ +#ifdef __linux__ + /* ought to find a better way of detecting PERF */ +#define PERF_OLD_PROC_FILE "/proc/sys/kernel/perf_counter_paranoid" +#define PERF_PROC_FILE "/proc/sys/kernel/perf_event_paranoid" + return !(access(PERF_PROC_FILE, F_OK) + && access(PERF_OLD_PROC_FILE, F_OK)) ? 
PFM_SUCCESS: PFM_ERR_NOTSUPP; +#else + return PFM_SUCCESS; +#endif +} + +static int +pfm_perf_raw_get_event_first(void *this) +{ + return 0; +} + +static int +pfm_perf_raw_get_event_next(void *this, int idx) +{ + /* only one pseudo event */ + return -1; +} + +static int +pfm_perf_raw_get_encoding(void *this, pfmlib_event_desc_t *e) +{ + /* + * actual encoding done in pfm_perf_raw_match_event() + */ + e->fstr[0] = '\0'; + evt_strcat(e->fstr, "r%"PRIx64, e->codes[0]); + return PFM_SUCCESS; +} + +static int +pfm_perf_raw_get_perf_encoding(void *this, pfmlib_event_desc_t *e) +{ + struct perf_event_attr *attr; + + attr = e->os_data; + attr->type = PERF_TYPE_RAW; + attr->config = e->codes[0]; + + attr->config1 = e->codes[1]; + attr->config2 = e->codes[2]; + + return PFM_SUCCESS; +} + +static int +pfm_perf_raw_event_is_valid(void *this, int idx) +{ + return idx == 0; +} + +static int +pfm_perf_raw_get_event_attr_info(void *this, int idx, int attr_idx, pfm_event_attr_info_t *info) +{ + return PFM_ERR_ATTR; +} + +static int +pfm_perf_raw_get_event_info(void *this, int idx, pfm_event_info_t *info) +{ + pfmlib_pmu_t *pmu = this; + + info->name = "r0000"; + info->desc = "perf_events raw event syntax: r[0-9a-fA-F]+", + info->code = 0; + info->equiv = NULL; + info->idx = 0; + info->pmu = pmu->pmu; + info->is_precise = 0; + + /* unit masks + modifiers */ + info->nattrs = 0; + + return PFM_SUCCESS; +} + +static unsigned int +pfm_perf_raw_get_event_nattrs(void *this, int idx) +{ + return 0; +} + +/* + * remove attrs which are in conflict (or duplicated) with os layer + */ +static void +pfm_perf_raw_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) +{ +} + +/* + * returns 0 if match (like strcmp()) + */ +static int +pfm_perf_raw_match_event(void *this, pfmlib_event_desc_t *d, const char *e, const char *s) +{ + uint64_t code; + int ret; + + if (*s != 'r' || !isxdigit(*(s+1))) + return 1; + + ret = sscanf(s+1, "%"PRIx64, &code); + if (ret != 1) + return 1; + /* + * stash code
in final position + */ + d->codes[0] = code; + d->count = 1; + + return 0; +} + +pfmlib_pmu_t perf_event_raw_support={ + .desc = "perf_events raw PMU", + .name = "perf_raw", + .pmu = PFM_PMU_PERF_EVENT_RAW, + .pme_count = 1, + .type = PFM_PMU_TYPE_OS_GENERIC, + .max_encoding = 1, + .supported_plm = PERF_PLM_ALL, + .pmu_detect = pfm_perf_raw_detect, + .get_event_encoding[PFM_OS_NONE] = pfm_perf_raw_get_encoding, + PFMLIB_ENCODE_PERF(pfm_perf_raw_get_perf_encoding), + .get_event_first = pfm_perf_raw_get_event_first, + .get_event_next = pfm_perf_raw_get_event_next, + .event_is_valid = pfm_perf_raw_event_is_valid, + .get_event_info = pfm_perf_raw_get_event_info, + .get_event_attr_info = pfm_perf_raw_get_event_attr_info, + .get_event_nattrs = pfm_perf_raw_get_event_nattrs, + .match_event = pfm_perf_raw_match_event, + PFMLIB_VALID_PERF_PATTRS(pfm_perf_raw_perf_validate_pattrs), +}; diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index aa7f819..e1a2458 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -288,6 +288,7 @@ extern pfmlib_pmu_t sparc_niagara1_support; extern pfmlib_pmu_t sparc_niagara2_support; extern pfmlib_pmu_t cell_support; extern pfmlib_pmu_t perf_event_support; +extern pfmlib_pmu_t perf_event_raw_support; extern pfmlib_pmu_t intel_wsm_sp_support; extern pfmlib_pmu_t intel_wsm_dp_support; extern pfmlib_pmu_t intel_wsm_unc_support; commit 6c21b52f3fd6815505fe2ed68f82a02d44fde6cb Author: Stephane Eranian Date: Wed Feb 19 15:01:32 2014 +0100 create a per-system man variable in man pages Makefile This patch adds SYS_MAN to separate system-specific man pages. For instance, the perf_events entry points are only available on Linux.
Signed-off-by: Stephane Eranian diff --git a/docs/Makefile b/docs/Makefile index 45180d5..bbc73c6 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -28,6 +28,7 @@ include $(TOPDIR)/rules.mk .PHONY: all clean distclean depend ARCH_MAN= +SYS_MAN= ifeq ($(CONFIG_PFMLIB_ARCH_X86),y) ARCH_MAN=libpfm_intel_core.3 \ @@ -81,14 +81,17 @@ GEN_MAN= libpfm.3 \ pfm_get_event_encoding.3 \ pfm_get_event_next.3 \ pfm_get_pmu_info.3 \ - pfm_get_perf_event_encoding.3 \ pfm_get_os_event_encoding.3 \ pfm_get_version.3 \ pfm_initialize.3 \ pfm_terminate.3 \ pfm_strerror.3 -MAN=$(GEN_MAN) $(ARCH_MAN) +ifeq ($(SYS),Linux) +SYS_MAN=pfm_get_perf_event_encoding.3 +endif + +MAN=$(GEN_MAN) $(ARCH_MAN) $(SYS_MAN) install: commit 1703fb2f259c330707373b3badb5192d6a193c1c Author: Stephane Eranian Date: Wed Feb 19 15:53:46 2014 +0100 add man page for perf_events RAW PMU Add the man page for the perf_events pseudo PMU to pass raw events syntax. Signed-off-by: Stephane Eranian diff --git a/docs/Makefile b/docs/Makefile index bbc73c6..330c26b 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -88,7 +88,7 @@ GEN_MAN= libpfm.3 \ pfm_strerror.3 ifeq ($(SYS),Linux) -SYS_MAN=pfm_get_perf_event_encoding.3 +SYS_MAN=pfm_get_perf_event_encoding.3 libpfm_perf_event_raw.3 endif MAN=$(GEN_MAN) $(ARCH_MAN) $(SYS_MAN) diff --git a/docs/man3/libpfm_perf_event_raw.3 b/docs/man3/libpfm_perf_event_raw.3 new file mode 100644 index 0000000..2f8b535 --- /dev/null +++ b/docs/man3/libpfm_perf_event_raw.3 @@ -0,0 +1,76 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_perf_event_raw - support for perf_events raw events syntax +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: perf_raw +.B PMU desc: Raw perf_events event syntax +.sp +.SH DESCRIPTION +The library supports a pseudo PMU model to allow raw encodings of PMU events +for the Linux perf_events kernel interface. + +With this PMU, it is possible to provide the raw hexadecimal encoding of any +hardware event for any PMU models. 
The raw encoding is passed as is to the +kernel. All events are encoded as \fBPERF_TYPE_RAW\fR. As such, perf_events +generic events, such as cycles, instructions, cannot be encoded by this +PMU. + +The syntax is very simple: rX. X is the hexadecimal 64-bit value for the event. +It may include event filters on some PMU models. The hexadecimal number is passed +without the 0x prefix, e.g., r01c4. + +The library's standard perf_events attributes are supported by this PMU model. +They are separated with colons as is customary with the library. + +.SH MODIFIERS +The following modifiers are supported by this PMU model: +.TP +.B u +Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. +This is a boolean modifier. +.TP +.B k +Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. +This is a boolean modifier. +.TP +.B h +Measure at the hypervisor level. This corresponds to \fBPFM_PLMH\fR. +This is a boolean modifier. +.TP +.B mg +Measure guest execution only. This is a boolean modifier. +.TP +.B mh +Measure host execution only. This is a boolean modifier. +.TP +.B period +Specify the sampling period value. Value can be expressed in decimal or hexadecimal. +Value is 64-bit wide. This option is mutually exclusive with +\fBfreq\fR. The period is expressed in the unit of the event. There is no default value. +.TP +.B freq +Specify the sampling frequency value. Value can be expressed in decimal or hexadecimal. +Value is 64-bit wide. This option is mutually exclusive with +\fBperiod\fR. The value is expressed in Hertz. For instance, freq=100, means that the +event should be sampled 100 times per second on average. There is no default value. +.TP +.B excl +The associated event is the only event measured on the PMU. This applies only to hardware +events. This attribute requires admin privileges. Default is off. +.TP +.B precise +Enables precise sampling mode.
This option is only valid on sampling events. This is an +integer value. It can have the following values: 1 enable precise sampling, 2 enable precise +sampling and eliminate skid. Not all events necessarily support precise mode, this is dependent +on the underlying PMU. Eliminating skid is a best effort feature. It may not work for all +samples. + + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP commit 921eb7acc4082f8dd0e7efe27918ee666e2b291d Author: Stephane Eranian Date: Mon Jan 27 02:25:00 2014 -0800 add Intel IvyBridge-EP (IvyTown) uncore PMU support This patch adds support for Intel IvyBridge-EP (IvyTown) uncore PMUs. That includes the following PMUs: - CBOX - UBOX - PCU - HA - IMC - IRP - R3QPI - R2PCIE - QPI These event tables are based on the following documentation: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/xeon-e5-2600-v2-uncore-manual.pdf Signed-off-by: Stephane Eranian diff --git a/docs/Makefile b/docs/Makefile index 330c26b..ecf6769 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -58,6 +58,15 @@ ARCH_MAN=libpfm_intel_core.3 \ libpfm_intel_snbep_unc_ubo.3 \ libpfm_intel_snbep_unc_r2pcie.3 \ libpfm_intel_snbep_unc_r3qpi.3 \ + libpfm_intel_ivbep_unc_cbo.3 \ + libpfm_intel_ivbep_unc_ha.3 \ + libpfm_intel_ivbep_unc_imc.3 \ + libpfm_intel_ivbep_unc_pcu.3 \ + libpfm_intel_ivbep_unc_qpi.3 \ + libpfm_intel_ivbep_unc_ubo.3 \ + libpfm_intel_ivbep_unc_r2pcie.3 \ + libpfm_intel_ivbep_unc_r3qpi.3 \ + libpfm_intel_ivbep_unc_irp.3 \ libpfm_intel_knc.3 ifeq ($(CONFIG_PFMLIB_ARCH_I386),y) diff --git a/docs/man3/libpfm_intel_ivbep_unc_cbo.3 b/docs/man3/libpfm_intel_ivbep_unc_cbo.3 new file mode 100644 index 0000000..e417a46 --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_cbo.3 @@ -0,0 +1,68 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_cbo - support for Intel Ivy Bridge-EP C-Box uncore PMU +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: ivbep_unc_cbo[0-7] +.B PMU desc: Intel Ivy
Bridge-EP C-Box uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge C-Box (coherency engine) uncore PMU. +This PMU model only exists on Ivy Bridge model 62. There is one C-Box +PMU per physical core. Therefore there are up to fifteen identical C-Box PMU instances +numbered from 0 to 14. On dual-socket systems, the number refers to the C-Box +PMU on the socket where the program runs. For instance, if running on CPU15, then +ivbep_unc_cbo0 refers to the C-Box for physical core 0 on socket 1. Conversely, +if running on CPU0, then the same ivbep_unc_cbo0 refers to the C-Box for physical +core 0 but on socket 0. + +Each C-Box PMU implements 4 generic counters and two filter registers used only +with certain events and umasks. + +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge C-Box uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of C-Box cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:255]. +.TP +.B nf +Node filter. Certain events, such as UNC_C_LLC_LOOKUP, UNC_C_LLC_VICTIMS, provide a \fBNID\fR umask. +Sometimes the \fBNID\fR is combined with other filtering capabilities, such as opcodes. +The node filter is an 8-bit max bitmask. A node corresponds to a processor +socket. The legal values therefore depend on the underlying hardware configuration. For +dual-socket systems, the bitmask has two valid bits [0:1]. +.TP +.B cf +Core Filter. This is a 3-bit filter which is used to filter based on physical core origin +of the C-Box request. Possible values are 0-7.
If the filter is not specified, then no +filtering takes place. +.TP +.B tf +Thread Filter. This is a 1-bit filter which is used to filter C-Box requests based on logical +processor (hyper-thread) identification. Possible values are 0-1. If the filter is not +specified, then no filtering takes place. + +.SH Opcode filtering + +Certain events, such as UNC_C_TOR_INSERTS, support opcode matching on the C-Box transaction +type. To use this feature, first an opcode matching umask must be selected, e.g., MISS_OPCODE. +Second, the opcode to match on must be selected via a second umask among the OPC_* umasks. +For instance, UNC_C_TOR_INSERTS:OPCODE:OPC_RFO, counts the number of TOR insertions for RFO +transactions. + +Opcode matching may be combined with node filtering with certain umasks. In general, the +filtering support is encoded into the umask name, e.g., NID_OPCODE supports both +node and opcode filtering. For instance, UNC_C_TOR_INSERTS:NID_OPCODE:OPC_RFO:nf=1. + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_ha.3 b/docs/man3/libpfm_intel_ivbep_unc_ha.3 new file mode 100644 index 0000000..0d4d4e2 --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_ha.3 @@ -0,0 +1,30 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_ha - support for Intel Ivy Bridge-EP Home Agent (HA) uncore PMU +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: ivbep_unc_ha0, ivbep_unc_ha1 +.B PMU desc: Intel Ivy Bridge-EP HA uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge Home Agent (HA) uncore PMU. +This PMU model only exists on Ivy Bridge model 62. + +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge HA uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence.
This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of HA cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:255]. + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_imc.3 b/docs/man3/libpfm_intel_ivbep_unc_imc.3 new file mode 100644 index 0000000..0050bcb --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_imc.3 @@ -0,0 +1,30 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_imc - support for Intel Ivy Bridge-EP Integrated Memory Controller (IMC) uncore PMU +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: ivbep_unc_imc[0-7] +.B PMU desc: Intel Ivy Bridge-EP IMC uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge Integrated Memory Controller (IMC) uncore PMU. +This PMU model only exists on Ivy Bridge model 62. + +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge IMC uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of IMC cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:255].
+ +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_irp.3 b/docs/man3/libpfm_intel_ivbep_unc_irp.3 new file mode 100644 index 0000000..7f4f8b0 --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_irp.3 @@ -0,0 +1,30 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_irp - support for Intel Ivy Bridge-EP IRP uncore PMU +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: ivbep_unc_irp +.B PMU desc: Intel Ivy Bridge-EP IRP uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge IRP uncore PMU. +This PMU model only exists on Ivy Bridge model 62. + +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge IRP uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:255]. + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_pcu.3 b/docs/man3/libpfm_intel_ivbep_unc_pcu.3 new file mode 100644 index 0000000..65ec6a3 --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_pcu.3 @@ -0,0 +1,44 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_pcu - support for Intel Ivy Bridge-EP Power Controller Unit (PCU) uncore PMU +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: ivbep_unc_pcu +.B PMU desc: Intel Ivy Bridge-EP PCU uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge Power Controller Unit uncore PMU. +This PMU model only exists on Ivy Bridge model 62.
+ +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge PCU uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of PCU cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:15]. +.TP +.B ff +Enable frequency band filtering. This modifier applies only to the UNC_P_FREQ_BANDx_CYCLES events, where x is [0-3]. +The modifier expects an integer in the range [0-255]. The value is interpreted as a frequency value to be +multiplied by 100MHz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are +counted. + +.SH Frequency band filtering + +There are 4 events which support frequency band filtering, namely, UNC_P_FREQ_BAND0_CYCLES, UNC_P_FREQ_BAND1_CYCLES, +UNC_P_FREQ_BAND2_CYCLES, UNC_P_FREQ_BAND3_CYCLES. The frequency filter (available via the ff modifier) is stored into +a PMU shared register which holds all 4 possible frequency bands, one per event. However, the library generates the +encoding for each event individually because it processes events one at a time. The caller or the underlying kernel +interface may have to merge the band filter settings to program the filter register properly.
+ +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_qpi.3 b/docs/man3/libpfm_intel_ivbep_unc_qpi.3 new file mode 100644 index 0000000..2b33f47 --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_qpi.3 @@ -0,0 +1,30 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_qpi - support for Intel Ivy Bridge-EP QPI uncore PMU +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: ivbep_unc_qpi0, ivbep_unc_qpi1 +.B PMU desc: Intel Ivy Bridge-EP QPI uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge QPI uncore PMU. +This PMU model only exists on Ivy Bridge model 62. + +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge QPI uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of QPI cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:255]. + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_r2pcie.3 b/docs/man3/libpfm_intel_ivbep_unc_r2pcie.3 new file mode 100644 index 0000000..196e46e --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_r2pcie.3 @@ -0,0 +1,30 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_r2pcie - support for Intel Ivy Bridge-EP R2 PCIe uncore PMU +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: ivbep_unc_r2pcie +.B PMU desc: Intel Ivy Bridge-EP R2 PCIe uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge R2 PCIe uncore PMU. +This PMU model only exists on Ivy Bridge model 62.
+ +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge R2PCIe uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of R2PCIe cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:15]. + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_r3qpi.3 b/docs/man3/libpfm_intel_ivbep_unc_r3qpi.3 new file mode 100644 index 0000000..1f7f48b --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_r3qpi.3 @@ -0,0 +1,30 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_r3qpi - support for Intel Ivy Bridge-EP R3QPI uncore PMU +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: ivbep_unc_r3qpi0, ivbep_unc_r3qpi1, ivbep_unc_r3qpi2 +.B PMU desc: Intel Ivy Bridge-EP R3QPI uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge R3QPI uncore PMU. +This PMU model only exists on Ivy Bridge model 62. + +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge R3QPI uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of R3QPI cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:15].
+ +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/docs/man3/libpfm_intel_ivbep_unc_ubo.3 b/docs/man3/libpfm_intel_ivbep_unc_ubo.3 new file mode 100644 index 0000000..2c4a6c1 --- /dev/null +++ b/docs/man3/libpfm_intel_ivbep_unc_ubo.3 @@ -0,0 +1,54 @@ +.TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" +.SH NAME +libpfm_intel_ivbep_unc_ubo - support for Intel Ivy Bridge-EP U-Box uncore PMU +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.B PMU name: ivbep_unc_ubo +.B PMU desc: Intel Ivy Bridge-EP U-Box uncore PMU +.sp +.SH DESCRIPTION +The library supports the Intel Ivy Bridge system configuration unit (U-Box) uncore PMU. +This PMU model only exists on Ivy Bridge model 62. + +.SH MODIFIERS +The following modifiers are supported on Intel Ivy Bridge U-Box uncore PMU: +.TP +.B e +Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. +.TP +.B t +Set the threshold value. When set to a non-zero value, the counter counts the number +of U-Box cycles in which the number of occurrences of the event is greater or equal to +the threshold. This is an integer modifier with values in the range [0:15]. +.TP +.B oi +Invert the meaning of the occupancy event POWER_STATE_OCCUPANCY. The counter will now count PCU cycles in which the +event is \fBnot\fR occurring. This is a boolean modifier. +.TP +.B oe +Enable edge detection for the occupancy event POWER_STATE_OCCUPANCY. The event now counts only when there is a state +transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold +modifier (t) with a value greater or equal to one. This is a boolean modifier. + +.TP +.B ff +Enable frequency band filtering. This modifier applies only to the UNC_P_FREQ_BANDx_CYCLES events, where x is [0-3].
+The modifier expects an integer in the range [0-255]. The value is interpreted as a frequency value to be +multiplied by 100MHz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are +counted. + +.SH Frequency band filtering + +There are 4 events which support frequency band filtering, namely, UNC_P_FREQ_BAND0_CYCLES, UNC_P_FREQ_BAND1_CYCLES, +UNC_P_FREQ_BAND2_CYCLES, UNC_P_FREQ_BAND3_CYCLES. The frequency filter (available via the ff modifier) is stored into +a PMU shared register which holds all 4 possible frequency bands, one per event. However, the library generates the +encoding for each event individually because it processes events one at a time. The caller or the underlying kernel +interface may have to merge the band filter settings to program the filter register properly. + +.SH AUTHORS +.nf +Stephane Eranian +.fi +.PP diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index 76bca1b..ccd3211 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -199,6 +199,42 @@ typedef enum { PFM_PMU_PERF_EVENT_RAW, /* perf_events RAW event syntax */ + PFM_PMU_INTEL_IVBEP_UNC_CB0, /* Intel IvyBridge-EP C-Box core 0 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB1, /* Intel IvyBridge-EP C-Box core 1 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB2, /* Intel IvyBridge-EP C-Box core 2 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB3, /* Intel IvyBridge-EP C-Box core 3 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB4, /* Intel IvyBridge-EP C-Box core 4 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB5, /* Intel IvyBridge-EP C-Box core 5 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB6, /* Intel IvyBridge-EP C-Box core 6 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB7, /* Intel IvyBridge-EP C-Box core 7 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB8, /* Intel IvyBridge-EP C-Box core 8 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB9, /* Intel IvyBridge-EP C-Box core 9 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB10, /* Intel IvyBridge-EP C-Box core 10 uncore */ +
PFM_PMU_INTEL_IVBEP_UNC_CB11, /* Intel IvyBridge-EP C-Box core 11 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB12, /* Intel IvyBridge-EP C-Box core 12 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB13, /* Intel IvyBridge-EP C-Box core 13 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_CB14, /* Intel IvyBridge-EP C-Box core 14 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_HA0, /* Intel IvyBridge-EP HA 0 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_HA1, /* Intel IvyBridge-EP HA 1 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC0, /* Intel IvyBridge-EP IMC socket 0 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC1, /* Intel IvyBridge-EP IMC socket 1 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC2, /* Intel IvyBridge-EP IMC socket 2 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC3, /* Intel IvyBridge-EP IMC socket 3 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC4, /* Intel IvyBridge-EP IMC socket 4 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC5, /* Intel IvyBridge-EP IMC socket 5 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC6, /* Intel IvyBridge-EP IMC socket 6 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IMC7, /* Intel IvyBridge-EP IMC socket 7 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_PCU, /* Intel IvyBridge-EP PCU uncore */ + PFM_PMU_INTEL_IVBEP_UNC_QPI0, /* Intel IvyBridge-EP QPI link 0 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_QPI1, /* Intel IvyBridge-EP QPI link 1 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_QPI2, /* Intel IvyBridge-EP QPI link 2 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_UBOX, /* Intel IvyBridge-EP U-Box uncore */ + PFM_PMU_INTEL_IVBEP_UNC_R2PCIE, /* Intel IvyBridge-EP R2PCIe uncore */ + PFM_PMU_INTEL_IVBEP_UNC_R3QPI0, /* Intel IvyBridge-EP R3QPI 0 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_R3QPI1, /* Intel IvyBridge-EP R3QPI 1 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_R3QPI2, /* Intel IvyBridge-EP R3QPI 2 uncore */ + PFM_PMU_INTEL_IVBEP_UNC_IRP, /* Intel IvyBridge-EP IRP uncore */ + /* MUST ADD NEW PMU MODELS HERE */ PFM_PMU_MAX /* end marker */ diff --git a/lib/Makefile b/lib/Makefile index e4530bf..531167e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -70,6 +70,15 @@ SRCS += 
pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ pfmlib_intel_snbep_unc_ubo.c \ pfmlib_intel_snbep_unc_r2pcie.c \ pfmlib_intel_snbep_unc_r3qpi.c \ + pfmlib_intel_ivbep_unc_cbo.c \ + pfmlib_intel_ivbep_unc_ha.c \ + pfmlib_intel_ivbep_unc_imc.c \ + pfmlib_intel_ivbep_unc_pcu.c \ + pfmlib_intel_ivbep_unc_qpi.c \ + pfmlib_intel_ivbep_unc_ubo.c \ + pfmlib_intel_ivbep_unc_r2pcie.c \ + pfmlib_intel_ivbep_unc_r3qpi.c \ + pfmlib_intel_ivbep_unc_irp.c \ pfmlib_intel_knc.c \ pfmlib_intel_slm.c \ pfmlib_intel_netburst.c \ @@ -219,6 +228,15 @@ INC_X86= pfmlib_intel_x86_priv.h \ events/intel_snbep_unc_r2pcie_events.h \ events/intel_snbep_unc_r3qpi_events.h \ events/intel_knc_events.h \ + events/intel_ivbep_unc_cbo_events.h \ + events/intel_ivbep_unc_ha_events.h \ + events/intel_ivbep_unc_imc_events.h \ + events/intel_ivbep_unc_pcu_events.h \ + events/intel_ivbep_unc_qpi_events.h \ + events/intel_ivbep_unc_ubo_events.h \ + events/intel_ivbep_unc_r2pcie_events.h \ + events/intel_ivbep_unc_r3qpi_events.h \ + events/intel_ivbep_unc_irp_events.h \ events/intel_slm_events.h INC_MIPS=events/mips_74k_events.h events/mips_74k_events.h diff --git a/lib/events/intel_ivbep_unc_cbo_events.h b/lib/events/intel_ivbep_unc_cbo_events.h new file mode 100644 index 0000000..ee8959f --- /dev/null +++ b/lib/events/intel_ivbep_unc_cbo_events.h @@ -0,0 +1,981 @@ +/* + * Copyright (c) 2014 Google Inc. 
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ * + * PMU: ivbep_unc_cbo (Intel IvyBridge-EP C-Box uncore PMU) + */ + +#define CBO_FILT_MESIF(a, b, c, d) \ + { .uname = "STATE_"#a,\ + .udesc = #b" cacheline state",\ + .ufilters[0] = 1ULL << (17 + (c)),\ + .grpid = d, \ + } + +#define CBO_FILT_MESIFS(d) \ + CBO_FILT_MESIF(I, Invalid, 0, d), \ + CBO_FILT_MESIF(S, Shared, 1, d), \ + CBO_FILT_MESIF(E, Exclusive, 2, d), \ + CBO_FILT_MESIF(M, Modified, 3, d), \ + CBO_FILT_MESIF(F, Forward, 4, d), \ + { .uname = "STATE_MESIF",\ + .udesc = "Any cache line state",\ + .ufilters[0] = 0x3fULL << 17,\ + .grpid = d, \ + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, \ + } + +#define CBO_FILT_OPC(d) \ + { .uname = "OPC_RFO",\ + .udesc = "Demand data RFO (combine with any OPCODE umask)",\ + .ufilters[1] = 0x180ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_CRD",\ + .udesc = "Demand code read (combine with any OPCODE umask)",\ + .ufilters[1] = 0x181ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_DRD",\ + .udesc = "Demand data read (combine with any OPCODE umask)",\ + .ufilters[1] = 0x182ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PRD",\ + .udesc = "Partial reads (UC) (combine with any OPCODE umask)",\ + .ufilters[1] = 0x187ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_WCILF",\ + .udesc = "Full Stream store (combine with any OPCODE umask)", \ + .ufilters[1] = 0x18cULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_WCIL",\ + .udesc = "Partial Stream store (combine with any OPCODE umask)", \ + .ufilters[1] = 0x18dULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PF_RFO",\ + .udesc = "Prefetch RFO into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ + .ufilters[1] = 0x190ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PF_CODE",\ + .udesc = 
"Prefetch code into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ + .ufilters[1] = 0x191ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PF_DATA",\ + .udesc = "Prefetch data into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ + .ufilters[1] = 0x192ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PCIWILF",\ + .udesc = "PCIe write (non-allocating) (combine with any OPCODE umask)", \ + .ufilters[1] = 0x194ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PCIPRD",\ + .udesc = "PCIe UC read (combine with any OPCODE umask)", \ + .ufilters[1] = 0x195ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PCIITOM",\ + .udesc = "PCIe write (allocating) (combine with any OPCODE umask)", \ + .ufilters[1] = 0x19cULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PCIRDCUR",\ + .udesc = "PCIe read current (combine with any OPCODE umask)", \ + .ufilters[1] = 0x19eULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_WBMTOI",\ + .udesc = "Request writeback modified invalidate line (combine with any OPCODE umask)", \ + .ufilters[1] = 0x1c4ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_WBMTOE",\ + .udesc = "Request writeback modified set to exclusive (combine with any OPCODE umask)", \ + .ufilters[1] = 0x1c5ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_ITOM",\ + .udesc = "Request invalidate line (combine with any OPCODE umask)", \ + .ufilters[1] = 0x1c8ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PCINSRD",\ + .udesc = "PCIe non-snoop read (combine with any OPCODE umask)", \ + .ufilters[1] = 0x1e4ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PCINSWR",\ + 
.udesc = "PCIe non-snoop write (partial) (combine with any OPCODE umask)", \ + .ufilters[1] = 0x1e5ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + }, \ + { .uname = "OPC_PCINSWRF",\ + .udesc = "PCIe non-snoop write (full) (combine with any OPCODE umask)", \ + .ufilters[1] = 0x1e6ULL << 20, \ + .uflags = INTEL_X86_NCOMBO, \ + .grpid = d, \ + } + +static const intel_x86_umask_t ivbep_unc_c_llc_lookup[]={ + { .uname = "DATA_READ", + .udesc = "Data read requests", + .grpid = 0, + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WRITE", + .udesc = "Write requests. Includes all write transactions (cached, uncached)", + .grpid = 0, + .ucode = 0x500, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REMOTE_SNOOP", + .udesc = "External snoop request", + .grpid = 0, + .ucode = 0x900, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ANY", + .udesc = "Any request", + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + .ucode = 0x1100, + }, + { .uname = "NID", + .udesc = "Match a given RTID destination NID", + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .grpid = 1, + .ucode = 0x4100, + .uflags = INTEL_X86_GRP_DFL_NONE + }, + CBO_FILT_MESIFS(2), +}; + +static const intel_x86_umask_t ivbep_unc_c_llc_victims[]={ + { .uname = "STATE_M", + .udesc = "Lines in M state", + .ucode = 0x100, + .grpid = 0, + }, + { .uname = "STATE_E", + .udesc = "Lines in E state", + .ucode = 0x200, + .grpid = 0, + }, + { .uname = "STATE_S", + .udesc = "Lines in S state", + .ucode = 0x400, + .grpid = 0, + }, + { .uname = "MISS", + .udesc = "TBD", + .ucode = 0x800, + .grpid = 0, + }, + { .uname = "NID", + .udesc = "Victimized Lines matching the NID filter", + .ucode = 0x4000, + .uflags = INTEL_X86_GRP_DFL_NONE, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .grpid = 1, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_ring_ad_used[]={ + { .uname = "UP_VR0_EVEN", + .udesc = "Up and Even ring polarity filter on virutal ring 0", + .ucode = 0x100, + }, + { .uname = "UP_VR0_ODD", + 
.udesc = "Up and odd ring polarity filter on virtual ring 0", + .ucode = 0x200, + }, + { .uname = "DOWN_VR0_EVEN", + .udesc = "Down and even ring polarity filter on virtual ring 0", + .ucode = 0x400, + }, + { .uname = "DOWN_VR0_ODD", + .udesc = "Down and odd ring polarity filter on virtual ring 0", + .ucode = 0x800, + }, + { .uname = "UP_VR1_EVEN", + .udesc = "Up and Even ring polarity filter on virutal ring 1", + .ucode = 0x1000, + }, + { .uname = "UP_VR1_ODD", + .udesc = "Up and odd ring polarity filter on virtual ring 1", + .ucode = 0x2000, + }, + { .uname = "DOWN_VR1_EVEN", + .udesc = "Down and even ring polarity filter on virtual ring 1", + .ucode = 0x4000, + }, + { .uname = "DOWN_VR1_ODD", + .udesc = "Down and odd ring polarity filter on virtual ring 1", + .ucode = 0x8000, + }, + { .uname = "UP", + .udesc = "Up on any virtual ring", + .ucode = 0x3300, + }, + { .uname = "DOWN", + .udesc = "Down any virtual ring", + .ucode = 0xcc00, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_ring_bounces[]={ + { .uname = "AD_IRQ", + .udesc = "TBD", + .ucode = 0x200, + }, + { .uname = "AK", + .udesc = "Acknowledgments to core", + .ucode = 0x400, + }, + + { .uname = "BL", + .udesc = "Data responses to core", + .ucode = 0x800, + }, + { .uname = "IV", + .udesc = "Snoops of processor cache", + .ucode = 0x1000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_ring_iv_used[]={ + { .uname = "ANY", + .udesc = "Any filter", + .ucode = 0xf00, + .uflags = INTEL_X86_DFL, + }, + { .uname = "UP", + .udesc = "Filter on any up polarity", + .ucode = 0x3300, + }, + { .uname = "DOWN", + .udesc = "Filter on any down polarity", + .ucode = 0xcc00, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_rxr_ext_starved[]={ + { .uname = "IRQ", + .udesc = "Irq externally starved, therefore blocking the IPQ", + .ucode = 0x100, + }, + { .uname = "IPQ", + .udesc = "IPQ externally starved, therefore blocking the IRQ", + .ucode = 0x200, + }, + { .uname = "PRQ", + .udesc = "IRQ is blocking 
the ingress queue and causing starvation", + .ucode = 0x400, + }, + { .uname = "ISMQ_BIDS", + .udesc = "Number of time the ISMQ bids", + .ucode = 0x800, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_rxr_inserts[]={ + { .uname = "IPQ", + .udesc = "IPQ", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IRQ", + .udesc = "IRQ", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IRQ_REJECTED", + .udesc = "IRQ rejected", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "VFIFO", + .udesc = "Counts the number of allocated into the IRQ ordering FIFO", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_rxr_ipq_retry[]={ + { .uname = "ADDR_CONFLICT", + .udesc = "Address conflict", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ANY", + .udesc = "Any Reject", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "FULL", + .udesc = "No Egress credits", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "QPI_CREDITS", + .udesc = "No QPI credits", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_rxr_irq_retry[]={ + { .uname = "ADDR_CONFLICT", + .udesc = "Address conflict", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ANY", + .udesc = "Any reject", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "FULL", + .udesc = "No Egress credits", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "QPI_CREDITS", + .udesc = "No QPI credits", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RTID", + .udesc = "No RTIDs", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IIO_CREDITS", + .udesc = "No IIO Credits", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_rxr_ismq_retry[]={ + { .uname = "ANY", + .udesc = 
"Any reject", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "FULL", + .udesc = "No Egress credits", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IIO_CREDITS", + .udesc = "No IIO credits", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "QPI_CREDITS", + .udesc = "NO QPI credits", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RTID", + .udesc = "No RTIDs", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WB_CREDITS", + .udesc = "No WB credits", + .ucode = 0x8000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_rxr_occupancy[]={ + { .uname = "IPQ", + .udesc = "IPQ", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IRQ", + .udesc = "IRQ", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IRQ_REJECTED", + .udesc = "IRQ rejected", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "VFIFO", + .udesc = "Number of used entries in the IRQ ordering FIFO in each cycle", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_tor_inserts[]={ + { .uname = "OPCODE", + .udesc = "Number of transactions inserted into the TOR that match an opcode (must provide opc_* umask)", + .ucode = 0x100, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MISS_OPCODE", + .udesc = "Number of miss transactions inserted into the TOR that match an opcode (must provide opc_* umask)", + .ucode = 0x300, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "EVICTION", + .udesc = "Number of Evictions transactions inserted into TOR", + .ucode = 0x400, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "ALL", + .udesc = "Number of transactions inserted in TOR", + .ucode = 0x800, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "WB", + .udesc = "Number of 
write transactions inserted into the TOR", + .ucode = 0x1000, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "LOCAL_OPCODE", + .udesc = "Number of opcode-matched transactions inserted into the TOR that are satisfied by locally homed memory", + .ucode = 0x2100, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MISS_LOCAL_OPCODE", + .udesc = "Number of miss opcode-matched transactions inserted into the TOR that are satisfied by locally homed memory", + .ucode = 0x2300, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "LOCAL", + .udesc = "Number of transactions inserted into the TOR that are satisfied by locally homed memory", + .ucode = 0x2800, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "MISS_LOCAL", + .udesc = "Number of miss transactions inserted into the TOR that are satisfied by locally homed memory", + .ucode = 0x2a00, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_OPCODE", + .udesc = "Number of transactions inserted into the TOR that match a NID and opcode (must provide opc_* umask and nf=X modifier)", + .ucode = 0x4100, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NID_MISS_OPCODE", + .udesc = "Number of NID and opcode matched miss transactions inserted into the TOR (must provide opc_* umask and nf=X modifier)", + .ucode = 0x4300, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NID_EVICTION", + .udesc = "Number of NID-matched eviction transactions inserted into the TOR (must provide nf=X modifier)", + .ucode = 0x4400, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_ALL", + .udesc = "Number of NID-matched transactions inserted into the TOR", + .ucode = 0x4800, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = 
INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_MISS_ALL", + .udesc = "Number of NID-matched miss transactions that were inserted into the TOR (must provide nf=X modifier)", + .ucode = 0x4a00, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_WB", + .udesc = "Number of NID-matched write back transactions inserted into the TOR (must provide nf=X modifier)", + .ucode = 0x5000, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "REMOTE_OPCODE", + .udesc = "Number of opcode-matched transactions inserted into the TOR that are satisfied by remote caches or memory", + .ucode = 0x8100, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MISS_REMOTE_OPCODE", + .udesc = "Number of miss opcode-matched transactions inserted into the TOR that are satisfied by remote caches or memory", + .ucode = 0x8300, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REMOTE", + .udesc = "Number of transactions inserted into the TOR that are satisfied by remote caches or memory", + .ucode = 0x8800, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "MISS_REMOTE", + .udesc = "Number of miss transactions inserted into the TOR that are satisfied by remote caches or memory", + .ucode = 0x8a00, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + CBO_FILT_OPC(1) +}; + +static const intel_x86_umask_t ivbep_unc_c_tor_occupancy[]={ /* umasks for UNC_C_TOR_OCCUPANCY (code 0x36); descriptions talk about entries resident in the TOR, not insertions */ + { .uname = "OPCODE", + .udesc = "Number of TOR entries that match an opcode (must provide opc_* umask)", + .ucode = 0x100, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MISS_OPCODE", + .udesc = "Number of outstanding miss requests in the TOR that match an opcode (must provide opc_* umask)", + .ucode = 0x300, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "EVICTION", + .udesc = "Number of outstanding eviction
transactions in the TOR", + .ucode = 0x400, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "ALL", + .udesc = "All valid TOR entries", + .ucode = 0x800, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "MISS_ALL", + .udesc = "Number of outstanding miss requests in the TOR", + .ucode = 0xa00, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "WB", + .udesc = "Number of write transactions in the TOR. Does not include RFO, but actual operations that contain data being sent from the core", + .ucode = 0x1000, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "LOCAL_OPCODE", + .udesc = "Number of opcode-matched transactions in the TOR that are satisfied by locally homed memory", + .ucode = 0x2100, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MISS_LOCAL_OPCODE", + .udesc = "Number of miss opcode-matched transactions in the TOR that are satisfied by locally homed memory", + .ucode = 0x2300, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "LOCAL", + .udesc = "Number of transactions in the TOR that are satisfied by locally homed memory", + .ucode = 0x2800, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "MISS_LOCAL", + .udesc = "Number of miss transactions in the TOR that are satisfied by locally homed memory", + .ucode = 0x2a00, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_OPCODE", + .udesc = "Number of NID-matched TOR entries that match an opcode (must provide nf=X modifier and opc_* umask)", + .ucode = 0x4100, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NID_MISS_OPCODE", + .udesc = "Number of NID-matched outstanding miss requests in the TOR that match an opcode (must provide nf=X modifier and opc_* umask)", + .ucode = 0x4300, + .grpid = 0, + .umodmsk_req
= _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NID_EVICTION", + .udesc = "Number of NID-matched outstanding eviction transactions in the TOR (must provide a nf=X modifier)", + .ucode = 0x4400, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_ALL", + .udesc = "Number of NID-matched outstanding requests in the TOR (must provide nf=X modifier)", + .ucode = 0x4800, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_MISS_ALL", + .udesc = "Number of NID-matched outstanding miss requests in the TOR (must provide a nf=X modifier)", + .ucode = 0x4a00, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "NID_WB", + .udesc = "Number of NID-matched write transactions in the TOR (must provide a nf=X modifier)", + .ucode = 0x5000, + .grpid = 0, + .umodmsk_req = _SNBEP_UNC_ATTR_NF1, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "REMOTE_OPCODE", + .udesc = "Number of opcode-matched transactions in the TOR that are satisfied by remote caches or memory", + .ucode = 0x8100, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MISS_REMOTE_OPCODE", + .udesc = "Number of miss opcode-matched transactions in the TOR that are satisfied by remote caches or memory", + .ucode = 0x8300, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REMOTE", + .udesc = "Number of transactions in the TOR that are satisfied by remote caches or memory", + .ucode = 0x8800, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + { .uname = "MISS_REMOTE", + .udesc = "Number of miss transactions in the TOR that are satisfied by remote caches or memory", + .ucode = 0x8a00, + .grpid = 0, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, + }, + CBO_FILT_OPC(1) +}; + +static const intel_x86_umask_t
ivbep_unc_c_txr_inserts[]={ + { .uname = "AD_CACHE", + .udesc = "Counts the number of ring transactions from Cachebo to AD ring", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "AK_CACHE", + .udesc = "Counts the number of ring transactions from Cachebo to AK ring", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "BL_CACHE", + .udesc = "Counts the number of ring transactions from Cachebo to BL ring", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IV_CACHE", + .udesc = "Counts the number of ring transactions from Cachebo ton IV ring", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "AD_CORE", + .udesc = "Counts the number of ring transactions from Corebo to AD ring", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "AK_CORE", + .udesc = "Counts the number of ring transactions from Corebo to AK ring", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "BL_CORE", + .udesc = "Counts the number of ring transactions from Corebo to BL ring", + .ucode = 0x4000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_c_txr_ads_used[]={ + { .uname = "AD", + .udesc = "onto AD ring", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "AK", + .udesc = "Onto AK ring", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "BL", + .udesc = "Onto BL ring", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + } +}; + +static const intel_x86_umask_t ivbep_unc_c_misc[]={ + { .uname = "RSPI_WAS_FSE", + .udesc = "Counts the number of times when a SNoop hit in FSE states and triggered a silent eviction. This is useful because this information is lost in the PRE encodings", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WC_ALIASING", + .udesc = "Counts the number of times a USWC write (WCIL(F)) transaction hits in the LLC in M state, triggering a WBMTOI followed by the USWC write. 
This occurs when there is WC aliasing", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "STARTED", + .udesc = "TBD", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RFO_HIT_S", + .udesc = "Counts the number of times that an RFO hits in S state. This is useful for determining if it might be good for a workload to use RSPIWB instead of RSPSWB", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_entry_t intel_ivbep_unc_c_pe[]={ /* C-box event table; each entry with umasks points at one of the umask arrays defined earlier */ + { .name = "UNC_C_CLOCKTICKS", + .desc = "C-box Uncore clockticks", + .modmsk = 0x0, + .cntmsk = 0xf, + .code = 0x00, + .flags = INTEL_X86_FIXED, + }, + { .name = "UNC_C_COUNTER0_OCCUPANCY", + .desc = "Counter 0 occupancy. Counts the occupancy related information by filtering CB0 occupancy count captured in counter 0.", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0xe, + .code = 0x1f, + }, + { .name = "UNC_C_LLC_LOOKUP", + .desc = "Cache lookups", + .modmsk = IVBEP_UNC_CBO_NID_ATTRS, + .cntmsk = 0x3, + .code = 0x34, + .ngrp = 3, + .flags = INTEL_X86_NO_AUTOENCODE, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_llc_lookup), + .umasks = ivbep_unc_c_llc_lookup, + }, + { .name = "UNC_C_LLC_VICTIMS", + .desc = "Lines victimized", + .modmsk = IVBEP_UNC_CBO_NID_ATTRS, + .cntmsk = 0x3, + .code = 0x37, + .flags = INTEL_X86_NO_AUTOENCODE, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_llc_victims), + .ngrp = 2, + .umasks = ivbep_unc_c_llc_victims, + }, + { .name = "UNC_C_MISC", + .desc = "Miscellaneous C-Box events", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0x3, + .code = 0x39, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_misc), + .ngrp = 1, + .umasks = ivbep_unc_c_misc, + }, + { .name = "UNC_C_RING_AD_USED", + .desc = "Address ring in use.
Counts number of cycles ring is being used at this ring stop", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0xc, + .code = 0x1b, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_ring_ad_used), + .ngrp = 1, + .umasks = ivbep_unc_c_ring_ad_used, + }, + { .name = "UNC_C_RING_AK_USED", + .desc = "Acknowledgement ring in use. Counts number of cycles ring is being used at this ring stop", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0xc, + .code = 0x1c, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_ring_ad_used), /* identical to RING_AD_USED */ + .ngrp = 1, + .umasks = ivbep_unc_c_ring_ad_used, + }, + { .name = "UNC_C_RING_BL_USED", + .desc = "Bus or Data ring in use. Counts number of cycles ring is being used at this ring stop", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0xc, + .code = 0x1d, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_ring_ad_used), /* identical to RING_AD_USED */ + .ngrp = 1, + .umasks = ivbep_unc_c_ring_ad_used, + }, + { .name = "UNC_C_RING_BOUNCES", + .desc = "Number of LLC responses that bounced in the ring", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0x3, + .code = 0x05, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_ring_bounces), + .ngrp = 1, + .umasks = ivbep_unc_c_ring_bounces, + }, + { .name = "UNC_C_RING_IV_USED", + .desc = "Invalidate ring in use. 
Counts number of cycles ring is being used at this ring stop", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0xc, + .code = 0x1e, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_ring_iv_used), + .ngrp = 1, + .umasks = ivbep_unc_c_ring_iv_used, + }, + { .name = "UNC_C_RING_SRC_THRTL", + .desc = "TDB", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0x3, + .code = 0x07, + }, + { .name = "UNC_C_RXR_EXT_STARVED", + .desc = "Ingress arbiter blocking cycles", + .modmsk = IVBEP_UNC_CBO_ATTRS, + .cntmsk = 0x3, + .code = 0x12, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_rxr_ext_starved), + .ngrp = 1, + .umasks = ivbep_unc_c_rxr_ext_starved, + }, + { .name = "UNC_C_RXR_INSERTS", + .desc = "Ingress Allocations", + .code = 0x13, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_CBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_rxr_inserts), + .umasks = ivbep_unc_c_rxr_inserts + }, + { .name = "UNC_C_RXR_IPQ_RETRY", + .desc = "Probe Queue Retries", + .code = 0x31, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_CBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_rxr_ipq_retry), + .umasks = ivbep_unc_c_rxr_ipq_retry + }, + { .name = "UNC_C_RXR_IRQ_RETRY", + .desc = "Ingress Request Queue Rejects", + .code = 0x32, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_CBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_rxr_irq_retry), + .umasks = ivbep_unc_c_rxr_irq_retry + }, + { .name = "UNC_C_RXR_ISMQ_RETRY", + .desc = "ISMQ Retries", + .code = 0x33, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_CBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_rxr_ismq_retry), + .umasks = ivbep_unc_c_rxr_ismq_retry + }, + { .name = "UNC_C_RXR_OCCUPANCY", + .desc = "Ingress Occupancy", + .code = 0x11, + .cntmsk = 0x1, + .ngrp = 1, + .modmsk = IVBEP_UNC_CBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_rxr_inserts), + .umasks = ivbep_unc_c_rxr_inserts, /* identical to ivbep_unc_c_rxr_inserts */ + }, + { .name = "UNC_C_TOR_INSERTS", + .desc = "TOR Inserts", + .code = 0x35, + 
.cntmsk = 0x3, + .ngrp = 2, + .modmsk = IVBEP_UNC_CBO_NID_ATTRS, + .flags = INTEL_X86_NO_AUTOENCODE, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_tor_inserts), + .umasks = ivbep_unc_c_tor_inserts + }, + { .name = "UNC_C_TOR_OCCUPANCY", + .desc = "TOR Occupancy", + .code = 0x36, + .cntmsk = 0x1, + .ngrp = 2, + .modmsk = IVBEP_UNC_CBO_NID_ATTRS, + .flags = INTEL_X86_NO_AUTOENCODE, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_tor_occupancy), + .umasks = ivbep_unc_c_tor_occupancy + }, + { .name = "UNC_C_TXR_ADS_USED", + .desc = "Egress events", + .code = 0x04, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_CBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_txr_ads_used), + .umasks = ivbep_unc_c_txr_ads_used + }, + { .name = "UNC_C_TXR_INSERTS", + .desc = "Egress allocations", + .code = 0x02, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_CBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_c_txr_inserts), + .umasks = ivbep_unc_c_txr_inserts + }, +}; diff --git a/lib/events/intel_ivbep_unc_ha_events.h b/lib/events/intel_ivbep_unc_ha_events.h new file mode 100644 index 0000000..58f0e06 --- /dev/null +++ b/lib/events/intel_ivbep_unc_ha_events.h @@ -0,0 +1,925 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + * + * This file has been automatically generated. + * + * PMU: ivbep_unc_ha (Intel IvyBridge-EP HA uncore PMU) + */ + +static const intel_x86_umask_t ivbep_unc_h_conflict_cycles[]={ + { .uname = "CONFLICT", + .udesc = "Number of cycles that we are handling conflicts", + .ucode = 0x200, + }, + { .uname = "LAST", + .udesc = "Count every last conflictor in conflict chain. Can be used to compute average conflict chain length", + .ucode = 0x400, + }, + { .uname = "CMP_FWDS", + .udesc = "Count the number of cmp_fwd. 
This gives the number of late conflicts", + .ucode = 0x1000, + }, + { .uname = "ACKCNFLTS", + .udesc = "Count the number of AckCnflts", + .ucode = 0x800, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_directory_lookup[]={ /* umasks for UNC_H_DIRECTORY_LOOKUP (code 0xc) */ + { .uname = "NO_SNP", + .udesc = "Snoop not needed", + .ucode = 0x200, + }, + { .uname = "SNOOP", + .udesc = "Snoop needed", + .ucode = 0x100, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_bypass_imc[]={ /* umasks for UNC_H_BYPASS_IMC (code 0x14) */ + { .uname = "TAKEN", + .udesc = "Bypass taken", + .ucode = 0x200, + }, + { .uname = "NOT_TAKEN", + .udesc = "Bypass not taken", + .ucode = 0x100, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_directory_update[]={ /* umasks for UNC_H_DIRECTORY_UPDATE (code 0xd) */ + { .uname = "ANY", + .udesc = "Counts any directory update", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "CLEAR", + .udesc = "Directory clears", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SET", + .udesc = "Directory set", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_igr_no_credit_cycles[]={ /* umasks for UNC_H_IGR_NO_CREDIT_CYCLES (code 0x22) */ + { .uname = "AD_QPI0", + .udesc = "AD to QPI link 0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "AD_QPI1", + .udesc = "AD to QPI link 1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "BL_QPI0", + .udesc = "BL to QPI link 0", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "BL_QPI1", + .udesc = "BL to QPI link 1", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_imc_writes[]={ /* umasks for UNC_H_IMC_WRITES (code 0x1a) */ + { .uname = "ALL", + .udesc = "Counts all writes", + .ucode = 0xf00, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "FULL", + .udesc = "Counts full line non ISOCH", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "FULL_ISOCH", + .udesc = "Counts ISOCH full line", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "PARTIAL", + .udesc = "Counts partial non-ISOCH", + .ucode
= 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "PARTIAL_ISOCH", + .udesc = "Counts ISOCH partial", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_imc_reads[]={ + { .uname = "NORMAL", + .udesc = "Normal priority", + .ucode = 0x100, + .uflags = INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_requests[]={ + { .uname = "READS", + .udesc = "Counts incoming read requests. Good proxy for LLC read misses, incl. RFOs", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_LOCAL", + .udesc = "Counts incoming read requests coming from local socket. Good proxy for LLC read misses, incl. RFOs from the local socket", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_REMOTE", + .udesc = "Counts incoming read requests coming from remote socket. Good proxy for LLC read misses, incl. RFOs from the remote socket", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WRITES", + .udesc = "Counts incoming writes", + .ucode = 0xc00, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WRITES_LOCAL", + .udesc = "Counts incoming writes from local socket", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WRITES_REMOTE", + .udesc = "Counts incoming writes from remote socket", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "INVITOE_LOCAL", + .udesc = "Counts InvItoE coming from local socket", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "INVITOE_REMOTE", + .udesc = "Counts InvItoE coming from remote socket", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + } +}; + +static const intel_x86_umask_t ivbep_unc_h_rpq_cycles_no_reg_credits[]={ + { .uname = "CHN0", + .udesc = "Channel 0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "CHN1", + .udesc = "Channel 1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "CHN2", + .udesc = "channel 2", + .ucode = 0x400, + 
.uflags = INTEL_X86_NCOMBO, + }, + { .uname = "CHN3", + .udesc = "Channel 3", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_tad_requests_g0[]={ /* umasks for UNC_H_TAD_REQUESTS_G0 (code 0x1b): one bit per TAD region 0-7 */ + { .uname = "REGION0", + .udesc = "Counts for TAD Region 0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION1", + .udesc = "Counts for TAD Region 1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION2", + .udesc = "Counts for TAD Region 2", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION3", + .udesc = "Counts for TAD Region 3", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION4", + .udesc = "Counts for TAD Region 4", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION5", + .udesc = "Counts for TAD Region 5", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION6", + .udesc = "Counts for TAD Region 6", + .ucode = 0x4000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION7", + .udesc = "Counts for TAD Region 7", + .ucode = 0x8000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_tad_requests_g1[]={ /* umasks for UNC_H_TAD_REQUESTS_G1 (code 0x1c): TAD regions 8-11 */ + { .uname = "REGION8", + .udesc = "Counts for TAD Region 8", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION9", + .udesc = "Counts for TAD Region 9", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION10", + .udesc = "Counts for TAD Region 10", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "REGION11", + .udesc = "Counts for TAD Region 11", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_snoop_resp[]={ + { .uname = "RSPI", + .udesc = "Filters for snoop responses of RspI. RspI is returned when the remote cache does not have the data or when the remote cache silently evicts data (e.g.
RFO hit non-modified line)", + .ucode = 0x100, + }, + { .uname = "RSPS", + .udesc = "Filters for snoop responses of RspS. RspS is returned when the remote cache has the data but is not forwarding it. It is a way to let the requesting socket know that it cannot allocate the data in E-state", + .ucode = 0x200, + }, + { .uname = "RSPIFWD", + .udesc = "Filters for snoop responses of RspIFwd. RspIFwd is returned when the remote cache agent forwards data and the requesting agent is able to acquire the data in E or M state. This is commonly returned with RFO transactions. It can be either HitM or HitFE", + .ucode = 0x400, + }, + { .uname = "RSPSFWD", + .udesc = "Filters for snoop responses of RspSFwd. RspSFwd is returned when the remote cache agent forwards data but holds on to its current copy. This is common for data and code reads that hit in a remote socket in E or F state", + .ucode = 0x800, + }, + { .uname = "RSP_WB", + .udesc = "Filters for snoop responses of RspIWB or RspSWB. This is returned when a non-RFO requests hits in M-state. Data and code reads can return either RspIWB or RspSWB depending on how the system has been configured. InvItoE transactions will also return RspIWB because they must acquire ownership", + .ucode = 0x1000, + }, + { .uname = "RSP_FWD_WB", + .udesc = "Filters for snoop responses of RspxFwdxWB. This snoop response is only used in 4s systems. It is used when a snoop HITM in a remote caching agent and it directly forwards data to a requester and simultaneously returns data to the home to be written back to memory", + .ucode = 0x2000, + }, + { .uname = "RSPCNFLCT", + .udesc = "Filters for snoop responses of RspConflict. This is returned when a snoop finds an existing outstanding transaction in a remote caching agent when it CMAs that caching agent. This triggers the conflict resolution hardware.
This covers both RspConflct and RspCnflctWBI", + .ucode = 0x4000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_txr_ad[]={ + { .uname = "NDR", + .udesc = "Counts non-data responses", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SNP", + .udesc = "Counts outbound snoops send on the ring", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_txr_ad_cycles_full[]={ + { .uname = "ALL", + .udesc = "Counts cycles full from both schedulers", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "SCHED0", + .udesc = "Counts cycles full from scheduler bank 0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SCHED1", + .udesc = "Counts cycles full from scheduler bank 1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_txr_bl_occupancy[]={ + { .uname = "SCHED0", + .udesc = "Counts cycles full from scheduler bank 0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SCHED1", + .udesc = "Counts cycles full from scheduler bank 1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_txr_ak_cycles_full[]={ + { .uname = "ALL", + .udesc = "Counts cycles from both schedulers", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "SCHED0", + .udesc = "Counts cycles from scheduler bank 0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SCHED1", + .udesc = "Counts cycles from scheduler bank 1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_txr_bl[]={ + { .uname = "DRS_CACHE", + .udesc = "Counts data being sent to the cache", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "DRS_CORE", + .udesc = "Counts data being sent directly to the requesting core", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = 
"DRS_QPI", + .udesc = "Counts data being sent to a remote socket over QPI", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_txr_bl_cycles_full[]={ + { .uname = "ALL", + .udesc = "BL Egress Full", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SCHED0", + .udesc = "BL Egress Full", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SCHED1", + .udesc = "BL Egress Full", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +#if 0 +static const intel_x86_umask_t ivbep_unc_h_addr_opc_match[]={ + { .uname = "FILT", + .udesc = "Number of addr and opcode matches (opc via opc= or address via addr= modifiers)", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_ADDR, + }, +}; +#endif + +static const intel_x86_umask_t ivbep_unc_h_bt_occupancy[]={ + { .uname = "LOCAL", + .udesc = "Local", + .ucode = 0x100, + }, + { .uname = "REMOTE", + .udesc = "Remote", + .ucode = 0x200, + }, + { .uname = "READS_REMOTE", + .udesc = "Reads remote", + .ucode = 0x800, + }, + { .uname = "WRITES_LOCAL", + .udesc = "Writes local", + .ucode = 0x1000, + }, + { .uname = "WRITES_REMOTE", + .udesc = "Writes remote", + .ucode = 0x2000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_bt_to_ht_not_issued[]={ + { .uname = "INCOMING_SNP_HAZARD", + .udesc = "Number of cycles unable to issue from BT due to incoming snoop hazard", + .ucode = 0x200, + }, + { .uname = "INCOMING_BL_HAZARD", + .udesc = "Number of cycles unable to issue from BT due to incoming BL data hazard", + .ucode = 0x400, + } +}; + +static const intel_x86_umask_t ivbep_unc_h_osb[]={ + { .uname = "REMOTE", + .udesc = "Remote", + .ucode = 0x800, + }, + { .uname = "READS_LOCAL", + .udesc = "Local reads", + .ucode = 0x200, + }, + { .uname = "INVITOE_LOCAL", + .udesc = "Local InvItoE", + .ucode = 0x400, + } +}; +static const intel_x86_umask_t ivbep_unc_h_osb_edr[]={ + { .uname = "ALL", + .udesc = "All data returns", + 
.ucode = 0x100, + .uflags = INTEL_X86_DFL | INTEL_X86_NCOMBO, + }, + { .uname = "READS_LOCAL_I", + .udesc = "Reads to local I", + .ucode = 0x200, + }, + { .uname = "READS_REMOTE_I", + .udesc = "Reads to remote I", + .ucode = 0x400, + }, + { .uname = "READS_LOCAL_S", + .udesc = "Reads to local S", + .ucode = 0x800, + }, + { .uname = "READS_REMOTE_S", + .udesc = "Reads to remote S", + .ucode = 0x1000, /* must be hex: a leading-zero literal is octal in C and would alias READS_LOCAL_I (0x200) */ + } +}; + +static const intel_x86_umask_t ivbep_unc_h_ring_ad_used[]={ /* VR0 directions use umask bits 0-3, VR1 directions bits 4-7, so that CW (0x3300) and CCW (0xcc00) are the OR of their members */ + { .uname = "CCW_VR0_EVEN", + .udesc = "Counter-clockwise and even ring polarity on virtual ring 0", + .ucode = 0x400, + }, + { .uname = "CCW_VR0_ODD", + .udesc = "Counter-clockwise and odd ring polarity on virtual ring 0", + .ucode = 0x800, + }, + { .uname = "CW_VR0_EVEN", + .udesc = "Clockwise and even ring polarity on virtual ring 0", + .ucode = 0x100, + }, + { .uname = "CW_VR0_ODD", + .udesc = "Clockwise and odd ring polarity on virtual ring 0", + .ucode = 0x200, + }, + { .uname = "CCW_VR1_EVEN", + .udesc = "Counter-clockwise and even ring polarity on virtual ring 1", + .ucode = 0x4000, /* VR1 encodings must differ from VR0, otherwise both names program the same filter */ + }, + { .uname = "CCW_VR1_ODD", + .udesc = "Counter-clockwise and odd ring polarity on virtual ring 1", + .ucode = 0x8000, + }, + { .uname = "CW_VR1_EVEN", + .udesc = "Clockwise and even ring polarity on virtual ring 1", + .ucode = 0x1000, + }, + { .uname = "CW_VR1_ODD", + .udesc = "Clockwise and odd ring polarity on virtual ring 1", + .ucode = 0x2000, + }, + { .uname = "CW", + .udesc = "Clockwise with any polarity on either virtual rings", + .ucode = 0x3300, + }, + { .uname = "CCW", + .udesc = "Counter-clockwise with any polarity on either virtual rings", + .ucode = 0xcc00, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_snp_resp_recv_local[]={ + { .uname = "RSPI", + .udesc = "Filters for snoop responses of RspI. RspI is returned when the remote cache does not have the data or when the remote cache silently evicts data (e.g.
RFO hit non-modified line)", + .ucode = 0x100, + }, + { .uname = "RSPS", + .udesc = "Filters for snoop responses of RspS. RspS is returned when the remote cache has the data but is not forwarding it. It is a way to let the requesting socket know that it cannot allocate the data in E-state", + .ucode = 0x200, + }, + { .uname = "RSPIFWD", + .udesc = "Filters for snoop responses of RspIFwd. RspIFwd is returned when the remote cache agent forwards data and the requesting agent is able to acquire the data in E or M state. This is commonly returned with RFO transactions. It can be either HitM or HitFE", + .ucode = 0x400, + }, + { .uname = "RSPSFWD", + .udesc = "Filters for snoop responses of RspSFwd. RspSFwd is returned when the remote cache agent forwards data but holds on to its current copy. This is common for data and code reads that hit in a remote socket in E or F state", + .ucode = 0x800, + }, + { .uname = "RSP_WB", + .udesc = "Filters for snoop responses of RspIWB or RspSWB. This is returned when a non-RFO requests hits in M-state. Data and code reads can return either RspIWB or RspSWB depending on how the system has been configured. InvItoE transactions will also return RspIWB because they must acquire ownership", + .ucode = 0x1000, + }, + { .uname = "RSP_FWD_WB", + .udesc = "Filters for snoop responses of RspxFwdxWB. This snoop response is only used in 4s systems. It is used when a snoop HITM in a remote caching agent and it directly forwards data to a requester and simultaneously returns data to the home to be written back to memory", + .ucode = 0x2000, + }, + { .uname = "RSPCNFLCT", + .udesc = "Filters for snoop responses of RspConflict. This is returned when a snoop finds an existing outstanding transaction in a remote caching agent when it CMAs that caching agent. This triggers the conflict resolution hardware.
This covers both RspConflct and RspCnflctWBI", + .ucode = 0x4000, + }, + { .uname = "OTHER", + .udesc = "Filters all other snoop responses", + .ucode = 0x8000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_tracker_cycles_full[]={ + { .uname = "GP", + .udesc = "Number of cycles when the general purpose (GP) HA tracker pool is completely used. It will not return valid count when BT is disabled", + .ucode = 0x100, + }, + { .uname = "ALL", + .udesc = "Number of cycles when the general purpose (GP) HA tracker pool is completely used including reserved HT entries. It will not return valid count when BT is disabled", + .ucode = 0x200, + .uflags = INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_txr_ak[]={ /* umasks for UNC_H_TXR_AK (code 0xe) */ + { .uname = "NDR", + .udesc = "Number of outbound NDR (non-data response) transactions send on the AK ring. AK NDR is used for messages to the local socket", + .ucode = 0x100, + }, + { .uname = "CRD_CBO", + .udesc = "Number of outbound CDR transactions send on the AK ring to CBO", + .ucode = 0x200, + }, + { .uname = "CRD_QPI", + .udesc = "Number of outbound CDR transactions send on the AK ring to QPI", + .ucode = 0x400, + }, +}; + +static const intel_x86_umask_t ivbep_unc_h_iodc_conflicts[]={ /* umasks for UNC_H_IODC_CONFLICTS (code 0x57) */ + { .uname = "ANY", + .udesc = "Any conflict", + .ucode = 0x100, + .uflags = INTEL_X86_DFL | INTEL_X86_NCOMBO, + }, + { .uname = "LAST", + .udesc = "Last conflict", + .ucode = 0x400, + } +}; + +static const intel_x86_entry_t intel_ivbep_unc_h_pe[]={ + { .name = "UNC_H_CLOCKTICKS", + .desc = "HA Uncore clockticks", + .modmsk = IVBEP_UNC_HA_ATTRS, + .cntmsk = 0xf, + .code = 0x00, + }, + { .name = "UNC_H_CONFLICT_CYCLES", + .desc = "Conflict Checks", + .code = 0xb, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_conflict_cycles), + .umasks = ivbep_unc_h_conflict_cycles, + }, + { .name = "UNC_H_DIRECT2CORE_COUNT", + .desc = "Direct2Core Messages Sent", + .code = 0x11, + .cntmsk = 0xf, + .modmsk =
IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_DIRECT2CORE_CYCLES_DISABLED", + .desc = "Cycles when Direct2Core was Disabled", + .code = 0x12, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_DIRECT2CORE_TXN_OVERRIDE", + .desc = "Number of Reads that had Direct2Core Overridden", + .code = 0x13, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_DIRECTORY_LOOKUP", + .desc = "Directory Lookups", + .code = 0xc, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_directory_lookup), + .umasks = ivbep_unc_h_directory_lookup + }, + { .name = "UNC_H_DIRECTORY_UPDATE", + .desc = "Directory Updates", + .code = 0xd, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_directory_update), + .umasks = ivbep_unc_h_directory_update + }, + { .name = "UNC_H_IGR_NO_CREDIT_CYCLES", + .desc = "Cycles without QPI Ingress Credits", + .code = 0x22, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_igr_no_credit_cycles), + .umasks = ivbep_unc_h_igr_no_credit_cycles + }, + { .name = "UNC_H_IMC_RETRY", + .desc = "Retry Events", + .code = 0x1e, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_IMC_WRITES", + .desc = "HA to IMC Full Line Writes Issued", + .code = 0x1a, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_imc_writes), + .umasks = ivbep_unc_h_imc_writes + }, + { .name = "UNC_H_IMC_READS", + .desc = "HA to IMC normal priority reads issued", + .code = 0x17, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_imc_reads), + .umasks = ivbep_unc_h_imc_reads + }, + { .name = "UNC_H_REQUESTS", + .desc = "Read and Write Requests", + .code = 0x1, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_requests), + 
.umasks = ivbep_unc_h_requests + }, + { .name = "UNC_H_RPQ_CYCLES_NO_REG_CREDITS", + .desc = "IMC RPQ Credits Empty", + .code = 0x15, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_rpq_cycles_no_reg_credits), + .umasks = ivbep_unc_h_rpq_cycles_no_reg_credits + }, + { .name = "UNC_H_TAD_REQUESTS_G0", + .desc = "HA Requests to a TAD Region", + .code = 0x1b, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_tad_requests_g0), + .umasks = ivbep_unc_h_tad_requests_g0 + }, + { .name = "UNC_H_TAD_REQUESTS_G1", + .desc = "HA Requests to a TAD Region", + .code = 0x1c, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_tad_requests_g1), + .umasks = ivbep_unc_h_tad_requests_g1 + }, + { .name = "UNC_H_TXR_AD_CYCLES_FULL", + .desc = "AD Egress Full", + .code = 0x2a, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_txr_ad_cycles_full), + .umasks = ivbep_unc_h_txr_ad_cycles_full + }, + { .name = "UNC_H_TXR_AK_CYCLES_FULL", + .desc = "AK Egress Full", + .code = 0x32, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_txr_ak_cycles_full), + .umasks = ivbep_unc_h_txr_ak_cycles_full + }, + { .name = "UNC_H_TXR_AK", + .desc = "Outbound Ring Transactions on AK", + .code = 0xe, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_txr_ak), + .umasks = ivbep_unc_h_txr_ak + }, + { .name = "UNC_H_TXR_BL", + .desc = "Outbound DRS Ring Transactions to Cache", + .code = 0x10, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_txr_bl), + .umasks = ivbep_unc_h_txr_bl + }, + { .name = "UNC_H_TXR_BL_CYCLES_FULL", + .desc = "BL Egress Full", + .code = 0x36, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + 
.numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_txr_ak_cycles_full), + .umasks = ivbep_unc_h_txr_ak_cycles_full, /* identical to snbep_unc_h_txr_ak_cycles_full */ + }, + { .name = "UNC_H_WPQ_CYCLES_NO_REG_CREDITS", + .desc = "HA IMC CHN0 WPQ Credits Empty", + .code = 0x18, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_rpq_cycles_no_reg_credits), + .umasks = ivbep_unc_h_rpq_cycles_no_reg_credits, /* shared */ + }, + { .name = "UNC_H_BT_BYPASS", + .desc = "Backup Tracker bypass", + .code = 0x52, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_BYPASS_IMC", + .desc = "HA to IMC bypass", + .code = 0x14, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_bypass_imc), + .umasks = ivbep_unc_h_bypass_imc, + }, + { .name = "UNC_H_BT_CYCLES_NE", + .desc = "Backup Tracker cycles not empty", + .code = 0x42, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_BT_OCCUPANCY", + .desc = "Backup Tracker inserts", + .code = 0x43, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_bt_occupancy), + .umasks = ivbep_unc_h_bt_occupancy, + }, + { .name = "UNC_H_IGR_AD_QPI2", + .desc = "AD QPI Link 2 credit accumulator", + .code = 0x59, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_IGR_BL_QPI2", + .desc = "BL QPI Link 2 credit accumulator", + .code = 0x5a, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_IODC_INSERTS", + .desc = "IODC inserts", + .code = 0x56, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_IODC_CONFLICTS", + .desc = "IODC conflicts", + .code = 0x57, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_iodc_conflicts), + .umasks = ivbep_unc_h_iodc_conflicts, + }, + { .name = "UNC_H_IODC_OLEN_WBMTOI", + .desc = "IODC zero length writes", + .code = 
0x58, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + { .name = "UNC_H_OSB", + .desc = "OSB snoop broadcast", + .code = 0x53, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_osb), + .umasks = ivbep_unc_h_osb, + }, + { .name = "UNC_H_OSB_EDR", + .desc = "OSB early data return", + .code = 0x54, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_osb_edr), + .umasks = ivbep_unc_h_osb_edr, + }, + { .name = "UNC_H_RING_AD_USED", + .desc = "AD ring in use", + .code = 0x3e, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_ring_ad_used), + .umasks = ivbep_unc_h_ring_ad_used, + }, + { .name = "UNC_H_RING_AK_USED", + .desc = "AK ring in use", + .code = 0x3f, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_ring_ad_used), /* shared */ + .umasks = ivbep_unc_h_ring_ad_used, + }, + { .name = "UNC_H_RING_BL_USED", + .desc = "BL ring in use", + .code = 0x40, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_ring_ad_used), /* shared */ + .umasks = ivbep_unc_h_ring_ad_used, + }, + { .name = "UNC_H_DIRECTORY_LAT_OPT", + .desc = "Directory latency optimization data return path taken", + .code = 0x41, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_HA_ATTRS, + }, + + { .name = "UNC_H_SNP_RESP_RECV_LOCAL", + .desc = "Snoop responses received local", + .code = 0x60, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_snp_resp_recv_local), + .umasks = ivbep_unc_h_snp_resp_recv_local, + }, + { .name = "UNC_H_TXR_BL_OCCUPANCY", + .desc = "BL Egress occupancy", + .code = 0x34, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_txr_bl_occupancy), + .umasks = ivbep_unc_h_txr_bl_occupancy, + }, + { .name = 
"UNC_H_SNOOP_RESP", + .desc = "Snoop responses received", + .code = 0x21, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_HA_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_h_snoop_resp), + .umasks = ivbep_unc_h_snoop_resp + }, +}; diff --git a/lib/events/intel_ivbep_unc_imc_events.h b/lib/events/intel_ivbep_unc_imc_events.h new file mode 100644 index 0000000..ac8ef41 --- /dev/null +++ b/lib/events/intel_ivbep_unc_imc_events.h @@ -0,0 +1,644 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ * + * PMU: ivbep_unc_imc (Intel IvyBridge-EP IMC uncore PMU) + */ + +static const intel_x86_umask_t ivbep_unc_m_cas_count[]={ + { .uname = "ALL", + .udesc = "Counts total number of DRAM CAS commands issued on this channel", + .ucode = 0xf00, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "RD", + .udesc = "Counts all DRAM reads on this channel, incl. underfills", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RD_REG", + .udesc = "Counts number of DRAM read CAS commands issued on this channel, incl. regular read CAS and those with implicit precharge", + .ucode = 0x100, + }, + { .uname = "RD_UNDERFILL", + .udesc = "Counts number of underfill reads issued by the memory controller", + .ucode = 0x200, + }, + { .uname = "WR", + .udesc = "Counts number of DRAM write CAS commands on this channel", + .ucode = 0xc00, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WR_RMM", + .udesc = "Counts Number of opportunistic DRAM write CAS commands issued on this channel", + .ucode = 0x800, + }, + { .uname = "WR_WMM", + .udesc = "Counts number of DRAM write CAS commands issued on this channel while in Write-Major mode", + .ucode = 0x400, + }, + { .uname = "RD_RMM", + .udesc = "Counts Number of opportunistic DRAM read CAS commands issued on this channel", + .ucode = 0x1000, + }, + { .uname = "RD_WMM", + .udesc = "Counts number of DRAM read CAS commands issued on this channel while in Write-Major mode", + .ucode = 0x2000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_dram_refresh[]={ + { .uname = "HIGH", + .udesc = "TBD", + .ucode = 0x400, + }, + { .uname = "PANIC", + .udesc = "TBD", + .ucode = 0x200, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_major_modes[]={ + { .uname = "ISOCH", + .udesc = "Counts cycles in ISOCH Major maode", + .ucode = 0x800, + }, + { .uname = "PARTIAL", + .udesc = "Counts cycles in Partial Major mode", + .ucode = 0x400, + }, + { .uname = "READ", + .udesc = "Counts cycles in Read Major mode", + .ucode = 
0x100, + }, + { .uname = "WRITE", + .udesc = "Counts cycles in Write Major mode", + .ucode = 0x200, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_power_cke_cycles[]={ + { .uname = "RANK0", + .udesc = "Count cycles for rank 0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RANK1", + .udesc = "Count cycles for rank 1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RANK2", + .udesc = "Count cycles for rank 2", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RANK3", + .udesc = "Count cycles for rank 3", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RANK4", + .udesc = "Count cycles for rank 4", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RANK5", + .udesc = "Count cycles for rank 5", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RANK6", + .udesc = "Count cycles for rank 6", + .ucode = 0x4000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RANK7", + .udesc = "Count cycles for rank 7", + .ucode = 0x8000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_preemption[]={ + { .uname = "RD_PREEMPT_RD", + .udesc = "Counts read over read preemptions", + .ucode = 0x100, + }, + { .uname = "RD_PREEMPT_WR", + .udesc = "Counts read over write preemptions", + .ucode = 0x200, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_pre_count[]={ + { .uname = "PAGE_CLOSE", + .udesc = "Counts number of DRAM precharge commands sent on this channel as a result of the page close counter expiring", + .ucode = 0x200, + }, + { .uname = "PAGE_MISS", + .udesc = "Counts number of DRAM precharge commands sent on this channel as a result of page misses", + .ucode = 0x100, + }, + { .uname = "RD", + .udesc = "Precharge due to read", + .ucode = 0x100, + }, + { .uname = "WR", + .udesc = "Precharhe due to write", + .ucode = 0x200, + }, + { .uname = "BYP", + .udesc = "Precharge due to bypass", + .ucode = 0x800, + }, +}; + +static const 
intel_x86_umask_t ivbep_unc_m_act_count[]={ + { .uname = "RD", + .udesc = "Activate due to read", + .ucode = 0x100, + }, + { .uname = "WR", + .udesc = "Activate due to write", + .ucode = 0x200, + }, + { .uname = "BYP", + .udesc = "Activate due to bypass", + .ucode = 0x800, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_byp_cmds[]={ + { .uname = "ACT", + .udesc = "ACT command issued by 2 cycle bypass", + .ucode = 0x100, + }, + { .uname = "CAS", + .udesc = "CAS command issued by 2 cycle bypass", + .ucode = 0x200, + }, + { .uname = "PRE", + .udesc = "PRE command issued by 2 cycle bypass", + .ucode = 0x400, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_rd_cas_prio[]={ + { .uname = "LOW", + .udesc = "Read CAS issued with low priority", + .ucode = 0x100, + }, + { .uname = "MED", + .udesc = "Read CAS issued with medium priority", + .ucode = 0x200, + }, + { .uname = "HIGH", + .udesc = "Read CAS issued with high priority", + .ucode = 0x400, + }, + { .uname = "PANIC", + .udesc = "Read CAS issued with panic non isoch priority (starved)", + .ucode = 0x800, + }, +}; + +static const intel_x86_umask_t ivbep_unc_m_rd_cas_rank0[]={ + { .uname = "BANK0", + .udesc = "Bank 0", + .ucode = 0x100, + }, + { .uname = "BANK1", + .udesc = "Bank 1", + .ucode = 0x200, + }, + { .uname = "BANK2", + .udesc = "Bank 2", + .ucode = 0x400, + }, + { .uname = "BANK3", + .udesc = "Bank 3", + .ucode = 0x800, + }, + { .uname = "BANK4", + .udesc = "Bank 4", + .ucode = 0x1000, + }, + { .uname = "BANK5", + .udesc = "Bank 5", + .ucode = 0x2000, + }, + { .uname = "BANK6", + .udesc = "Bank 6", + .ucode = 0x4000, + }, + { .uname = "BANK7", + .udesc = "Bank 7", + .ucode = 0x8000, + } +}; + +static const intel_x86_umask_t ivbep_unc_m_vmse_wr_push[]={ + { .uname = "WMM", + .udesc = "VMSE write push issued in WMM", + .ucode = 0x100, + }, + { .uname = "RMM", + .udesc = "VMSE write push issued in RMM", + .ucode = 0x200, + } +}; + +static const intel_x86_umask_t ivbep_unc_m_wmm_to_rmm[]={ + { .uname = 
"LOW_THRES", + .udesc = "Transition from WMM to RMM because of starve counter", + .ucode = 0x100, + }, + { .uname = "STARVE", + .udesc = "TBD", + .ucode = 0x200, + }, + { .uname = "VMSE_RETRY", + .udesc = "TBD", + .ucode = 0x400, + } +}; + + +static const intel_x86_entry_t intel_ivbep_unc_m_pe[]={ + { .name = "UNC_M_CLOCKTICKS", + .desc = "IMC Uncore clockticks (fixed counter)", + .modmsk = 0x0, + .cntmsk = 0x100000000ull, + .code = 0xff, /* perf pseudo encoding for fixed counter */ + .flags = INTEL_X86_FIXED, + }, + { .name = "UNC_M_DCLOCKTICKS", + .desc = "IMC Uncore clockticks (generic counters)", + .modmsk = IVBEP_UNC_IMC_ATTRS, + .cntmsk = 0xf, + .code = 0x00, /*encoding for generic counters */ + }, + { .name = "UNC_M_ACT_COUNT", + .desc = "DRAM Activate Count", + .code = 0x1, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_act_count), + .umasks = ivbep_unc_m_act_count + }, + { .name = "UNC_M_CAS_COUNT", + .desc = "DRAM RD_CAS and WR_CAS Commands.", + .code = 0x4, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_cas_count), + .umasks = ivbep_unc_m_cas_count + }, + { .name = "UNC_M_DRAM_PRE_ALL", + .desc = "DRAM Precharge All Commands", + .code = 0x6, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_DRAM_REFRESH", + .desc = "Number of DRAM Refreshes Issued", + .code = 0x5, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_dram_refresh), + .umasks = ivbep_unc_m_dram_refresh + }, + { .name = "UNC_M_ECC_CORRECTABLE_ERRORS", + .desc = "ECC Correctable Errors", + .code = 0x9, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_MAJOR_MODES", + .desc = "Cycles in a Major Mode", + .code = 0x7, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_major_modes), + .umasks = ivbep_unc_m_major_modes + }, + { 
.name = "UNC_M_POWER_CHANNEL_DLLOFF", + .desc = "Channel DLLOFF Cycles", + .code = 0x84, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_POWER_CHANNEL_PPD", + .desc = "Channel PPD Cycles", + .code = 0x85, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_POWER_CKE_CYCLES", + .desc = "CKE_ON_CYCLES by Rank", + .code = 0x83, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_power_cke_cycles), + .umasks = ivbep_unc_m_power_cke_cycles + }, + { .name = "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", + .desc = "Critical Throttle Cycles", + .code = 0x86, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_POWER_SELF_REFRESH", + .desc = "Clock-Enabled Self-Refresh", + .code = 0x43, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_POWER_THROTTLE_CYCLES", + .desc = "Throttle Cycles", + .code = 0x41, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_power_cke_cycles), + .umasks = ivbep_unc_m_power_cke_cycles /* identical to snbep_unc_m_power_cke_cycles */ + }, + { .name = "UNC_M_PREEMPTION", + .desc = "Read Preemption Count", + .code = 0x8, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_preemption), + .umasks = ivbep_unc_m_preemption + }, + { .name = "UNC_M_PRE_COUNT", + .desc = "DRAM Precharge commands.", + .code = 0x2, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_pre_count), + .umasks = ivbep_unc_m_pre_count + }, + { .name = "UNC_M_RPQ_CYCLES_NE", + .desc = "Read Pending Queue Not Empty", + .code = 0x11, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_RPQ_INSERTS", + .desc = "Read Pending Queue Allocations", + .code = 0x10, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_WPQ_CYCLES_FULL", + .desc = "Write 
Pending Queue Full Cycles", + .code = 0x22, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_WPQ_CYCLES_NE", + .desc = "Write Pending Queue Not Empty", + .code = 0x21, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_WPQ_INSERTS", + .desc = "Write Pending Queue Allocations", + .code = 0x20, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_WPQ_READ_HIT", + .desc = "Write Pending Queue CAM Match", + .code = 0x23, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_WPQ_WRITE_HIT", + .desc = "Write Pending Queue CAM Match", + .code = 0x24, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_BYP_CMDS", + .desc = "Bypass command event", + .code = 0xa1, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_byp_cmds), + .umasks = ivbep_unc_m_byp_cmds + }, + { .name = "UNC_M_RD_CAS_PRIO", + .desc = "Read CAS priority", + .code = 0xa0, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_prio), + .umasks = ivbep_unc_m_rd_cas_prio + }, + { .name = "UNC_M_RD_CAS_RANK0", + .desc = "Read CAS access to Rank 0", + .code = 0xb0, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_RD_CAS_RANK1", + .desc = "Read CAS access to Rank 1", + .code = 0xb1, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_RD_CAS_RANK2", + .desc = "Read CAS access to Rank 2", + .code = 0xb2, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_RD_CAS_RANK3", + .desc = 
"Read CAS access to Rank 3", + .code = 0xb3, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_RD_CAS_RANK4", + .desc = "Read CAS access to Rank 4", + .code = 0xb4, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_RD_CAS_RANK5", + .desc = "Read CAS access to Rank 5", + .code = 0xb5, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_RD_CAS_RANK6", + .desc = "Read CAS access to Rank 6", + .code = 0xb6, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_RD_CAS_RANK7", + .desc = "Read CAS access to Rank 7", + .code = 0xb7, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_VMSE_MXB_WR_OCCUPANCY", + .desc = "VMSE MXB write buffer occupancy", + .code = 0x91, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_VMSE_WR_PUSH", + .desc = "VMSE WR push issued", + .code = 0x90, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_vmse_wr_push), + .umasks = ivbep_unc_m_vmse_wr_push + }, + { .name = "UNC_M_WMM_TO_RMM", + .desc = "Transitions from WMM to RMM because of low threshold", + .code = 0xc0, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_wmm_to_rmm), + .umasks = ivbep_unc_m_wmm_to_rmm + }, + { .name = "UNC_M_WRONG_MM", + .desc = 
"Not getting the requested major mode", + .code = 0xc1, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_IMC_ATTRS, + }, + { .name = "UNC_M_WR_CAS_RANK0", + .desc = "Write CAS access to Rank 0", + .code = 0xb8, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_WR_CAS_RANK1", + .desc = "Write CAS access to Rank 1", + .code = 0xb9, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_WR_CAS_RANK2", + .desc = "Write CAS access to Rank 2", + .code = 0xba, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_WR_CAS_RANK3", + .desc = "Write CAS access to Rank 3", + .code = 0xbb, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_WR_CAS_RANK4", + .desc = "Write CAS access to Rank 4", + .code = 0xbc, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_WR_CAS_RANK5", + .desc = "Write CAS access to Rank 5", + .code = 0xbd, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = "UNC_M_WR_CAS_RANK6", + .desc = "Write CAS access to Rank 6", + .code = 0xbe, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, + { .name = 
"UNC_M_WR_CAS_RANK7", + .desc = "Write CAS access to Rank 7", + .code = 0xbf, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_IMC_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ + .umasks = ivbep_unc_m_rd_cas_rank0 + }, +}; diff --git a/lib/events/intel_ivbep_unc_irp_events.h b/lib/events/intel_ivbep_unc_irp_events.h new file mode 100644 index 0000000..86b5168 --- /dev/null +++ b/lib/events/intel_ivbep_unc_irp_events.h @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + * + * This file has been automatically generated. 
+ * + * PMU: ivbep_unc_irp (Intel IvyBridge-EP IRP uncore) + */ + +static const intel_x86_umask_t ivbep_unc_i_address_match[]={ + { .uname = "STALL_COUNT", + .udesc = "Number of time when it is not possible to merge two conflicting requests, a stall event occurs", + .ucode = 0x100, + }, + { .uname = "MERGE_COUNT", + .udesc = "Number of times when two reuqests to the same address from the same source are received back to back, it is possible to merge them", + .ucode = 0x200, + }, +}; + +static const intel_x86_umask_t ivbep_unc_i_cache_ack_pending_occupancy[]={ + { .uname = "ANY", + .udesc = "Any source", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, + { .uname = "SOURCE", + .udesc = "Track all requests from any source port", + .ucode = 0x200, + }, +}; + +static const intel_x86_umask_t ivbep_unc_i_tickles[]={ + { .uname = "LOST_OWNERSHIP", + .udesc = "Number of request that lost ownership as a result of a tickle", + .ucode = 0x100, + }, + { .uname = "TOP_OF_QUEUE", + .udesc = "Number of cases when a tickle was received but the request was at the head of the queue in the switch. In this case data is returned rather than releasing ownership", + .ucode = 0x200, + }, +}; + + +static const intel_x86_umask_t ivbep_unc_i_transactions[]={ + { .uname = "READS", + .udesc = "Number of read requests (not including read prefetches)", + .ucode = 0x100, + }, + { .uname = "WRITES", + .udesc = "Number of write requests. 
Each write should have a prefetch, so there is no need to explicitly track these requests", + .ucode = 0x200, + }, + { .uname = "RD_PREFETCHES", + .udesc = "Number of read prefetches", + .ucode = 0x400, + }, +}; + +static const intel_x86_entry_t intel_ivbep_unc_i_pe[]={ + { .name = "UNC_I_CLOCKTICKS", + .desc = "Number of uclks in domain", + .code = 0x0, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_ADDRESS_MATCH", + .desc = "Address match conflict count", + .code = 0x17, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_i_address_match), + .umasks = ivbep_unc_i_address_match + }, + { .name = "UNC_I_CACHE_ACK_PENDING_OCCUPANCY", + .desc = "Write ACK pending occupancy", + .code = 0x14, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_i_cache_ack_pending_occupancy), + .umasks = ivbep_unc_i_cache_ack_pending_occupancy + }, + { .name = "UNC_I_CACHE_OWN_OCCUPANCY", + .desc = "Outstanding write ownership occupancy", + .code = 0x13, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_i_cache_ack_pending_occupancy), + .umasks = ivbep_unc_i_cache_ack_pending_occupancy /* shared */ + }, + { .name = "UNC_I_CACHE_READ_OCCUPANCY", + .desc = "Outstanding read occupancy", + .code = 0x10, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_i_cache_ack_pending_occupancy), + .umasks = ivbep_unc_i_cache_ack_pending_occupancy /* shared */ + }, + { .name = "UNC_I_CACHE_TOTAL_OCCUPANCY", + .desc = "Total write cache occupancy", + .code = 0x12, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_i_cache_ack_pending_occupancy), + .umasks = ivbep_unc_i_cache_ack_pending_occupancy /* shared */ + }, + { .name = "UNC_I_CACHE_WRITE_OCCUPANCY", + .desc = "Outstanding write occupancy", + .code = 0x11, + 
.cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_i_cache_ack_pending_occupancy), + .umasks = ivbep_unc_i_cache_ack_pending_occupancy /* shared */ + }, + { .name = "UNC_I_RXR_AK_CYCLES_FULL", + .desc = "TBD", + .code = 0xb, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_AK_INSERTS", + .desc = "Egress cycles full", + .code = 0xa, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_AK_OCCUPANCY", + .desc = "TBD", + .code = 0x0c, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_DRS_CYCLES_FULL", + .desc = "TBD", + .code = 0x4, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_DRS_INSERTS", + .desc = "BL Ingress occupancy DRS", + .code = 0x1, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_DRS_OCCUPANCY", + .desc = "TBD", + .code = 0x7, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_NCB_CYCLES_FULL", + .desc = "TBD", + .code = 0x5, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_NCB_INSERTS", + .desc = "BL Ingress occupancy NCB", + .code = 0x2, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_NCB_OCCUPANCY", + .desc = "TBD", + .code = 0x8, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_NCS_CYCLES_FULL", + .desc = "TBD", + .code = 0x6, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_NCS_INSERTS", + .desc = "BL Ingress Occupancy NCS", + .code = 0x3, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_RXR_BL_NCS_OCCUPANCY", + .desc = "TBD", + .code = 0x9, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_TICKLES", + .desc = "Tickle count", + .code = 0x16, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = 
LIBPFM_ARRAY_SIZE(ivbep_unc_i_tickles), + .umasks = ivbep_unc_i_tickles + }, + { .name = "UNC_I_TRANSACTIONS", + .desc = "Inbound transaction count", + .code = 0x15, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = SNBEP_UNC_IRP_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_i_transactions), + .umasks = ivbep_unc_i_transactions + }, + { .name = "UNC_I_TXR_AD_STALL_CREDIT_CYCLES", + .desc = "No AD Egress credit stalls", + .code = 0x18, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_TXR_BL_STALL_CREDIT_CYCLES", + .desc = "No BL Egress credit stalls", + .code = 0x19, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_TXR_DATA_INSERTS_NCB", + .desc = "Outbound read requests", + .code = 0xe, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_TXR_DATA_INSERTS_NCS", + .desc = "Outbound read requests", + .code = 0xf, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_TXR_REQUEST_OCCUPANCY", + .desc = "Outbound request queue occupancy", + .code = 0xd, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, + { .name = "UNC_I_WRITE_ORDERING_STALL_CYCLES", + .desc = "Write ordering stalls", + .code = 0x1a, + .cntmsk = 0x3, + .modmsk = SNBEP_UNC_IRP_ATTRS, + }, +}; diff --git a/lib/events/intel_ivbep_unc_pcu_events.h b/lib/events/intel_ivbep_unc_pcu_events.h new file mode 100644 index 0000000..14188e5 --- /dev/null +++ b/lib/events/intel_ivbep_unc_pcu_events.h @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2014 Google Inc. 
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ * + * PMU: ivbep_unc_pcu (Intel IvyBridge-EP PCU uncore) + */ + +static const intel_x86_umask_t ivbep_unc_p_power_state_occupancy[]={ + { .uname = "CORES_C0", + .udesc = "Counts number of cores in C0", + .ucode = 0x4000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "CORES_C3", + .udesc = "Counts number of cores in C3", + .ucode = 0x8000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "CORES_C6", + .udesc = "Counts number of cores in C6", + .ucode = 0xc000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_p_occupancy_counters[]={ + { .uname = "C0", + .udesc = "Counts number of cores in C0", + .ucode = 0x0100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "C3", + .udesc = "Counts number of cores in C3", + .ucode = 0x0200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "C6", + .udesc = "Counts number of cores in C6", + .ucode = 0x0300, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_entry_t intel_ivbep_unc_p_pe[]={ + { .name = "UNC_P_CLOCKTICKS", + .desc = "PCU Uncore clockticks", + .modmsk = IVBEP_UNC_PCU_ATTRS, + .cntmsk = 0xf, + .code = 0x00, + }, + { .name = "UNC_P_CORE0_TRANSITION_CYCLES", + .desc = "Core 0 C State Transition Cycles", + .code = 0x70, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE1_TRANSITION_CYCLES", + .desc = "Core 1 C State Transition Cycles", + .code = 0x71, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE2_TRANSITION_CYCLES", + .desc = "Core 2 C State Transition Cycles", + .code = 0x72, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE3_TRANSITION_CYCLES", + .desc = "Core 3 C State Transition Cycles", + .code = 0x73, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE4_TRANSITION_CYCLES", + .desc = "Core 4 C State Transition Cycles", + .code = 0x74, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE5_TRANSITION_CYCLES", + .desc = "Core 5 C 
State Transition Cycles", + .code = 0x75, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE6_TRANSITION_CYCLES", + .desc = "Core 6 C State Transition Cycles", + .code = 0x76, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE7_TRANSITION_CYCLES", + .desc = "Core 7 C State Transition Cycles", + .code = 0x77, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE8_TRANSITION_CYCLES", + .desc = "Core 8 C State Transition Cycles", + .code = 0x78, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE9_TRANSITION_CYCLES", + .desc = "Core 9 C State Transition Cycles", + .code = 0x79, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE10_TRANSITION_CYCLES", + .desc = "Core 10 C State Transition Cycles", + .code = 0x7a, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE11_TRANSITION_CYCLES", + .desc = "Core 11 C State Transition Cycles", + .code = 0x7b, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE12_TRANSITION_CYCLES", + .desc = "Core 12 C State Transition Cycles", + .code = 0x7c, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE13_TRANSITION_CYCLES", + .desc = "Core 13 C State Transition Cycles", + .code = 0x7d, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_CORE14_TRANSITION_CYCLES", + .desc = "Core 14 C State Transition Cycles", + .code = 0x7e, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE0", + .desc = "Deep C state rejection Core 0", + .code = 0x17 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE1", + .desc = "Deep C state rejection Core 1", + .code = 0x18 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE2", + 
.desc = "Deep C state rejection Core 2", + .code = 0x19 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE3", + .desc = "Deep C state rejection Core 3", + .code = 0x1a | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE4", + .desc = "Deep C state rejection Core 4", + .code = 0x1b | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE5", + .desc = "Deep C state rejection Core 5", + .code = 0x1c | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE6", + .desc = "Deep C state rejection Core 6", + .code = 0x1d | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE7", + .desc = "Deep C state rejection Core 7", + .code = 0x1e | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE8", + .desc = "Deep C state rejection Core 8", + .code = 0x1f | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE9", + .desc = "Deep C state rejection Core 9", + .code = 0x20 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE10", + .desc = "Deep C state rejection Core 10", + .code = 0x21 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE11", + .desc = "Deep C state rejection Core 11", + .code = 0x22 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE12", + .desc = "Deep C state rejection Core 12", + .code = 0x23 | (1ULL << 21), 
/* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE13", + .desc = "Deep C state rejection Core 13", + .code = 0x24 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DELAYED_C_STATE_ABORT_CORE14", + .desc = "Deep C state rejection Core 14", + .code = 0x25 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE0", + .desc = "Core 0 C State Demotions", + .code = 0x1e, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE1", + .desc = "Core 1 C State Demotions", + .code = 0x1f, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE2", + .desc = "Core 2 C State Demotions", + .code = 0x20, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE3", + .desc = "Core 3 C State Demotions", + .code = 0x21, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE4", + .desc = "Core 4 C State Demotions", + .code = 0x22, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE5", + .desc = "Core 5 C State Demotions", + .code = 0x23, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE6", + .desc = "Core 6 C State Demotions", + .code = 0x24, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE7", + .desc = "Core 7 C State Demotions", + .code = 0x25, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE8", + .desc = "Core 8 C State Demotions", + .code = 0x40, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE9", + .desc = "Core 9 C State Demotions", + .code = 0x41, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE10", + .desc = "Core 10 C State Demotions", + 
.code = 0x42, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE11", + .desc = "Core 11 C State Demotions", + .code = 0x43, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE12", + .desc = "Core 12 C State Demotions", + .code = 0x44, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE13", + .desc = "Core 13 C State Demotions", + .code = 0x45, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE14", + .desc = "Core 14 C State Demotions", + .code = 0x46, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_DEMOTIONS_CORE14", + .desc = "Core 14 C State Demotions", + .code = 0x2d, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_FREQ_BAND0_CYCLES", + .desc = "Frequency Residency", + .code = 0xb, + .cntmsk = 0xf, + .flags = INTEL_X86_NO_AUTOENCODE, + .modmsk = IVBEP_UNC_PCU_BAND_ATTRS, + .modmsk_req = _SNBEP_UNC_ATTR_FF, + }, + { .name = "UNC_P_FREQ_BAND1_CYCLES", + .desc = "Frequency Residency", + .code = 0xc, + .cntmsk = 0xf, + .flags = INTEL_X86_NO_AUTOENCODE, + .modmsk = IVBEP_UNC_PCU_BAND_ATTRS, + .modmsk_req = _SNBEP_UNC_ATTR_FF, + }, + { .name = "UNC_P_FREQ_BAND2_CYCLES", + .desc = "Frequency Residency", + .code = 0xd, + .cntmsk = 0xf, + .flags = INTEL_X86_NO_AUTOENCODE, + .modmsk = IVBEP_UNC_PCU_BAND_ATTRS, + .modmsk_req = _SNBEP_UNC_ATTR_FF, + }, + { .name = "UNC_P_FREQ_BAND3_CYCLES", + .desc = "Frequency Residency", + .code = 0xe, + .cntmsk = 0xf, + .flags = INTEL_X86_NO_AUTOENCODE, + .modmsk = IVBEP_UNC_PCU_BAND_ATTRS, + .modmsk_req = _SNBEP_UNC_ATTR_FF, + }, + { .name = "UNC_P_FREQ_MAX_CURRENT_CYCLES", + .desc = "Current Strongest Upper Limit Cycles", + .code = 0x7, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", + .desc = "Thermal Strongest Upper Limit Cycles", + .code = 0x4, + .cntmsk = 0xf, + .modmsk = 
IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_FREQ_MAX_OS_CYCLES", + .desc = "OS Strongest Upper Limit Cycles", + .code = 0x6, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_FREQ_MAX_POWER_CYCLES", + .desc = "Power Strongest Upper Limit Cycles", + .code = 0x5, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_FREQ_MIN_PERF_P_CYCLES", + .desc = "Perf P Limit Strongest Lower Limit Cycles", + .code = 0x02 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + + { .name = "UNC_P_FREQ_MIN_IO_P_CYCLES", + .desc = "IO P Limit Strongest Lower Limit Cycles", + .code = 0x61, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_FREQ_TRANS_CYCLES", + .desc = "Cycles spent changing Frequency", + .code = 0x60, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_MEMORY_PHASE_SHEDDING_CYCLES", + .desc = "Memory Phase Shedding Cycles", + .code = 0x2f, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_PKG_C_EXIT_LATENCY", + .desc = "Package C state exit latency. 
Counts cycles the package is transitioning from C2 to C3", + .code = 0x26 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_POWER_STATE_OCCUPANCY", + .desc = "Number of cores in C0", + .code = 0x80, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_PCU_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_p_power_state_occupancy), + .umasks = ivbep_unc_p_power_state_occupancy + }, + { .name = "UNC_P_PROCHOT_EXTERNAL_CYCLES", + .desc = "External Prochot", + .code = 0xa, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_PROCHOT_INTERNAL_CYCLES", + .desc = "Internal Prochot", + .code = 0x9, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_TOTAL_TRANSITION_CYCLES", + .desc = "Total Core C State Transition Cycles", + .code = 0x63, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_VOLT_TRANS_CYCLES_CHANGE", + .desc = "Cycles Changing Voltage", + .code = 0x3, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_VOLT_TRANS_CYCLES_DECREASE", + .desc = "Cycles Decreasing Voltage", + .code = 0x2, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_VOLT_TRANS_CYCLES_INCREASE", + .desc = "Cycles Increasing Voltage", + .code = 0x1, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, + { .name = "UNC_P_VR_HOT_CYCLES", + .desc = "VR Hot", + .code = 0x32, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_PCU_ATTRS, + }, +}; diff --git a/lib/events/intel_ivbep_unc_qpi_events.h b/lib/events/intel_ivbep_unc_qpi_events.h new file mode 100644 index 0000000..cfb3469 --- /dev/null +++ b/lib/events/intel_ivbep_unc_qpi_events.h @@ -0,0 +1,696 @@ +/* + * Copyright (c) 2014 Google Inc. 
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + * + * This file has been automatically generated. 
+ * + * PMU: ivbep_unc_qpi (Intel IvyBridge-EP QPI uncore) + */ + +static const intel_x86_umask_t ivbep_unc_q_direct2core[]={ + { .uname = "FAILURE_CREDITS", + .udesc = "Number of spawn failures due to lack of Egress credits", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "FAILURE_CREDITS_RBT", + .udesc = "Number of spawn failures due to lack of Egress credit and route-back table (RBT) bit was not set", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "FAILURE_RBT_HIT", + .udesc = "Number of spawn failures because route-back table (RBT) specified that the transaction should not trigger a direct2core transaction", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SUCCESS_RBT_HIT", + .udesc = "Number of spawn successes", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "FAILURE_MISS", + .udesc = "Number of spawn failures due to RBT tag not matching although the valid bit was set and there were enough Egress credits", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "FAILURE_CREDITS_MISS", + .udesc = "Number of spawn failures due to RBT tag not matching and there were not enough Egress credits.
The valid bit was set", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "FAILURE_RBT_MISS", + .udesc = "Number of spawn failures due to RBT tag not matching, the valid bit was not set but there were enough Egress credits", + .ucode = 0x4000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "FAILURE_CREDITS_RBT_MISS", + .udesc = "Number of spawn failures due to RBT tag not matching, the valid bit was not set and there were not enough Egress credits", + .ucode = 0x8000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_q_rxl_credits_consumed_vn0[]={ + { .uname = "DRS", + .udesc = "Number of times VN0 consumed for DRS message class", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "HOM", + .udesc = "Number of times VN0 consumed for HOM message class", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCB", + .udesc = "Number of times VN0 consumed for NCB message class", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCS", + .udesc = "Number of times VN0 consumed for NCS message class", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NDR", + .udesc = "Number of times VN0 consumed for NDR message class", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SNP", + .udesc = "Number of times VN0 consumed for SNP message class", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_q_rxl_credits_consumed_vn1[]={ + { .uname = "DRS", + .udesc = "Number of times VN1 consumed for DRS message class", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "HOM", + .udesc = "Number of times VN1 consumed for HOM message class", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCB", + .udesc = "Number of times VN1 consumed for NCB message class", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCS", + .udesc = "Number of times VN1 consumed for NCS 
message class", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NDR", + .udesc = "Number of times VN1 consumed for NDR message class", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SNP", + .udesc = "Number of times VN1 consumed for SNP message class", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + + +static const intel_x86_umask_t ivbep_unc_q_rxl_flits_g0[]={ + { .uname = "DATA", + .udesc = "Number of data flits over QPI", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IDLE", + .udesc = "Number of flits over QPI that do not hold protocol payload", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NON_DATA", + .udesc = "Number of non-NULL non-data flits over QPI", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_q_txl_flits_g0[]={ + { .uname = "DATA", + .udesc = "Number of data flits over QPI", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NON_DATA", + .udesc = "Number of non-NULL non-data flits over QPI", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_q_rxl_flits_g1[]={ + { .uname = "DRS", + .udesc = "Number of flits over QPI on the Data Response (DRS) channel", + .ucode = 0x1800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "DRS_DATA", + .udesc = "Number of data flits over QPI on the Data Response (DRS) channel", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "DRS_NONDATA", + .udesc = "Number of protocol flits over QPI on the Data Response (DRS) channel", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "HOM", + .udesc = "Number of flits over QPI on the home channel", + .ucode = 0x600, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "HOM_NONREQ", + .udesc = "Number of non-request flits over QPI on the home channel", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "HOM_REQ", + .udesc = "Number 
of data requests over QPI on the home channel", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "SNP", + .udesc = "Number of snoop request flits over QPI", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_q_rxl_flits_g2[]={ + { .uname = "NCB", + .udesc = "Number of non-coherent bypass flits", + .ucode = 0xc00, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCB_DATA", + .udesc = "Number of non-coherent data flits", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCB_NONDATA", + .udesc = "Number of bypass non-data flits", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCS", + .udesc = "Number of non-coherent standard (NCS) flits", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NDR_AD", + .udesc = "Number of flits received over Non-data response (NDR) channel", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NDR_AK", + .udesc = "Number of flits received on the Non-data response (NDR) channel", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_q_txr_ad_hom_credit_acquired[]={ + { .uname = "VN0", + .udesc = "for VN0", + .ucode = 0x100, + }, + { .uname = "VN1", + .udesc = "for VN1", + .ucode = 0x200, + }, +}; + + static const intel_x86_umask_t ivbep_unc_q_txr_bl_drs_credit_acquired[]={ + { .uname = "VN0", + .udesc = "for VN0", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "VN1", + .udesc = "for VN1", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "VN_SHR", + .udesc = "for shared VN", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_entry_t intel_ivbep_unc_q_pe[]={ + { .name = "UNC_Q_CLOCKTICKS", + .desc = "Number of qfclks", + .code = 0x14, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_CTO_COUNT", + .desc = "Count of CTO Events", + .code = 0x38 | (1ULL << 21), /* sel_ext
*/ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_DIRECT2CORE", + .desc = "Direct 2 Core Spawning", + .code = 0x13, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_direct2core), + .umasks = ivbep_unc_q_direct2core + }, + { .name = "UNC_Q_L1_POWER_CYCLES", + .desc = "Cycles in L1", + .code = 0x12, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL0P_POWER_CYCLES", + .desc = "Cycles in L0p", + .code = 0x10, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL0_POWER_CYCLES", + .desc = "Cycles in L0", + .code = 0xf, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL_BYPASSED", + .desc = "Rx Flit Buffer Bypassed", + .code = 0x9, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VN0", + .desc = "VN0 Credit Consumed", + .code = 0x1e | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_credits_consumed_vn0), + .umasks = ivbep_unc_q_rxl_credits_consumed_vn0 + }, + { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VN1", + .desc = "VN1 Credit Consumed", + .code = 0x39 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_credits_consumed_vn1), + .umasks = ivbep_unc_q_rxl_credits_consumed_vn1 + }, + { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VNA", + .desc = "VNA Credit Consumed", + .code = 0x1d | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL_CYCLES_NE", + .desc = "RxQ Cycles Not Empty", + .code = 0xa, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL_FLITS_G0", + .desc = "Flits Received - Group 0", + .code = 0x1, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g0), 
+ .umasks = ivbep_unc_q_rxl_flits_g0 + }, + { .name = "UNC_Q_RXL_FLITS_G1", + .desc = "Flits Received - Group 1", + .code = 0x2 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g1), + .umasks = ivbep_unc_q_rxl_flits_g1 + }, + + { .name = "UNC_Q_RXL_FLITS_G2", + .desc = "Flits Received - Group 2", + .code = 0x3 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g2), + .umasks = ivbep_unc_q_rxl_flits_g2 + }, + { .name = "UNC_Q_RXL_INSERTS", + .desc = "Rx Flit Buffer Allocations", + .code = 0x8, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL_INSERTS_DRS", + .desc = "Rx Flit Buffer Allocations - DRS", + .code = 0x9 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_INSERTS_HOM", + .desc = "Rx Flit Buffer Allocations - HOM", + .code = 0xc | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_INSERTS_NCB", + .desc = "Rx Flit Buffer Allocations - NCB", + .code = 0xa | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_INSERTS_NCS", + .desc = "Rx Flit Buffer Allocations - NCS", + .code = 0xb | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = 
ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_INSERTS_NDR", + .desc = "Rx Flit Buffer Allocations - NDR", + .code = 0xe | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_INSERTS_SNP", + .desc = "Rx Flit Buffer Allocations - SNP", + .code = 0xd | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_OCCUPANCY", + .desc = "RxQ Occupancy - All Packets", + .code = 0xb, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_RXL_OCCUPANCY_DRS", + .desc = "RxQ Occupancy - DRS", + .code = 0x15 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_OCCUPANCY_HOM", + .desc = "RxQ Occupancy - HOM", + .code = 0x18 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_OCCUPANCY_NCB", + .desc = "RxQ Occupancy - NCB", + .code = 0x16 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_OCCUPANCY_NCS", + .desc = "RxQ Occupancy - NCS", + .code = 0x17 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = 
LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_OCCUPANCY_NDR", + .desc = "RxQ Occupancy - NDR", + .code = 0x1a | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_RXL_OCCUPANCY_SNP", + .desc = "RxQ Occupancy - SNP", + .code = 0x19 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXL0P_POWER_CYCLES", + .desc = "Cycles in L0p", + .code = 0xd, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_TXL0_POWER_CYCLES", + .desc = "Cycles in L0", + .code = 0xc, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_TXL_BYPASSED", + .desc = "Tx Flit Buffer Bypassed", + .code = 0x5, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_TXL_CYCLES_NE", + .desc = "Tx Flit Buffer Cycles not Empty", + .code = 0x6, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_TXL_FLITS_G0", + .desc = "Flits Transferred - Group 0", + .code = 0x0, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txl_flits_g0), + .umasks = ivbep_unc_q_txl_flits_g0 + }, + { .name = "UNC_Q_TXL_FLITS_G1", + .desc = "Flits Transferred - Group 1", + .code = 0x0 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g1), + .umasks = ivbep_unc_q_rxl_flits_g1 /* shared with rxl_flits_g1 */ + }, + { .name = "UNC_Q_TXL_FLITS_G2", + .desc = "Flits Transferred - Group 2", + .code = 0x1 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, 
+ .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g2), + .umasks = ivbep_unc_q_rxl_flits_g2 /* shared with rxl_flits_g2 */ + }, + { .name = "UNC_Q_TXL_INSERTS", + .desc = "Tx Flit Buffer Allocations", + .code = 0x4, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_TXL_OCCUPANCY", + .desc = "Tx Flit Buffer Occupancy", + .code = 0x7, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_VNA_CREDIT_RETURNS", + .desc = "VNA Credits Returned", + .code = 0x1c | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_VNA_CREDIT_RETURN_OCCUPANCY", + .desc = "VNA Credits Pending Return - Occupancy", + .code = 0x1b | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_QPI_ATTRS, + }, + { .name = "UNC_Q_TXR_AD_HOM_CREDIT_ACQUIRED", + .desc = "R3QPI Egress credit occupancy AD HOM", + .code = 0x26 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_AD_HOM_CREDIT_OCCUPANCY", + .desc = "R3QPI Egress credit occupancy AD HOM", + .code = 0x22 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_AD_NDR_CREDIT_ACQUIRED", + .desc = "R3QPI Egress credit occupancy AD NDR", + .code = 0x28 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_AD_NDR_CREDIT_OCCUPANCY", + .desc = "R3QPI Egress credit occupancy AD NDR", + .code = 0x24 | (1ULL << 21), /* sel_ext 
*/ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_AD_SNP_CREDIT_ACQUIRED", + .desc = "R3QPI Egress credit occupancy AD SNP", + .code = 0x27 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_AD_SNP_CREDIT_OCCUPANCY", + .desc = "R3QPI Egress credit occupancy AD SNP", + .code = 0x23 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_AK_NDR_CREDIT_ACQUIRED", + .desc = "R3QPI Egress credit occupancy AK NDR", + .code = 0x29 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_AK_NDR_CREDIT_OCCUPANCY", + .desc = "R3QPI Egress credit occupancy AD NDR", + .code = 0x25 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_BL_DRS_CREDIT_ACQUIRED", + .desc = "R3QPI Egress credit occupancy BL DRS", + .code = 0x2a | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_bl_drs_credit_acquired), + .umasks = ivbep_unc_q_txr_bl_drs_credit_acquired, + }, + { .name = "UNC_Q_TXR_BL_DRS_CREDIT_OCCUPANCY", + .desc = "R3QPI Egress credit 
occupancy BL DRS", + .code = 0x1f | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_bl_drs_credit_acquired), /* shared */ + .umasks = ivbep_unc_q_txr_bl_drs_credit_acquired, + }, + { .name = "UNC_Q_TXR_BL_NCB_CREDIT_ACQUIRED", + .desc = "R3QPI Egress credit occupancy BL NCB", + .code = 0x2b | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_BL_NCB_CREDIT_OCCUPANCY", + .desc = "R3QPI Egress credit occupancy BL NCB", + .code = 0x20 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_BL_NCS_CREDIT_ACQUIRED", + .desc = "R3QPI Egress credit occupancy BL NCS", + .code = 0x2c | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, + { .name = "UNC_Q_TXR_BL_NCS_CREDIT_OCCUPANCY", + .desc = "R3QPI Egress credit occupancy BL NCS", + .code = 0x21 | (1ULL << 21), /* sel_ext */ + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ + .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, + }, +}; diff --git a/lib/events/intel_ivbep_unc_r2pcie_events.h b/lib/events/intel_ivbep_unc_r2pcie_events.h new file mode 100644 index 0000000..59d96b9 --- /dev/null +++ b/lib/events/intel_ivbep_unc_r2pcie_events.h @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2014 Google Inc. 
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + * + * This file has been automatically generated. 
+ * + * PMU: ivbep_unc_r2pcie (Intel IvyBridge-EP R2PCIe uncore) + */ + +static const intel_x86_umask_t ivbep_unc_r2_ring_ad_used[]={ + { .uname = "CCW_VR0_EVEN", + .udesc = "Counter-clockwise and even ring polarity on virtual ring 0", + .ucode = 0x400, + }, + { .uname = "CCW_VR0_ODD", + .udesc = "Counter-clockwise and odd ring polarity on virtual ring 0", + .ucode = 0x800, + }, + { .uname = "CW_VR0_EVEN", + .udesc = "Clockwise and even ring polarity on virtual ring 0", + .ucode = 0x100, + }, + { .uname = "CW_VR0_ODD", + .udesc = "Clockwise and odd ring polarity on virtual ring 0", + .ucode = 0x200, + }, + { .uname = "CCW_VR1_EVEN", + .udesc = "Counter-clockwise and even ring polarity on virtual ring 1", + .ucode = 0x4000, /* bit 14, covered by CCW (0xcc00) */ + }, + { .uname = "CCW_VR1_ODD", + .udesc = "Counter-clockwise and odd ring polarity on virtual ring 1", + .ucode = 0x8000, /* bit 15, covered by CCW (0xcc00) */ + }, + { .uname = "CW_VR1_EVEN", + .udesc = "Clockwise and even ring polarity on virtual ring 1", + .ucode = 0x1000, /* bit 12, covered by CW (0x3300) */ + }, + { .uname = "CW_VR1_ODD", + .udesc = "Clockwise and odd ring polarity on virtual ring 1", + .ucode = 0x2000, /* bit 13, covered by CW (0x3300) */ + }, + { .uname = "CW", + .udesc = "Clockwise with any polarity on either virtual rings", + .ucode = 0x3300, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "CCW", + .udesc = "Counter-clockwise with any polarity on either virtual rings", + .ucode = 0xcc00, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r2_rxr_ak_bounces[]={ + { .uname = "CW", + .udesc = "Clockwise", + .ucode = 0x100, + }, + { .uname = "CCW", + .udesc = "Counter-clockwise", + .ucode = 0x200, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r2_rxr_occupancy[]={ + { .uname = "DRS", + .udesc = "DRS Ingress queue", + .ucode = 0x800, + .uflags = INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r2_ring_iv_used[]={ + { .uname = "CW", + .udesc = "Clockwise with any polarity on either virtual rings", + .ucode = 0x3300, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "CCW", + .udesc =
"Counter-clockwise with any polarity on either virtual rings", + .ucode = 0xcc00, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ANY", + .udesc = "any direction and any polarity on any virtual ring", + .ucode = 0xff00, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r2_rxr_cycles_ne[]={ + { .uname = "NCB", + .udesc = "NCB Ingress queue", + .ucode = 0x1000, + }, + { .uname = "NCS", + .udesc = "NCS Ingress queue", + .ucode = 0x2000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r2_txr_cycles_full[]={ + { .uname = "AD", + .udesc = "AD Egress queue", + .ucode = 0x100, + }, + { .uname = "AK", + .udesc = "AK Egress queue", + .ucode = 0x200, + }, + { .uname = "BL", + .udesc = "BL Egress queue", + .ucode = 0x400, + }, +}; + +static const intel_x86_entry_t intel_ivbep_unc_r2_pe[]={ + { .name = "UNC_R2_CLOCKTICKS", + .desc = "Number of uclks in domain", + .code = 0x1, + .cntmsk = 0xf, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + }, + { .name = "UNC_R2_RING_AD_USED", + .desc = "R2 AD Ring in Use", + .code = 0x7, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_ring_ad_used), + .umasks = ivbep_unc_r2_ring_ad_used + }, + { .name = "UNC_R2_RING_AK_USED", + .desc = "R2 AK Ring in Use", + .code = 0x8, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_ring_ad_used), + .umasks = ivbep_unc_r2_ring_ad_used /* shared */ + }, + { .name = "UNC_R2_RING_BL_USED", + .desc = "R2 BL Ring in Use", + .code = 0x9, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_ring_ad_used), + .umasks = ivbep_unc_r2_ring_ad_used /* shared */ + }, + { .name = "UNC_R2_RING_IV_USED", + .desc = "R2 IV Ring in Use", + .code = 0xa, + .cntmsk = 0xf, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_ring_iv_used), + .umasks = 
ivbep_unc_r2_ring_iv_used + }, + { .name = "UNC_R2_RXR_AK_BOUNCES", + .desc = "AK Ingress Bounced", + .code = 0x12, + .cntmsk = 0x1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_rxr_ak_bounces), + .umasks = ivbep_unc_r2_rxr_ak_bounces + }, + { .name = "UNC_R2_RXR_OCCUPANCY", + .desc = "Ingress occupancy accumulator", + .code = 0x13, + .cntmsk = 0x1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_rxr_occupancy), + .umasks = ivbep_unc_r2_rxr_occupancy + }, + { .name = "UNC_R2_RXR_CYCLES_NE", + .desc = "Ingress Cycles Not Empty", + .code = 0x10, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_rxr_cycles_ne), + .umasks = ivbep_unc_r2_rxr_cycles_ne + }, + { .name = "UNC_R2_RXR_INSERTS", + .desc = "Ingress inserts", + .code = 0x11, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_rxr_cycles_ne), + .umasks = ivbep_unc_r2_rxr_cycles_ne, /* shared */ + }, + { .name = "UNC_R2_TXR_CYCLES_FULL", + .desc = "Egress Cycles Full", + .code = 0x25, + .cntmsk = 0x1, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_txr_cycles_full), + .umasks = ivbep_unc_r2_txr_cycles_full + }, + { .name = "UNC_R2_TXR_CYCLES_NE", + .desc = "Egress Cycles Not Empty", + .code = 0x23, + .cntmsk = 0x1, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_txr_cycles_full), + .umasks = ivbep_unc_r2_txr_cycles_full /* shared */ + }, + { .name = "UNC_R2_TXR_NACK_CCW", + .desc = "Egress counter-clockwise NACK", + .code = 0x28, + .cntmsk = 0x1, + .ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_txr_cycles_full), + .umasks = ivbep_unc_r2_txr_cycles_full /* shared */ + }, + { .name = "UNC_R2_TXR_NACK_CW", + .desc = "Egress clockwise NACK", + .code = 0x26, + .cntmsk = 0x1, +
.ngrp = 1, + .modmsk = IVBEP_UNC_R2PCIE_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r2_txr_cycles_full), + .umasks = ivbep_unc_r2_txr_cycles_full /* shared */ + }, +}; diff --git a/lib/events/intel_ivbep_unc_r3qpi_events.h b/lib/events/intel_ivbep_unc_r3qpi_events.h new file mode 100644 index 0000000..75cb533 --- /dev/null +++ b/lib/events/intel_ivbep_unc_r3qpi_events.h @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + * + * This file has been automatically generated. 
+ * + * PMU: ivbep_unc_r3qpi (Intel IvyBridge-EP R3QPI uncore) + */ + +static const intel_x86_umask_t ivbep_unc_r3_iio_credits_acquired[]={ + { .uname = "DRS", + .udesc = "DRS", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCB", + .udesc = "NCB", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "NCS", + .udesc = "NCS", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_ring_ad_used[]={ + { .uname = "CCW_VR0_EVEN", + .udesc = "Counter-Clockwise and even ring polarity on virtual ring 0", + .ucode = 0x400, + }, + { .uname = "CCW_VR0_ODD", + .udesc = "Counter-Clockwise and odd ring polarity on virtual ring 0", + .ucode = 0x800, + }, + { .uname = "CW_VR0_EVEN", + .udesc = "Clockwise and even ring polarity on virtual ring 0", + .ucode = 0x100, + }, + { .uname = "CW_VR0_ODD", + .udesc = "Clockwise and odd ring polarity on virtual ring 0", + .ucode = 0x200, + }, + { .uname = "CW", + .udesc = "Clockwise with any polarity on either virtual rings", + .ucode = 0x3300, + }, + { .uname = "CCW", + .udesc = "Counter-clockwise with any polarity on either virtual rings", + .ucode = 0xcc00, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_ring_iv_used[]={ + { .uname = "CW", + .udesc = "Clockwise with any polarity on either virtual rings", + .ucode = 0x3300, + }, + { .uname = "CCW", + .udesc = "Counter-clockwise with any polarity on either virtual rings", + .ucode = 0xcc00, + }, + { .uname = "ANY", + .udesc = "any direction and any polarity on any virtual ring", + .ucode = 0xff00, /* CW (0x3300) | CCW (0xcc00) */ + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_rxr_cycles_ne[]={ + { .uname = "HOM", + .udesc = "HOM Ingress queue", + .ucode = 0x100, + }, + { .uname = "SNP", + .udesc = "SNP Ingress queue", + .ucode = 0x200, + }, + { .uname = "NDR", + .udesc = "NDR Ingress queue", + .ucode = 0x400, + }, +}; + +static const intel_x86_umask_t
ivbep_unc_r3_rxr_inserts[]={ + { .uname = "DRS", + .udesc = "DRS Ingress queue", + .ucode = 0x800, + }, + { .uname = "HOM", + .udesc = "HOM Ingress queue", + .ucode = 0x100, + }, + { .uname = "NCB", + .udesc = "NCB Ingress queue", + .ucode = 0x1000, + }, + { .uname = "NCS", + .udesc = "NCS Ingress queue", + .ucode = 0x2000, + }, + { .uname = "NDR", + .udesc = "NDR Ingress queue", + .ucode = 0x400, + }, + { .uname = "SNP", + .udesc = "SNP Ingress queue", + .ucode = 0x200, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_vn0_credits_used[]={ + { .uname = "HOM", + .udesc = "Filter HOM message class", + .ucode = 0x100, + }, + { .uname = "SNP", + .udesc = "Filter SNP message class", + .ucode = 0x200, + }, + { .uname = "NDR", + .udesc = "Filter NDR message class", + .ucode = 0x400, + }, + { .uname = "DRS", + .udesc = "Filter DRS message class", + .ucode = 0x800, + }, + { .uname = "NCB", + .udesc = "Filter NCB message class", + .ucode = 0x1000, + }, + { .uname = "NCS", + .udesc = "Filter NCS message class", + .ucode = 0x2000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_c_hi_ad_credits_empty[]={ + { .uname = "CBO8", + .udesc = "CBox 8", + .ucode = 0x100, + }, + { .uname = "CBO9", + .udesc = "CBox 9", + .ucode = 0x200, + }, + { .uname = "CBO10", + .udesc = "CBox 10", + .ucode = 0x400, + }, + { .uname = "CBO11", + .udesc = "CBox 11", + .ucode = 0x800, + }, + { .uname = "CBO12", + .udesc = "CBox 12", + .ucode = 0x1000, + }, + { .uname = "CBO13", + .udesc = "CBox 13", + .ucode = 0x2000, + }, + { .uname = "CBO14", + .udesc = "CBox 14 & 16", + .ucode = 0x4000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_c_lo_ad_credits_empty[]={ + { .uname = "CBO0", + .udesc = "CBox 0", + .ucode = 0x100, + }, + { .uname = "CBO1", + .udesc = "CBox 1", + .ucode = 0x200, + }, + { .uname = "CBO2", + .udesc = "CBox 2", + .ucode = 0x400, + }, + { .uname = "CBO3", + .udesc = "CBox 3", + .ucode = 0x800, + }, + { .uname = "CBO4", + .udesc = "CBox 4", + .ucode = 0x1000, + }, 
+ { .uname = "CBO5", + .udesc = "CBox 5", + .ucode = 0x2000, + }, + { .uname = "CBO6", + .udesc = "CBox 6", + .ucode = 0x4000, + }, + { .uname = "CBO7", + .udesc = "CBox 7", + .ucode = 0x8000, + } +}; + +static const intel_x86_umask_t ivbep_unc_r3_ha_r2_bl_credits_empty[]={ + { .uname = "HA0", + .udesc = "HA0", + .ucode = 0x100, + }, + { .uname = "HA1", + .udesc = "HA1", + .ucode = 0x200, + }, + { .uname = "R2_NCB", + .udesc = "R2 NCB messages", + .ucode = 0x400, + }, + { .uname = "R2_NCS", + .udesc = "R2 NCS messages", + .ucode = 0x800, + } +}; + +static const intel_x86_umask_t ivbep_unc_r3_qpi0_ad_credits_empty[]={ + { .uname = "VNA", + .udesc = "VNA", + .ucode = 0x100, + }, + { .uname = "VN0_HOM", + .udesc = "VN0 HOM messages", + .ucode = 0x200, + }, + { .uname = "VN0_SNP", + .udesc = "VN0 SNP messages", + .ucode = 0x400, + }, + { .uname = "VN0_NDR", + .udesc = "VN0 NDR messages", + .ucode = 0x800, + }, + { .uname = "VN1_HOM", + .udesc = "VN1 HOM messages", + .ucode = 0x1000, + }, + { .uname = "VN1_SNP", + .udesc = "VN1 SNP messages", + .ucode = 0x2000, + }, + { .uname = "VN1_NDR", + .udesc = "VN1 NDR messages", + .ucode = 0x4000, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_txr_nack_ccw[]={ + { .uname = "AD", + .udesc = "BL counter-clockwise Egress queue", + .ucode = 0x100, + }, + { .uname = "AK", + .udesc = "AD clockwise Egress queue", + .ucode = 0x200, + }, + { .uname = "BL", + .udesc = "AD counter-clockwise Egress queue", + .ucode = 0x400, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_txr_nack_cw[]={ + { .uname = "AD", + .udesc = "AD clockwise Egress queue", + .ucode = 0x100, + }, + { .uname = "AK", + .udesc = "AD counter-clockwise Egress queue", + .ucode = 0x200, + }, + { .uname = "BL", + .udesc = "BL clockwise Egress queue", + .ucode = 0x400, + }, +}; + +static const intel_x86_umask_t ivbep_unc_r3_vna_credits_acquired[]={ + { .uname = "AD", + .udesc = "For AD ring", + .ucode = 0x100, + }, + { .uname = "BL", + .udesc = "For BL ring", + 
.ucode = 0x400, + }, +}; + +static const intel_x86_entry_t intel_ivbep_unc_r3_pe[]={ + { .name = "UNC_R3_CLOCKTICKS", + .desc = "Number of uclks in domain", + .code = 0x1, + .cntmsk = 0x7, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + }, + { .name = "UNC_R3_RING_AD_USED", + .desc = "R3 AD Ring in Use", + .code = 0x7, + .cntmsk = 0x7, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_ring_ad_used), + .umasks = ivbep_unc_r3_ring_ad_used + }, + { .name = "UNC_R3_RING_AK_USED", + .desc = "R3 AK Ring in Use", + .code = 0x8, + .cntmsk = 0x7, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_ring_ad_used), + .umasks = ivbep_unc_r3_ring_ad_used /* shared */ + }, + { .name = "UNC_R3_RING_BL_USED", + .desc = "R3 BL Ring in Use", + .code = 0x9, + .cntmsk = 0x7, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_ring_ad_used), + .umasks = ivbep_unc_r3_ring_ad_used /* shared */ + }, + { .name = "UNC_R3_RING_IV_USED", + .desc = "R3 IV Ring in Use", + .code = 0xa, + .cntmsk = 0x7, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_ring_iv_used), + .umasks = ivbep_unc_r3_ring_iv_used + }, + { .name = "UNC_R3_RXR_AD_BYPASSED", + .desc = "Ingress Bypassed", + .code = 0x12, + .cntmsk = 0x3, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + }, + { .name = "UNC_R3_RXR_CYCLES_NE", + .desc = "Ingress Cycles Not Empty", + .code = 0x10, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_rxr_cycles_ne), + .umasks = ivbep_unc_r3_rxr_cycles_ne + }, + { .name = "UNC_R3_RXR_INSERTS", + .desc = "Ingress Allocations", + .code = 0x11, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_rxr_inserts), + .umasks = ivbep_unc_r3_rxr_inserts + }, + { .name = "UNC_R3_RXR_OCCUPANCY", + .desc = "Ingress Occupancy Accumulator", + .code = 0x13, + .cntmsk 
= 0x1, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_rxr_inserts), + .umasks = ivbep_unc_r3_rxr_inserts/* shared */ + }, + { .name = "UNC_R3_TXR_CYCLES_FULL", + .desc = "Egress cycles full", + .code = 0x25, + .cntmsk = 0x3, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + }, + { .name = "UNC_R3_VN0_CREDITS_REJECT", + .desc = "VN0 Credit Acquisition Failed", + .code = 0x37, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_vn0_credits_used), + .umasks = ivbep_unc_r3_vn0_credits_used + }, + { .name = "UNC_R3_VN0_CREDITS_USED", + .desc = "VN0 Credit Used", + .code = 0x36, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_vn0_credits_used), + .umasks = ivbep_unc_r3_vn0_credits_used + }, + { .name = "UNC_R3_VNA_CREDITS_ACQUIRED", + .desc = "VNA credit Acquisitions", + .code = 0x33, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_vna_credits_acquired), + .umasks = ivbep_unc_r3_vna_credits_acquired + }, + { .name = "UNC_R3_VNA_CREDITS_REJECT", + .desc = "VNA Credit Reject", + .code = 0x34, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_vn0_credits_used), + .umasks = ivbep_unc_r3_vn0_credits_used /* shared */ + }, + { .name = "UNC_R3_VNA_CREDIT_CYCLES_OUT", + .desc = "Cycles with no VNA credits available", + .code = 0x31, + .cntmsk = 0x3, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + }, + { .name = "UNC_R3_VNA_CREDIT_CYCLES_USED", + .desc = "Cycles with 1 or more VNA credits in use", + .code = 0x32, + .cntmsk = 0x3, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + }, + { .name = "UNC_R3_C_HI_AD_CREDITS_EMPTY", + .desc = "Cbox AD credits empty", + .code = 0x2c, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_c_hi_ad_credits_empty), + .umasks = 
ivbep_unc_r3_c_hi_ad_credits_empty + }, + { .name = "UNC_R3_C_LO_AD_CREDITS_EMPTY", + .desc = "Cbox AD credits empty", + .code = 0x2b, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_c_lo_ad_credits_empty), + .umasks = ivbep_unc_r3_c_lo_ad_credits_empty + }, + { .name = "UNC_R3_HA_R2_BL_CREDITS_EMPTY", + .desc = "HA/R2 AD credits empty", + .code = 0x2f, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_ha_r2_bl_credits_empty), + .umasks = ivbep_unc_r3_ha_r2_bl_credits_empty + }, + { .name = "UNC_R3_QPI0_AD_CREDITS_EMPTY", + .desc = "QPI0 AD credits empty", + .code = 0x29, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_qpi0_ad_credits_empty), + .umasks = ivbep_unc_r3_qpi0_ad_credits_empty + }, + { .name = "UNC_R3_QPI0_BL_CREDITS_EMPTY", + .desc = "QPI0 BL credits empty", + .code = 0x2d, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_qpi0_ad_credits_empty), /* shared */ + .umasks = ivbep_unc_r3_qpi0_ad_credits_empty + }, + { .name = "UNC_R3_QPI1_AD_CREDITS_EMPTY", + .desc = "QPI1 AD credits empty", + .code = 0x2a, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_qpi0_ad_credits_empty), /* shared */ + .umasks = ivbep_unc_r3_qpi0_ad_credits_empty + }, + { .name = "UNC_R3_QPI1_BL_CREDITS_EMPTY", + .desc = "QPI1 BL credits empty", + .code = 0x2e, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_qpi0_ad_credits_empty), /* shared */ + .umasks = ivbep_unc_r3_qpi0_ad_credits_empty + }, + { .name = "UNC_R3_TXR_CYCLES_NE", + .desc = "Egress cycles not empty", + .code = 0x23, + .cntmsk = 0x3, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + }, + { .name = "UNC_R3_TXR_NACK_CCW", + .desc = "Egress NACK counter-clockwise", + .code = 
0x28, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_txr_nack_ccw), + .umasks = ivbep_unc_r3_txr_nack_ccw + }, + { .name = "UNC_R3_TXR_NACK_CW", + .desc = "Egress NACK clockwise", + .code = 0x26, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_txr_nack_cw), + .umasks = ivbep_unc_r3_txr_nack_cw + }, + { .name = "UNC_R3_VN1_CREDITS_REJECT", + .desc = "VN1 Credit Acquisition Failed", + .code = 0x39, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_vn0_credits_used), /* shared */ + .umasks = ivbep_unc_r3_vn0_credits_used + }, + { .name = "UNC_R3_VN1_CREDITS_USED", + .desc = "VN1 Credit Used", + .code = 0x38, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_R3QPI_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_r3_vn0_credits_used), /* shared */ + .umasks = ivbep_unc_r3_vn0_credits_used + }, +}; diff --git a/lib/events/intel_ivbep_unc_ubo_events.h b/lib/events/intel_ivbep_unc_ubo_events.h new file mode 100644 index 0000000..ce08bde --- /dev/null +++ b/lib/events/intel_ivbep_unc_ubo_events.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + * + * PMU: ivbep_unc_ubo (Intel IvyBridge-EP U-Box uncore PMU) + */ + +static const intel_x86_umask_t ivbep_unc_u_event_msg[]={ + { .uname = "DOORBELL_RCVD", + .udesc = "TBD", + .ucode = 0x800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "INT_PRIO", + .udesc = "TBD", + .ucode = 0x1000, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "IPI_RCVD", + .udesc = "TBD", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MSI_RCVD", + .udesc = "TBD", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "VLW_RCVD", + .udesc = "TBD", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, +}; + + +static const intel_x86_umask_t ivbep_unc_u_phold_cycles[]={ + { .uname = "ASSERT_TO_ACK", + .udesc = "Number of cycles asserted to ACK", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ACK_TO_DEASSERT", + .udesc = "Number of cycles ACK to deassert", + .ucode = 0x200, /* must differ from ASSERT_TO_ACK (0x100) */ + .uflags = INTEL_X86_NCOMBO, + }, +}; + +static const intel_x86_entry_t intel_ivbep_unc_u_pe[]={ + { .name = "UNC_U_EVENT_MSG", + .desc = "VLW Received", + .code = 0x42, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_UBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_u_event_msg), + .umasks = ivbep_unc_u_event_msg + }, + { .name = "UNC_U_LOCK_CYCLES", + .desc = "IDI Lock/SplitLock Cycles", + .code = 0x44, + .cntmsk = 0x3, + .modmsk = IVBEP_UNC_UBO_ATTRS, + }, + { .name =
"UNC_U_PHOLD_CYCLES", + .desc = "Cycles PHOLD asserts to Ack", + .code = 0x45, + .cntmsk = 0x3, + .ngrp = 1, + .modmsk = IVBEP_UNC_UBO_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_u_phold_cycles), + .umasks = ivbep_unc_u_phold_cycles + }, + { .name = "UNC_U_RACU_REQUESTS", + .desc = "RACU requests", + .code = 0x46, + .cntmsk = 0x3, + .modmsk = IVBEP_UNC_UBO_ATTRS, + }, +}; diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index bd2f49f..cd68301 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -121,6 +121,41 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &intel_snbep_unc_r3qpi1_support, &intel_knc_support, &intel_slm_support, + &intel_ivbep_unc_cb0_support, + &intel_ivbep_unc_cb1_support, + &intel_ivbep_unc_cb2_support, + &intel_ivbep_unc_cb3_support, + &intel_ivbep_unc_cb4_support, + &intel_ivbep_unc_cb5_support, + &intel_ivbep_unc_cb6_support, + &intel_ivbep_unc_cb7_support, + &intel_ivbep_unc_cb8_support, + &intel_ivbep_unc_cb9_support, + &intel_ivbep_unc_cb10_support, + &intel_ivbep_unc_cb11_support, + &intel_ivbep_unc_cb12_support, + &intel_ivbep_unc_cb13_support, + &intel_ivbep_unc_cb14_support, + &intel_ivbep_unc_ha0_support, + &intel_ivbep_unc_ha1_support, + &intel_ivbep_unc_imc0_support, + &intel_ivbep_unc_imc1_support, + &intel_ivbep_unc_imc2_support, + &intel_ivbep_unc_imc3_support, + &intel_ivbep_unc_imc4_support, + &intel_ivbep_unc_imc5_support, + &intel_ivbep_unc_imc6_support, + &intel_ivbep_unc_imc7_support, + &intel_ivbep_unc_pcu_support, + &intel_ivbep_unc_qpi0_support, + &intel_ivbep_unc_qpi1_support, + &intel_ivbep_unc_qpi2_support, + &intel_ivbep_unc_ubo_support, + &intel_ivbep_unc_r2pcie_support, + &intel_ivbep_unc_r3qpi0_support, + &intel_ivbep_unc_r3qpi1_support, + &intel_ivbep_unc_r3qpi2_support, + &intel_ivbep_unc_irp_support, &intel_x86_arch_support, /* must always be last for x86 */ #endif diff --git a/lib/pfmlib_intel_ivbep_unc_cbo.c b/lib/pfmlib_intel_ivbep_unc_cbo.c new file mode 100644 index 0000000..6053aa5 --- /dev/null 
+++ b/lib/pfmlib_intel_ivbep_unc_cbo.c @@ -0,0 +1,125 @@ +/* + * pfmlib_intel_ivbep_unc_cbo.c : Intel IvyBridge-EP C-Box uncore PMU + * + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_cbo_events.h" + +static void +display_cbo(void *this, pfmlib_event_desc_t *e, void *val) +{ + const intel_x86_entry_t *pe = this_pe(this); + pfm_snbep_unc_reg_t *reg = val; + pfm_snbep_unc_reg_t f; + + __pfm_vbprintf("[UNC_CBO=0x%"PRIx64" event=0x%x umask=0x%x en=%d " + "inv=%d edge=%d thres=%d tid_en=%d] %s\n", + reg->val, + reg->cbo.unc_event, + reg->cbo.unc_umask, + reg->cbo.unc_en, + reg->cbo.unc_inv, + reg->cbo.unc_edge, + reg->cbo.unc_thres, + reg->cbo.unc_tid, + pe[e->event].name); + + if (e->count == 1) + return; + + f.val = e->codes[1]; + + __pfm_vbprintf("[UNC_CBOX_FILTER0=0x%"PRIx64" tid=%d core=0x%x" + " state=0x%x]\n", + f.val, + f.ivbep_cbo_filt0.tid, + f.ivbep_cbo_filt0.cid, + f.ivbep_cbo_filt0.state); + + if (e->count == 2) + return; + + f.val = e->codes[2]; + + __pfm_vbprintf("[UNC_CBOX_FILTER1=0x%"PRIx64" nid=%d opc=0x%x" + " nc=0x%x isoc=0x%x]\n", + f.val, + f.ivbep_cbo_filt1.nid, + f.ivbep_cbo_filt1.opc, + f.ivbep_cbo_filt1.nc, + f.ivbep_cbo_filt1.isoc); +} + +#define DEFINE_C_BOX(n) \ +pfmlib_pmu_t intel_ivbep_unc_cb##n##_support = {\ + .desc = "Intel Ivy Bridge-EP C-Box "#n" uncore",\ + .name = "ivbep_unc_cbo"#n,\ + .perf_name = "uncore_cbox_"#n,\ + .pmu = PFM_PMU_INTEL_IVBEP_UNC_CB##n,\ + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_c_pe),\ + .type = PFM_PMU_TYPE_UNCORE,\ + .num_cntrs = 4,\ + .num_fixed_cntrs = 0,\ + .max_encoding = 2,\ + .pe = intel_ivbep_unc_c_pe,\ + .atdesc = snbep_unc_mods,\ + .flags = PFMLIB_PMU_FL_RAW_UMASK|INTEL_PMU_FL_UNC_CBO,\ + .pmu_detect = pfm_intel_ivbep_unc_detect,\ + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ + .get_event_first = 
pfm_intel_x86_get_event_first,\ + .get_event_next = pfm_intel_x86_get_event_next,\ + .event_is_valid = pfm_intel_x86_event_is_valid,\ + .validate_table = pfm_intel_x86_validate_table,\ + .get_event_info = pfm_intel_x86_get_event_info,\ + .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ + .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ + .can_auto_encode = pfm_intel_x86_can_auto_encode, \ + .display_reg = display_cbo,\ +} + +DEFINE_C_BOX(0); +DEFINE_C_BOX(1); +DEFINE_C_BOX(2); +DEFINE_C_BOX(3); +DEFINE_C_BOX(4); +DEFINE_C_BOX(5); +DEFINE_C_BOX(6); +DEFINE_C_BOX(7); +DEFINE_C_BOX(8); +DEFINE_C_BOX(9); +DEFINE_C_BOX(10); +DEFINE_C_BOX(11); +DEFINE_C_BOX(12); +DEFINE_C_BOX(13); +DEFINE_C_BOX(14); diff --git a/lib/pfmlib_intel_ivbep_unc_ha.c b/lib/pfmlib_intel_ivbep_unc_ha.c new file mode 100644 index 0000000..d4bd6f9 --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_ha.c @@ -0,0 +1,97 @@ +/* + * pfmlib_intel_ivbep_unc_ha.c : Intel IvyBridge-EP Home Agent (HA) uncore PMU + * + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_ha_events.h" + +static void +display_ha(void *this, pfmlib_event_desc_t *e, void *val) +{ + const intel_x86_entry_t *pe = this_pe(this); + pfm_snbep_unc_reg_t *reg = val; + pfm_snbep_unc_reg_t f; + + __pfm_vbprintf("[UNC_HA=0x%"PRIx64" event=0x%x umask=0x%x en=%d " + "inv=%d edge=%d thres=%d] %s\n", + reg->val, + reg->com.unc_event, + reg->com.unc_umask, + reg->com.unc_en, + reg->com.unc_inv, + reg->com.unc_edge, + reg->com.unc_thres, + pe[e->event].name); + + if (e->count == 1) + return; + + f.val = e->codes[1]; + __pfm_vbprintf("[UNC_HA_ADDR=0x%"PRIx64" lo_addr=0x%x hi_addr=0x%x]\n", + f.val, + f.ha_addr.lo_addr, + f.ha_addr.hi_addr); + + f.val = e->codes[2]; + __pfm_vbprintf("[UNC_HA_OPC=0x%"PRIx64" opc=0x%x]\n", f.val, f.ha_opc.opc); +} + +#define DEFINE_HA_BOX(n) \ +pfmlib_pmu_t intel_ivbep_unc_ha##n##_support = {\ + .desc = "Intel Ivy Bridge-EP HA "#n" uncore",\ + .name = "ivbep_unc_ha"#n,\ + .perf_name = "uncore_ha_"#n,\ + .pmu = PFM_PMU_INTEL_IVBEP_UNC_HA##n,\ + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_h_pe),\ + .type = PFM_PMU_TYPE_UNCORE,\ + .num_cntrs = 4,\ + .num_fixed_cntrs = 0,\ + .max_encoding = 3, /* address matchers */\ + .pe = intel_ivbep_unc_h_pe,\ + .atdesc = snbep_unc_mods,\ + .flags = PFMLIB_PMU_FL_RAW_UMASK,\ + .pmu_detect = pfm_intel_ivbep_unc_detect,\ + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ + .get_event_first = 
pfm_intel_x86_get_event_first,\ + .get_event_next = pfm_intel_x86_get_event_next,\ + .event_is_valid = pfm_intel_x86_event_is_valid,\ + .validate_table = pfm_intel_x86_validate_table,\ + .get_event_info = pfm_intel_x86_get_event_info,\ + .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ + .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ + .display_reg = display_ha,\ +} + +DEFINE_HA_BOX(0); +DEFINE_HA_BOX(1); diff --git a/lib/pfmlib_intel_ivbep_unc_imc.c b/lib/pfmlib_intel_ivbep_unc_imc.c new file mode 100644 index 0000000..aa7d3a8 --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_imc.c @@ -0,0 +1,71 @@ +/* + * pfmlib_intel_ivbep_unc_imc.c : Intel IvyBridge-EP Integrated Memory Controller (IMC) uncore PMU + * + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_imc_events.h" + +#define DEFINE_IMC_BOX(n) /* defines intel_ivbep_unc_imc<n>_support; the caller supplies the trailing ';' */ \ +pfmlib_pmu_t intel_ivbep_unc_imc##n##_support = { \ + .desc = "Intel Ivy Bridge-EP IMC"#n" uncore", \ + .name = "ivbep_unc_imc"#n, \ + .perf_name = "uncore_imc_"#n, \ + .pmu = PFM_PMU_INTEL_IVBEP_UNC_IMC##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_m_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .num_cntrs = 4, \ + .num_fixed_cntrs = 1, \ + .max_encoding = 1, \ + .pe = intel_ivbep_unc_m_pe, \ + .atdesc = snbep_unc_mods, \ + .flags = PFMLIB_PMU_FL_RAW_UMASK, \ + .pmu_detect = pfm_intel_ivbep_unc_detect, \ + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ + .get_event_first = pfm_intel_x86_get_event_first, \ + .get_event_next = pfm_intel_x86_get_event_next, \ + .event_is_valid = pfm_intel_x86_event_is_valid, \ + .validate_table = pfm_intel_x86_validate_table, \ + .get_event_info = pfm_intel_x86_get_event_info, \ + .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ +} + +DEFINE_IMC_BOX(0); +DEFINE_IMC_BOX(1); +DEFINE_IMC_BOX(2); +DEFINE_IMC_BOX(3); +DEFINE_IMC_BOX(4); +DEFINE_IMC_BOX(5); +DEFINE_IMC_BOX(6); +DEFINE_IMC_BOX(7); diff --git a/lib/pfmlib_intel_ivbep_unc_irp.c b/lib/pfmlib_intel_ivbep_unc_irp.c new file mode 100644 index 0000000..84fc22f --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_irp.c @@ -0,0 +1,79 @@ +/* + * pfmlib_intel_ivbep_unc_irp.c : Intel IvyBridge-EP IRP uncore PMU + * + * Copyright (c) 2014 Google Inc.
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_irp_events.h" + +static void +display_irp(void *this, pfmlib_event_desc_t *e, void *val) +{ + const intel_x86_entry_t *pe = this_pe(this); + pfm_snbep_unc_reg_t *reg = val; + + __pfm_vbprintf("[UNC_IRP=0x%"PRIx64" event=0x%x umask=0x%x en=%d " + "edge=%d thres=%d] %s\n", + reg->val, + reg->irp.unc_event, + reg->irp.unc_umask, + reg->irp.unc_en, + reg->irp.unc_edge, + reg->irp.unc_thres, + pe[e->event].name); +} + +pfmlib_pmu_t intel_ivbep_unc_irp_support = { + .desc = "Intel Ivy Bridge-EP IRP uncore", + .name = "ivbep_unc_irp", + .perf_name = "uncore_irp", + .pmu = PFM_PMU_INTEL_IVBEP_UNC_IRP, + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_i_pe), + .type = PFM_PMU_TYPE_UNCORE, + .num_cntrs = 4, + .num_fixed_cntrs = 0, + .max_encoding = 3, + .pe = intel_ivbep_unc_i_pe, + .atdesc = snbep_unc_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK, + .pmu_detect = pfm_intel_ivbep_unc_detect, + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), + .get_event_first = pfm_intel_x86_get_event_first, + .get_event_next = pfm_intel_x86_get_event_next, + .event_is_valid = pfm_intel_x86_event_is_valid, + .validate_table = pfm_intel_x86_validate_table, + .get_event_info = pfm_intel_x86_get_event_info, + .get_event_attr_info = pfm_intel_x86_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, + .display_reg = display_irp, +}; diff --git a/lib/pfmlib_intel_ivbep_unc_pcu.c b/lib/pfmlib_intel_ivbep_unc_pcu.c new file mode 100644 index 0000000..782f881 --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_pcu.c @@ -0,0 +1,97 @@ +/* + * pfmlib_intel_ivbep_unc_pcu.c : Intel 
IvyBridge-EP Power Control Unit (PCU) uncore PMU + * + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_pcu_events.h" + +static void +display_pcu(void *this, pfmlib_event_desc_t *e, void *val) +{ + const intel_x86_entry_t *pe = this_pe(this); + pfm_snbep_unc_reg_t *reg = val; + pfm_snbep_unc_reg_t f; + + __pfm_vbprintf("[UNC_PCU=0x%"PRIx64" event=0x%x sel_ext=%d occ_sel=0x%x en=%d " + "edge=%d thres=%d occ_inv=%d occ_edge=%d] %s\n", + reg->val, + reg->ivbep_pcu.unc_event, + reg->ivbep_pcu.unc_sel_ext, + reg->ivbep_pcu.unc_occ, + reg->ivbep_pcu.unc_en, + reg->ivbep_pcu.unc_edge, + reg->ivbep_pcu.unc_thres, + reg->ivbep_pcu.unc_occ_inv, + reg->ivbep_pcu.unc_occ_edge, + pe[e->event].name); + + if (e->count == 1) + return; + + f.val = e->codes[1]; + + __pfm_vbprintf("[UNC_PCU_FILTER=0x%"PRIx64" band0=%u band1=%u band2=%u band3=%u]\n", + f.val, + f.pcu_filt.filt0, + f.pcu_filt.filt1, + f.pcu_filt.filt2, + f.pcu_filt.filt3); +} + + +pfmlib_pmu_t intel_ivbep_unc_pcu_support = { + .desc = "Intel Ivy Bridge-EP PCU uncore", + .name = "ivbep_unc_pcu", + .perf_name = "uncore_pcu", + .pmu = PFM_PMU_INTEL_IVBEP_UNC_PCU, + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_p_pe), + .type = PFM_PMU_TYPE_UNCORE, + .num_cntrs = 4, + .num_fixed_cntrs = 0, + .max_encoding = 2, + .pe = intel_ivbep_unc_p_pe, + .atdesc = snbep_unc_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK, + .pmu_detect = pfm_intel_ivbep_unc_detect, + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), + .get_event_first = pfm_intel_x86_get_event_first, + .get_event_next = pfm_intel_x86_get_event_next, + .event_is_valid = pfm_intel_x86_event_is_valid, + .validate_table = pfm_intel_x86_validate_table, + .get_event_info = pfm_intel_x86_get_event_info, + .get_event_attr_info = 
pfm_intel_x86_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, + .can_auto_encode = pfm_intel_snbep_unc_can_auto_encode, + .display_reg = display_pcu, +}; diff --git a/lib/pfmlib_intel_ivbep_unc_qpi.c b/lib/pfmlib_intel_ivbep_unc_qpi.c new file mode 100644 index 0000000..e68eb75 --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_qpi.c @@ -0,0 +1,85 @@ +/* + * pfmlib_intel_ivbep_unc_qpi.c : Intel IvyBridge-EP QPI uncore PMU + * + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_qpi_events.h" + +static void +display_qpi(void *this, pfmlib_event_desc_t *e, void *val) +{ + const intel_x86_entry_t *pe = this_pe(this); + pfm_snbep_unc_reg_t *reg = val; + + __pfm_vbprintf("[UNC_QPI=0x%"PRIx64" event=0x%x sel_ext=%d umask=0x%x en=%d " + "inv=%d edge=%d thres=%d] %s\n", + reg->val, + reg->qpi.unc_event, + reg->qpi.unc_event_ext, + reg->qpi.unc_umask, + reg->qpi.unc_en, + reg->qpi.unc_inv, + reg->qpi.unc_edge, + reg->qpi.unc_thres, + pe[e->event].name); +} + +#define DEFINE_QPI_BOX(n) \ +pfmlib_pmu_t intel_ivbep_unc_qpi##n##_support = {\ + .desc = "Intel Ivy Bridge-EP QPI"#n" uncore",\ + .name = "ivbep_unc_qpi"#n,\ + .perf_name = "uncore_qpi_"#n,\ + .pmu = PFM_PMU_INTEL_IVBEP_UNC_QPI##n,\ + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_q_pe),\ + .type = PFM_PMU_TYPE_UNCORE,\ + .num_cntrs = 4,\ + .num_fixed_cntrs = 0,\ + .max_encoding = 3,\ + .pe = intel_ivbep_unc_q_pe,\ + .atdesc = snbep_unc_mods,\ + .flags = PFMLIB_PMU_FL_RAW_UMASK,\ + .pmu_detect = pfm_intel_ivbep_unc_detect,\ + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ + .get_event_first = pfm_intel_x86_get_event_first,\ + .get_event_next = pfm_intel_x86_get_event_next,\ + .event_is_valid = pfm_intel_x86_event_is_valid,\ + .validate_table = pfm_intel_x86_validate_table,\ + .get_event_info = pfm_intel_x86_get_event_info,\ + .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ + .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ + .display_reg = display_qpi,\ +} +DEFINE_QPI_BOX(0); +DEFINE_QPI_BOX(1); +DEFINE_QPI_BOX(2); diff --git 
a/lib/pfmlib_intel_ivbep_unc_r2pcie.c b/lib/pfmlib_intel_ivbep_unc_r2pcie.c new file mode 100644 index 0000000..87c680c --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_r2pcie.c @@ -0,0 +1,61 @@ +/* + * pfmlib_intel_ivbep_unc_r2pcie.c : Intel IvyBridge-EP R2PCIe uncore PMU + * + * Copyright (c) 2014 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_r2pcie_events.h" + +pfmlib_pmu_t intel_ivbep_unc_r2pcie_support = { + .desc = "Intel Ivy Bridge-EP R2PCIe uncore", + .name = "ivbep_unc_r2pcie", + .perf_name = "uncore_r2pcie", + .pmu = PFM_PMU_INTEL_IVBEP_UNC_R2PCIE, + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_r2_pe), + .type = PFM_PMU_TYPE_UNCORE, + .num_cntrs = 4, + .num_fixed_cntrs = 0, + .max_encoding = 1, + .pe = intel_ivbep_unc_r2_pe, + .atdesc = snbep_unc_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK, + .pmu_detect = pfm_intel_ivbep_unc_detect, + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), + .get_event_first = pfm_intel_x86_get_event_first, + .get_event_next = pfm_intel_x86_get_event_next, + .event_is_valid = pfm_intel_x86_event_is_valid, + .validate_table = pfm_intel_x86_validate_table, + .get_event_info = pfm_intel_x86_get_event_info, + .get_event_attr_info = pfm_intel_x86_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, +}; diff --git a/lib/pfmlib_intel_ivbep_unc_r3qpi.c b/lib/pfmlib_intel_ivbep_unc_r3qpi.c new file mode 100644 index 0000000..5ed7e4d --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_r3qpi.c @@ -0,0 +1,65 @@ +/* + * pfmlib_intel_ivbep_unc_r3qpi.c : Intel IvyBridge-EP R3QPI uncore PMU + * + * Copyright (c) 2014 Google Inc.
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_r3qpi_events.h" + +#define DEFINE_R3QPI_BOX(n) \ +pfmlib_pmu_t intel_ivbep_unc_r3qpi##n##_support = {\ + .desc = "Intel Ivy Bridge-EP R3QPI"#n" uncore", \ + .name = "ivbep_unc_r3qpi"#n,\ + .perf_name = "uncore_r3qpi_"#n, \ + .pmu = PFM_PMU_INTEL_IVBEP_UNC_R3QPI##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_r3_pe),\ + .type = PFM_PMU_TYPE_UNCORE,\ + .num_cntrs = 3,\ + .num_fixed_cntrs = 0,\ + .max_encoding = 1,\ + .pe = intel_ivbep_unc_r3_pe,\ + .atdesc = snbep_unc_mods,\ + .flags = PFMLIB_PMU_FL_RAW_UMASK,\ + .pmu_detect = pfm_intel_ivbep_unc_detect,\ + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ + .get_event_first = pfm_intel_x86_get_event_first,\ + .get_event_next = pfm_intel_x86_get_event_next,\ + .event_is_valid = pfm_intel_x86_event_is_valid,\ + .validate_table = pfm_intel_x86_validate_table,\ + .get_event_info = pfm_intel_x86_get_event_info,\ + .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ + .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ +} +DEFINE_R3QPI_BOX(0); +DEFINE_R3QPI_BOX(1); +DEFINE_R3QPI_BOX(2); diff --git a/lib/pfmlib_intel_ivbep_unc_ubo.c b/lib/pfmlib_intel_ivbep_unc_ubo.c new file mode 100644 index 0000000..db7f629 --- /dev/null +++ b/lib/pfmlib_intel_ivbep_unc_ubo.c @@ -0,0 +1,61 @@ +/* + * pfmlib_intel_ivbep_unc_ubo.c : Intel IvyBridge-EP U-Box uncore PMU + * + * Copyright (c) 2014 Google Inc. 
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" +#include "pfmlib_intel_x86_priv.h" +#include "pfmlib_intel_snbep_unc_priv.h" +#include "events/intel_ivbep_unc_ubo_events.h" + +pfmlib_pmu_t intel_ivbep_unc_ubo_support = { + .desc = "Intel Ivy Bridge-EP U-Box uncore", + .name = "ivbep_unc_ubo", + .perf_name = "uncore_ubox", + .pmu = PFM_PMU_INTEL_IVBEP_UNC_UBOX, + .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_u_pe), + .type = PFM_PMU_TYPE_UNCORE, + .num_cntrs = 2, + .num_fixed_cntrs = 1, + .max_encoding = 1, + .pe = intel_ivbep_unc_u_pe, + .atdesc = snbep_unc_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK, + .pmu_detect = pfm_intel_ivbep_unc_detect, + .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, + PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), + PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), + .get_event_first = pfm_intel_x86_get_event_first, + .get_event_next = pfm_intel_x86_get_event_next, + .event_is_valid = pfm_intel_x86_event_is_valid, + .validate_table = pfm_intel_x86_validate_table, + .get_event_info = pfm_intel_x86_get_event_info, + .get_event_attr_info = pfm_intel_x86_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, +}; diff --git a/lib/pfmlib_intel_snbep_unc.c b/lib/pfmlib_intel_snbep_unc.c index 7ca86ca..8fb6472 100644 --- a/lib/pfmlib_intel_snbep_unc.c +++ b/lib/pfmlib_intel_snbep_unc.c @@ -39,9 +39,10 @@ const pfmlib_attr_desc_t snbep_unc_mods[]={ PFM_ATTR_I("t", "threshold in range [0-31]"), /* threshold */ PFM_ATTR_I("tf", "thread id filter [0-1]"), /* thread id */ PFM_ATTR_I("cf", "core id filter [0-7]"), /* core id */ - PFM_ATTR_I("nf", "node id bitmask filter [0-255]"),/* nodeid mask */ + PFM_ATTR_I("nf", "node id bitmask filter [0-255]"),/* nodeid mask filter0 */ PFM_ATTR_I("ff", "frequency >= 100Mhz * [0-255]"),/* freq filter */ PFM_ATTR_I("addr", "physical address 
matcher [40 bits]"),/* address matcher */ + PFM_ATTR_I("nf", "node id bitmask filter [0-255]"),/* nodeid mask filter1 */ PFM_ATTR_NULL }; @@ -65,6 +66,26 @@ pfm_intel_snbep_unc_detect(void *this) return PFM_SUCCESS; } +int +pfm_intel_ivbep_unc_detect(void *this) +{ + int ret; + + ret = pfm_intel_x86_detect(); + if (ret != PFM_SUCCESS) + return ret; /* propagate detection failure instead of falling into the family check */ + if (pfm_intel_x86_cfg.family != 6) + return PFM_ERR_NOTSUPP; + + switch(pfm_intel_x86_cfg.model) { + case 62: /* IvyBridge-EP */ + break; + default: + return PFM_ERR_NOTSUPP; + } + return PFM_SUCCESS; +} + static void display_com(void *this, pfmlib_event_desc_t *e, void *val) { @@ -113,7 +134,7 @@ int snbep_unc_add_defaults(void *this, pfmlib_event_desc_t *e, unsigned int msk, uint64_t *umask, - pfm_snbep_unc_reg_t *filter, + pfm_snbep_unc_reg_t *filters, unsigned int max_grpid) { const intel_x86_entry_t *pe = this_pe(this); @@ -167,7 +188,9 @@ snbep_unc_add_defaults(void *this, pfmlib_event_desc_t *e, * ucode must reflect actual code */ *umask |= ent->umasks[idx].ucode >> 8; - filter->val |= pe[e->event].umasks[idx].ufilters[0]; + + filters[0].val |= pe[e->event].umasks[idx].ufilters[0]; + filters[1].val |= pe[e->event].umasks[idx].ufilters[1]; e->attrs[k].id = j; /* pattrs index */ e->attrs[k].ival = 0; @@ -211,7 +234,7 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) int umodmsk = 0, modmsk_r = 0; int pcu_filt_band = -1; pfm_snbep_unc_reg_t reg; - pfm_snbep_unc_reg_t filter; + pfm_snbep_unc_reg_t filters[INTEL_X86_MAX_FILTERS]; pfm_snbep_unc_reg_t addr; pfm_event_attr_info_t *a; uint64_t val, umask1, umask2; @@ -224,8 +247,8 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) memset(grpcounts, 0, sizeof(grpcounts)); memset(ncombo, 0, sizeof(ncombo)); + memset(filters, 0, sizeof(filters)); - filter.val = 0; addr.val = 0; pe = this_pe(this); @@ -310,10 +333,12 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) last_grpid = grpid; um =
pe[e->event].umasks[a->idx].ucode; - filter.val |= pe[e->event].umasks[a->idx].ufilters[0]; + filters[0].val |= pe[e->event].umasks[a->idx].ufilters[0]; + filters[1].val |= pe[e->event].umasks[a->idx].ufilters[1]; um >>= 8; umask2 |= um; + ugrpmsk |= 1 << pe[e->event].umasks[a->idx].grpid; /* PCU occ event */ @@ -377,30 +402,38 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) } reg.cbo.unc_tid = 1; has_cbo_tid = 1; - filter.cbo_filt.tid = ival; + filters[0].cbo_filt.tid = ival; umodmsk |= _SNBEP_UNC_ATTR_TF; break; case SNBEP_UNC_ATTR_CF: /* core id */ if (ival > 7) return PFM_ERR_ATTR_VAL; reg.cbo.unc_tid = 1; - filter.cbo_filt.cid = ival; + filters[0].cbo_filt.cid = ival; has_cbo_tid = 1; umodmsk |= _SNBEP_UNC_ATTR_CF; break; - case SNBEP_UNC_ATTR_NF: /* node id */ + case SNBEP_UNC_ATTR_NF: /* node id filter0 */ if (ival > 255 || ival == 0) { DPRINT("invalid nf, 0 < nf < 256\n"); return PFM_ERR_ATTR_VAL; } - filter.cbo_filt.nid = ival; + filters[0].cbo_filt.nid = ival; umodmsk |= _SNBEP_UNC_ATTR_NF; break; + case SNBEP_UNC_ATTR_NF1: /* node id filter1 */ + if (ival > 255 || ival == 0) { + DPRINT("invalid nf, 0 < nf < 256\n"); + return PFM_ERR_ATTR_VAL; + } + filters[1].ivbep_cbo_filt1.nid = ival; + umodmsk |= _SNBEP_UNC_ATTR_NF1; + break; case SNBEP_UNC_ATTR_FF: /* freq band filter */ if (ival > 255) return PFM_ERR_ATTR_VAL; pcu_filt_band = get_pcu_filt_band(this, reg); - filter.val = ival << (pcu_filt_band * 8); + filters[0].val = ival << (pcu_filt_band * 8); umodmsk |= _SNBEP_UNC_ATTR_FF; break; case SNBEP_UNC_ATTR_A: /* addr filter */ @@ -421,7 +454,7 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) if (pe[e->event].numasks && (ugrpmsk != grpmsk || ugrpmsk == 0)) { uint64_t um = 0; ugrpmsk ^= grpmsk; - ret = snbep_unc_add_defaults(this, e, ugrpmsk, &um, &filter, max_grpid); + ret = snbep_unc_add_defaults(this, e, ugrpmsk, &um, filters, max_grpid); if (ret != PFM_SUCCESS) return ret; umask2 |= um; @@ -434,6 +467,10 
@@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) DPRINT("using nf= on an umask which does not require it\n"); return PFM_ERR_ATTR; } + if (!(modmsk_r & _SNBEP_UNC_ATTR_NF1) && (umodmsk & _SNBEP_UNC_ATTR_NF1)) { + DPRINT("using nf= on an umask which does not require it\n"); + return PFM_ERR_ATTR; + } if (modmsk_r && !(umodmsk & modmsk_r)) { DPRINT("required modifiers missing: 0x%x\n", modmsk_r); @@ -452,6 +489,7 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) else if (a->type == PFM_ATTR_RAW_UMASK) evt_strcat(e->fstr, ":0x%x", a->idx); } + DPRINT("umask2=0x%"PRIx64" umask1=0x%"PRIx64"\n", umask2, umask1); e->count = 0; reg.val |= (umask1 | umask2) << 8; @@ -460,8 +498,10 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) /* * handles C-box filter */ - if (filter.val || has_cbo_tid) - e->codes[e->count++] = filter.val; + if (filters[0].val || filters[1].val || has_cbo_tid) + e->codes[e->count++] = filters[0].val; + if (filters[1].val) + e->codes[e->count++] = filters[1].val; /* HA address matcher */ if (addr.val) @@ -500,10 +540,15 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, reg.cbo.unc_tid); break; case SNBEP_UNC_ATTR_FF: - evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, (filter.val >> (pcu_filt_band*8)) & 0xff); + evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, (filters[0].val >> (pcu_filt_band*8)) & 0xff); break; case SNBEP_UNC_ATTR_NF: - evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filter.cbo_filt.nid); + if (modmsk_r & _SNBEP_UNC_ATTR_NF) + evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[0].cbo_filt.nid); + break; + case SNBEP_UNC_ATTR_NF1: + if (modmsk_r & _SNBEP_UNC_ATTR_NF1) + evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[1].ivbep_cbo_filt1.nid); break; case SNBEP_UNC_ATTR_A: evt_strcat(e->fstr, ":%s=0x%lx", snbep_unc_mods[idx].name, diff --git 
a/lib/pfmlib_intel_snbep_unc_perf_event.c b/lib/pfmlib_intel_snbep_unc_perf_event.c index e6cc625..2b06cb2 100644 --- a/lib/pfmlib_intel_snbep_unc_perf_event.c +++ b/lib/pfmlib_intel_snbep_unc_perf_event.c @@ -83,14 +83,21 @@ pfm_intel_snbep_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) attr->config = reg.val; - /* - * various filters - */ - if (e->count == 2) - attr->config1 = e->codes[1]; - - if (e->count == 3) - attr->config2 = e->codes[2]; + if (is_cbo_filt_event(this, reg) && e->count > 1) { + if (e->count >= 2) + attr->config1 = e->codes[1]; + if (e->count >= 3) + attr->config1 |= e->codes[2] << 32; + } else { + /* + * various filters + */ + if (e->count >= 2) + attr->config1 = e->codes[1]; + + if (e->count >= 3) + attr->config2 = e->codes[2]; + } /* * uncore measures at all priv levels diff --git a/lib/pfmlib_intel_snbep_unc_priv.h b/lib/pfmlib_intel_snbep_unc_priv.h index 3631930..d93b5f8 100644 --- a/lib/pfmlib_intel_snbep_unc_priv.h +++ b/lib/pfmlib_intel_snbep_unc_priv.h @@ -1,5 +1,5 @@ /* - * pfmlib_intel_snbep_unc_priv.c : Intel SandyBridge-EP common definitions + * pfmlib_intel_snbep_unc_priv.c : Intel SandyBridge/IvyBridge-EP common definitions * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian @@ -28,6 +28,7 @@ * Intel x86 specific pmu flags (pmu->flags 16 MSB) */ #define INTEL_PMU_FL_UNC_OCC 0x10000 /* PMU has occupancy counter filters */ +#define INTEL_PMU_FL_UNC_CBO 0x20000 /* PMU is Cbox */ #define SNBEP_UNC_ATTR_E 0 @@ -36,9 +37,10 @@ #define SNBEP_UNC_ATTR_T5 3 #define SNBEP_UNC_ATTR_TF 4 #define SNBEP_UNC_ATTR_CF 5 -#define SNBEP_UNC_ATTR_NF 6 +#define SNBEP_UNC_ATTR_NF 6 /* for filter0 */ #define SNBEP_UNC_ATTR_FF 7 #define SNBEP_UNC_ATTR_A 8 +#define SNBEP_UNC_ATTR_NF1 9 /* for filter1 */ #define _SNBEP_UNC_ATTR_I (1 << SNBEP_UNC_ATTR_I) #define _SNBEP_UNC_ATTR_E (1 << SNBEP_UNC_ATTR_E) @@ -49,41 +51,79 @@ #define _SNBEP_UNC_ATTR_NF (1 << SNBEP_UNC_ATTR_NF) #define _SNBEP_UNC_ATTR_FF (1 << SNBEP_UNC_ATTR_FF) 
#define _SNBEP_UNC_ATTR_A (1 << SNBEP_UNC_ATTR_A) +#define _SNBEP_UNC_ATTR_NF1 (1 << SNBEP_UNC_ATTR_NF1) + +#define SNBEP_UNC_IRP_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) #define SNBEP_UNC_R3QPI_ATTRS \ (_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) +#define IVBEP_UNC_R3QPI_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) + #define SNBEP_UNC_R2PCIE_ATTRS \ (_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) +#define IVBEP_UNC_R2PCIE_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) + #define SNBEP_UNC_QPI_ATTRS \ (_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) +#define IVBEP_UNC_QPI_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) + #define SNBEP_UNC_UBO_ATTRS \ (_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) +#define IVBEP_UNC_UBO_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) + + #define SNBEP_UNC_PCU_ATTRS \ (_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T5) +#define IVBEP_UNC_PCU_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T5) + #define SNBEP_UNC_PCU_BAND_ATTRS \ (SNBEP_UNC_PCU_ATTRS | _SNBEP_UNC_ATTR_FF) +#define IVBEP_UNC_PCU_BAND_ATTRS \ + (IVBEP_UNC_PCU_ATTRS | _SNBEP_UNC_ATTR_FF) + #define SNBEP_UNC_IMC_ATTRS \ (_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) -#define SNBEP_UNC_CBO_ATTRS \ +#define IVBEP_UNC_IMC_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) + +#define SNBEP_UNC_CBO_ATTRS \ (_SNBEP_UNC_ATTR_I |\ _SNBEP_UNC_ATTR_E |\ _SNBEP_UNC_ATTR_T8 |\ _SNBEP_UNC_ATTR_CF |\ _SNBEP_UNC_ATTR_TF) +#define IVBEP_UNC_CBO_ATTRS \ + (_SNBEP_UNC_ATTR_E |\ + _SNBEP_UNC_ATTR_T8 |\ + _SNBEP_UNC_ATTR_CF |\ + _SNBEP_UNC_ATTR_TF) + #define SNBEP_UNC_CBO_NID_ATTRS \ (SNBEP_UNC_CBO_ATTRS|_SNBEP_UNC_ATTR_NF) +#define IVBEP_UNC_CBO_NID_ATTRS \ + (IVBEP_UNC_CBO_ATTRS|_SNBEP_UNC_ATTR_NF1) + #define SNBEP_UNC_HA_ATTRS \ (_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) +#define IVBEP_UNC_HA_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8) + #define SNBEP_UNC_HA_OPC_ATTRS \ 
(SNBEP_UNC_HA_ATTRS|_SNBEP_UNC_ATTR_A) @@ -133,6 +173,24 @@ typedef union { } pcu; /* covers pcu */ struct { unsigned long unc_event:8; /* event code */ + unsigned long unc_res1:6; /* reserved */ + unsigned long unc_occ:2; /* occ select */ + unsigned long unc_res2:1; /* reserved */ + unsigned long unc_rst:1; /* reset */ + unsigned long unc_edge:1; /* edge detec */ + unsigned long unc_res3:1; /* reserved */ + unsigned long unc_ov_en:1; /* overflow enable */ + unsigned long unc_sel_ext:1; /* event_sel extension */ + unsigned long unc_en:1; /* enable */ + unsigned long unc_res4:1; /* reserved */ + unsigned long unc_thres:5; /* threshold */ + unsigned long unc_res5:1; /* reserved */ + unsigned long unc_occ_inv:1; /* occupancy invert */ + unsigned long unc_occ_edge:1; /* occupancy edge detect */ + unsigned long unc_res6:32; /* reserved */ + } ivbep_pcu; /* covers ivb-ep pcu */ + struct { + unsigned long unc_event:8; /* event code */ unsigned long unc_umask:8; /* unit maks */ unsigned long unc_res1:1; /* reserved */ unsigned long unc_rst:1; /* reset */ @@ -158,6 +216,23 @@ typedef union { unsigned long res4:32; } cbo_filt; /* cbox filter */ struct { + unsigned long tid:1; + unsigned long cid:4; + unsigned long res0:12; + unsigned long state:6; + unsigned long res1:9; + unsigned long res2:32; + } ivbep_cbo_filt0; /* ivbep cbox filter0 */ + struct { + unsigned long nid:16; + unsigned long res0:4; + unsigned long opc:9; + unsigned long res1:1; + unsigned long nc:1; + unsigned long isoc:1; + unsigned long res2:32; + } ivbep_cbo_filt1; /* ivbep cbox filter1 */ + struct { unsigned long filt0:8; /* band0 freq filter */ unsigned long filt1:8; /* band1 freq filter */ unsigned long filt2:8; /* band2 freq filter */ @@ -175,13 +250,43 @@ typedef union { unsigned long res1:26; /* reserved */ unsigned long res2:32; /* reserved */ } ha_opc; + struct { + unsigned long unc_event:8; /* event code */ + unsigned long unc_umask:8; /* unit mask */ + unsigned long unc_res1:1; /* reserved */ + 
unsigned long unc_rst:1; /* reset */ + unsigned long unc_edge:1; /* edge detec */ + unsigned long unc_res2:3; /* reserved */ + unsigned long unc_en:1; /* enable */ + unsigned long unc_res3:1; /* reserved */ + unsigned long unc_thres:8; /* counter mask */ + unsigned long unc_res4:32; /* reserved */ + } irp; /* covers irp */ } pfm_snbep_unc_reg_t; extern void pfm_intel_snbep_unc_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e); extern int pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e); extern const pfmlib_attr_desc_t snbep_unc_mods[]; extern int pfm_intel_snbep_unc_detect(void *this); +extern int pfm_intel_ivbep_unc_detect(void *this); extern int pfm_intel_snbep_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); extern int pfm_intel_snbep_unc_can_auto_encode(void *this, int pidx, int uidx); extern int pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info); + +static inline int +is_cbo_filt_event(void *this, pfm_intel_x86_reg_t reg) +{ + pfmlib_pmu_t *pmu = this; + uint64_t sel = reg.sel_event_select; + /* + * umask bit 0 must be 1 (OPCODE) + * TOR_INSERT: event code 0x35 + * TOR_OCCUPANCY: event code 0x36 + * LLC_LOOKUP : event code 0x34 + */ + return (pmu->flags & INTEL_PMU_FL_UNC_CBO) + && (reg.sel_unit_mask & 0x1) + && (sel == 0x35 || sel == 0x36 || sel == 0x34); +} + #endif /* __PFMLIB_INTEL_SNBEP_UNC_PRIV_H__ */ diff --git a/lib/pfmlib_intel_x86_priv.h b/lib/pfmlib_intel_x86_priv.h index 95a7565..3b077b1 100644 --- a/lib/pfmlib_intel_x86_priv.h +++ b/lib/pfmlib_intel_x86_priv.h @@ -34,7 +34,7 @@ * maximum number of unit masks groups per event */ #define INTEL_X86_NUM_GRP 8 -#define INTEL_X86_MAX_FILTERS 2 +#define INTEL_X86_MAX_FILTERS 3 /* * unit mask description diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index e1a2458..2b5d33e 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -267,6 +267,41 @@ extern pfmlib_pmu_t intel_snbep_unc_ubo_support; extern 
pfmlib_pmu_t intel_snbep_unc_r2pcie_support; extern pfmlib_pmu_t intel_snbep_unc_r3qpi0_support; extern pfmlib_pmu_t intel_snbep_unc_r3qpi1_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb0_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb1_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb2_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb3_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb4_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb5_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb6_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb7_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb8_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb9_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb10_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb11_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb12_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb13_support; +extern pfmlib_pmu_t intel_ivbep_unc_cb14_support; +extern pfmlib_pmu_t intel_ivbep_unc_ha0_support; +extern pfmlib_pmu_t intel_ivbep_unc_ha1_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc0_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc1_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc2_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc3_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc4_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc5_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc6_support; +extern pfmlib_pmu_t intel_ivbep_unc_imc7_support; +extern pfmlib_pmu_t intel_ivbep_unc_pcu_support; +extern pfmlib_pmu_t intel_ivbep_unc_qpi0_support; +extern pfmlib_pmu_t intel_ivbep_unc_qpi1_support; +extern pfmlib_pmu_t intel_ivbep_unc_qpi2_support; +extern pfmlib_pmu_t intel_ivbep_unc_ubo_support; +extern pfmlib_pmu_t intel_ivbep_unc_r2pcie_support; +extern pfmlib_pmu_t intel_ivbep_unc_r3qpi0_support; +extern pfmlib_pmu_t intel_ivbep_unc_r3qpi1_support; +extern pfmlib_pmu_t intel_ivbep_unc_r3qpi2_support; +extern pfmlib_pmu_t intel_ivbep_unc_irp_support; extern pfmlib_pmu_t intel_knc_support; extern pfmlib_pmu_t 
intel_slm_support; extern pfmlib_pmu_t power4_support; diff --git a/tests/validate_x86.c b/tests/validate_x86.c index d0a6666..2edacd5 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1251,7 +1251,7 @@ static const test_event_t x86_test_events[]={ .count = 2, .codes[0] = 0x334, .codes[1] = 0x7c0000, - .fstr = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:STATE_MESIF:e=0:i=0:t=0:tf=0:nf=0", + .fstr = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:STATE_MESIF:e=0:i=0:t=0:tf=0", }, { SRC_LINE, .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:nf=1", @@ -1294,7 +1294,7 @@ static const test_event_t x86_test_events[]={ .ret = PFM_SUCCESS, .count = 1, .codes[0] = 0x1035, - .fstr = "snbep_unc_cbo0::UNC_C_TOR_INSERTS:WB:e=0:i=0:t=0:tf=0:nf=0", + .fstr = "snbep_unc_cbo0::UNC_C_TOR_INSERTS:WB:e=0:i=0:t=0:tf=0", }, { SRC_LINE, .name = "snbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE:OPC_PCIWILF", @@ -1302,7 +1302,7 @@ static const test_event_t x86_test_events[]={ .count = 2, .codes[0] = 0x135, .codes[1] = 0xca000000, - .fstr = "snbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE:OPC_PCIWILF:e=0:i=0:t=0:tf=0:nf=0", + .fstr = "snbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE:OPC_PCIWILF:e=0:i=0:t=0:tf=0", }, { SRC_LINE, .name = "snbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE:OPC_PCIWILF:nf=1", @@ -2185,7 +2185,513 @@ static const test_event_t x86_test_events[]={ .name = "slm::no_alloc_cycles:any:t=1", .ret = PFM_ERR_ATTR }, + { SRC_LINE, + .name = "ivbep_unc_irp::unc_i_clockticks", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x0, + .fstr = "ivbep_unc_irp::UNC_I_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_irp::unc_i_tickles:lost_ownership", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x116, + .fstr = "ivbep_unc_irp::UNC_I_TICKLES:LOST_OWNERSHIP:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_irp::unc_i_transactions:reads", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x115, + .fstr = "ivbep_unc_irp::UNC_I_TRANSACTIONS:READS:e=0:t=0", + }, + { SRC_LINE, + .name = 
"ivbep_unc_irp::unc_i_transactions:reads:c=1:i", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_irp::unc_i_transactions:reads:t=6", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x6000115, + .fstr = "ivbep_unc_irp::UNC_I_TRANSACTIONS:READS:e=0:t=6", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo1::UNC_C_CLOCKTICKS:u", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x00, + .fstr = "ivbep_unc_cbo0::UNC_C_CLOCKTICKS", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x334, + .codes[1] = 0x7e0000, + .fstr = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:STATE_MESIF:e=0:t=0:tf=0", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:nf=1", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x1134, + .codes[1] = 0x7e0000, + .fstr = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:ANY:STATE_MESIF:e=0:t=0:tf=0", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:STATE_M", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:nf=3", + .ret = PFM_SUCCESS, + .count = 3, + .codes[0] = 0x5134, + .codes[1] = 0x7e0000, + .codes[2] = 0x3, + .fstr = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:ANY:NID:STATE_MESIF:e=0:t=0:tf=0:nf=3", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:STATE_M:tid=1", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:WRITE", + .ret = PFM_ERR_FEATCOMB, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:WRITE:NID:nf=3:tf=1:e:t=1", + .ret = PFM_SUCCESS, + .count = 3, + .codes[0] = 0x10c4534, + .codes[1] = 0x7e0001, + .codes[2] = 0x3, + .fstr = "ivbep_unc_cbo0::UNC_C_LLC_LOOKUP:WRITE:NID:STATE_MESIF:e=1:t=1:tf=1:nf=3", + }, + { SRC_LINE, + .name = 
"ivbep_unc_cbo0::UNC_C_LLC_VICTIMS", + .ret = PFM_ERR_UMASK, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:NID", + .ret = PFM_ERR_UMASK, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:NID:nf=1", + .ret = PFM_ERR_UMASK, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:STATE_M", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x137, + .fstr = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:STATE_M:e=0:t=0:tf=0", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:STATE_M:STATE_S", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x537, + .fstr = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:STATE_M:STATE_S:e=0:t=0:tf=0", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:STATE_M:STATE_S:NID:nf=1", + .ret = PFM_SUCCESS, + .count = 3, + .codes[0] = 0x4537, + .codes[1] = 0x0, + .codes[2] = 0x1, + .fstr = "ivbep_unc_cbo0::UNC_C_LLC_VICTIMS:STATE_M:STATE_S:NID:e=0:t=0:tf=0:nf=1", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE", + .ret = PFM_ERR_UMASK, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:WB", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1035, + .fstr = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:WB:e=0:t=0:tf=0", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE:OPC_PCIWILF", + .ret = PFM_SUCCESS, + .count = 3, + .codes[0] = 0x135, + .codes[1] = 0x0, + .codes[2] = 0x19400000ull, + .fstr = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE:OPC_PCIWILF:e=0:t=0:tf=0", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:OPCODE:OPC_PCIWILF:nf=1", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:NID_OPCODE:OPC_PCIRDCUR:nf=1", + .ret = PFM_SUCCESS, + .count = 3, + .codes[0] = 0x4135, + .codes[1] = 0x0, + .codes[2] = 0x19e00001ull, + .fstr = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:NID_OPCODE:OPC_PCIRDCUR:e=0:t=0:tf=0:nf=1", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:OPC_RFO:NID_OPCODE:nf=1", + .ret = 
PFM_SUCCESS, + .count = 3, + .codes[0] = 0x4135, + .codes[1] = 0x0, + .codes[2] = 0x18000001ull, + .fstr = "ivbep_unc_cbo0::UNC_C_TOR_INSERTS:NID_OPCODE:OPC_RFO:e=0:t=0:tf=0:nf=1", + }, + { SRC_LINE, + .name = "ivbep_unc_cbo0::UNC_C_TOR_OCCUPANCY:MISS_REMOTE", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8a36, + .fstr = "ivbep_unc_cbo0::UNC_C_TOR_OCCUPANCY:MISS_REMOTE:e=0:t=0:tf=0", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x0, + .fstr = "ivbep_unc_pcu::UNC_P_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_CLOCKTICKS:t=1", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1000000, + .fstr = "ivbep_unc_pcu::UNC_P_CLOCKTICKS:e=0:t=1", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_CORE0_TRANSITION_CYCLES", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x70, + .fstr = "ivbep_unc_pcu::UNC_P_CORE0_TRANSITION_CYCLES:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND1_CYCLES", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND2_CYCLES", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND3_CYCLES", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0xb, + .codes[1] = 0x20, + .fstr = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=0:t=0:ff=32", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND1_CYCLES:ff=16", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0xc, + .codes[1] = 0x1000, + .fstr = "ivbep_unc_pcu::UNC_P_FREQ_BAND1_CYCLES:e=0:t=0:ff=16", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND2_CYCLES:ff=8", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0xd, + .codes[1] = 0x80000, + .fstr = "ivbep_unc_pcu::UNC_P_FREQ_BAND2_CYCLES:e=0:t=0:ff=8", + }, + { SRC_LINE, + 
.name = "ivbep_unc_pcu::UNC_P_FREQ_BAND3_CYCLES:ff=40", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0xe, + .codes[1] = 0x28000000, + .fstr = "ivbep_unc_pcu::UNC_P_FREQ_BAND3_CYCLES:e=0:t=0:ff=40", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32:e", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x4000b, + .codes[1] = 0x20, + .fstr = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=1:t=0:ff=32", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32:t=24", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x1800000b, + .codes[1] = 0x20, + .fstr = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=0:t=24:ff=32", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:ff=32:e:t=4", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x404000b, + .codes[1] = 0x20, + .fstr = "ivbep_unc_pcu::UNC_P_FREQ_BAND0_CYCLES:e=1:t=4:ff=32", + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_POWER_STATE_OCCUPANCY:CORES_C0", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x4080, + .fstr = "ivbep_unc_pcu::UNC_P_POWER_STATE_OCCUPANCY:CORES_C0:e=0:t=0" + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_POWER_STATE_OCCUPANCY:CORES_C3", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8080, + .fstr = "ivbep_unc_pcu::UNC_P_POWER_STATE_OCCUPANCY:CORES_C3:e=0:t=0", + }, + { SRC_LINE, + .name = "IVBEP_UNC_PCU::UNC_P_POWER_STATE_OCCUPANCY:CORES_C6", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0xc080, + .fstr = "ivbep_unc_pcu::UNC_P_POWER_STATE_OCCUPANCY:CORES_C6:e=0:t=0" + }, + { SRC_LINE, + .name = "IVBEP_UNC_PCU::UNC_P_POWER_STATE_OCCUPANCY:CORES_C0:t=6", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x6004080, + .fstr = "ivbep_unc_pcu::UNC_P_POWER_STATE_OCCUPANCY:CORES_C0:e=0:t=6" + }, + { SRC_LINE, + .name = "ivbep_unc_pcu::UNC_P_DEMOTIONS_CORE10", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x42, + .fstr = "ivbep_unc_pcu::UNC_P_DEMOTIONS_CORE10:e=0:t=0", + }, + { SRC_LINE, + .name = 
"ivbep_unc_pcu::UNC_P_DEMOTIONS_CORE14", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x46, + .fstr = "ivbep_unc_pcu::UNC_P_DEMOTIONS_CORE14:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_ha0::UNC_H_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x0, + .fstr = "ivbep_unc_ha0::UNC_H_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_ha1::UNC_H_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x0, + .fstr = "ivbep_unc_ha1::UNC_H_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_ha1::UNC_H_REQUESTS:READS:t=1", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1000301, + .fstr = "ivbep_unc_ha1::UNC_H_REQUESTS:READS:e=0:t=1", + }, + { SRC_LINE, + .name = "ivbep_unc_ha0::UNC_H_IMC_WRITES:t=1", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1000f1a, + .fstr = "ivbep_unc_ha0::UNC_H_IMC_WRITES:ALL:e=0:t=1", + }, + { SRC_LINE, + .name = "ivbep_unc_ha0::UNC_H_IMC_READS:t=1", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1000117, + .fstr = "ivbep_unc_ha0::UNC_H_IMC_READS:NORMAL:e=0:t=1", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0xff, + .fstr = "ivbep_unc_imc0::UNC_M_CLOCKTICKS", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_CLOCKTICKS:t=1", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_DCLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x00, + .fstr = "ivbep_unc_imc0::UNC_M_DCLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_imc4::UNC_M_DCLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x00, + .fstr = "ivbep_unc_imc4::UNC_M_DCLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_CAS_COUNT:RD", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x0304, + .fstr = "ivbep_unc_imc0::UNC_M_CAS_COUNT:RD:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_POWER_CKE_CYCLES:RANK0", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x183, + .fstr 
= "ivbep_unc_imc0::UNC_M_POWER_CKE_CYCLES:RANK0:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_CAS_COUNT:WR", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0xc04, + .fstr = "ivbep_unc_imc0::UNC_M_CAS_COUNT:WR:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_RD_CAS_RANK0:BANK0", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1b0, + .fstr = "ivbep_unc_imc0::UNC_M_RD_CAS_RANK0:BANK0:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_RD_CAS_RANK4:BANK7", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x80b4, + .fstr = "ivbep_unc_imc0::UNC_M_RD_CAS_RANK4:BANK7:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_imc0::UNC_M_RD_CAS_RANK4:BANK7:t=1", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x10080b4, + .fstr = "ivbep_unc_imc0::UNC_M_RD_CAS_RANK4:BANK7:e=0:t=1", + }, + { SRC_LINE, + .name = "ivbep_unc_qpi0::UNC_Q_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x14, + .fstr = "ivbep_unc_qpi0::UNC_Q_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_qpi0::UNC_Q_RXL_FLITS_G0:DATA", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x201, + .fstr = "ivbep_unc_qpi0::UNC_Q_RXL_FLITS_G0:DATA:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_qpi0::UNC_Q_RXL_FLITS_G0:IDLE:t=1", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1000101, + .fstr = "ivbep_unc_qpi0::UNC_Q_RXL_FLITS_G0:IDLE:e=0:t=1", + }, + { SRC_LINE, + .name = "ivbep_unc_qpi0::UNC_Q_TXL_FLITS_G0:DATA", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x200, + .fstr = "ivbep_unc_qpi0::UNC_Q_TXL_FLITS_G0:DATA:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_qpi0::UNC_Q_RXL_FLITS_G1:HOM", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x200602, + .fstr = "ivbep_unc_qpi0::UNC_Q_RXL_FLITS_G1:HOM:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_qpi0::UNC_Q_TXL_FLITS_G1:HOM", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x200600, + .fstr = "ivbep_unc_qpi0::UNC_Q_TXL_FLITS_G1:HOM:e=0:t=0", + }, + { SRC_LINE, + .name = 
"ivbep_unc_ubo::UNC_U_LOCK_CYCLES", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x44, + .fstr = "ivbep_unc_ubo::UNC_U_LOCK_CYCLES:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_r2pcie::UNC_R2_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1, + .fstr = "ivbep_unc_r2pcie::UNC_R2_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_r2pcie::UNC_R2_RING_AD_USED:CW", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x3307, + .fstr = "ivbep_unc_r2pcie::UNC_R2_RING_AD_USED:CW:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_r3qpi0::UNC_R3_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1, + .fstr = "ivbep_unc_r3qpi0::UNC_R3_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_r3qpi0::UNC_R3_TXR_CYCLES_FULL:e=0:t=0", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x25, + .fstr = "ivbep_unc_r3qpi0::UNC_R3_TXR_CYCLES_FULL:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_r3qpi1::UNC_R3_CLOCKTICKS", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x1, + .fstr = "ivbep_unc_r3qpi1::UNC_R3_CLOCKTICKS:e=0:t=0", + }, + { SRC_LINE, + .name = "ivbep_unc_r3qpi1::UNC_R3_TXR_CYCLES_FULL:e=0:t=0", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x25, + .fstr = "ivbep_unc_r3qpi1::UNC_R3_TXR_CYCLES_FULL:e=0:t=0", + }, }; + #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) static int commit b4b234e6191fc2a9add4d4088a4e7ba2178ec881 Author: Stephane Eranian Date: Wed Mar 12 23:41:42 2014 +0100 update event table - added missin INT_MISC event - added DSB2MITE_SWITCHES:PENALTY_CYCLES Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h index 28e0216..4e1b600 100644 --- a/lib/events/intel_ivb_events.h +++ b/lib/events/intel_ivb_events.h @@ -286,7 +286,12 @@ static const intel_x86_umask_t ivb_cpu_clk_unhalted[]={ static const intel_x86_umask_t ivb_dsb2mite_switches[]={ { .uname = "COUNT", .udesc = "Number of DSB to MITE switches", - .ucode = 0x100, + 
.ucode = 0x0100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "PENALTY_CYCLES", + .udesc = "Number of DSB to MITE switch true penalty cycles", + .ucode = 0x0200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; @@ -1652,6 +1657,20 @@ static const intel_x86_umask_t ivb_lsd[]={ }, }; +static const intel_x86_umask_t ivb_int_misc[]={ + { .uname = "RECOVERY_CYCLES", + .udesc = "Number of cycles waiting for Machine Clears except JEClear", + .ucode = 0x300, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RECOVERY_STALLS_COUNT", + .udesc = "Number of occurrences waiting for Machine Clears", + .ucode = 0x300 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ + .uflags = INTEL_X86_NCOMBO, + .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, + }, +}; + static const intel_x86_entry_t intel_ivb_pe[]={ { .name = "ARITH", .desc = "Counts arithmetic multiply operations", @@ -2297,6 +2316,15 @@ static const intel_x86_entry_t intel_ivb_pe[]={ .ngrp = 1, .umasks = ivb_lsd, }, + { .name = "INT_MISC", + .desc = "Miscellaneous interruptions", + .code = 0xd, + .cntmsk = 0xff, + .ngrp = 1, + .modmsk = INTEL_V3_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(ivb_int_misc), + .umasks = ivb_int_misc + }, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V3_ATTRS, commit a2e7213dc9e0a3f6736e6149a21cee089a604714 Author: Stephane Eranian Date: Wed Mar 12 23:42:32 2014 +0100 add DSB2MITE_SWITCHES event Was missing and is public. 
Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index 0491fdd..78b2800 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -1737,6 +1737,14 @@ static const intel_x86_umask_t hsw_lsd[]={ }, }; +static const intel_x86_umask_t hsw_dsb2mite_switches[]={ + { .uname = "PENALTY_CYCLES", + .udesc = "Number of DSB to MITE switch true penalty cycles", + .ucode = 0x0200, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + static const intel_x86_entry_t intel_hsw_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", @@ -2301,6 +2309,15 @@ static const intel_x86_entry_t intel_hsw_pe[]={ .ngrp = 1, .umasks = hsw_page_walker_loads, }, +{ .name = "DSB2MITE_SWITCHES", + .desc = "Number of DSB to MITE switches", + .modmsk = INTEL_V4_ATTRS, + .cntmsk = 0xff, + .code = 0xab, + .numasks = LIBPFM_ARRAY_SIZE(hsw_dsb2mite_switches), + .ngrp = 1, + .umasks = hsw_dsb2mite_switches, +}, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V4_ATTRS, commit e48874920d8efe93c3167afe6d17cbc282937eb4 Author: Steve Kaufmann Date: Fri Apr 11 10:14:04 2014 +0200 fix more spelling mistakes This patch fixes spelling mistakes in event description tables. 
Signed-off-by: Steve Kaufmann diff --git a/lib/events/amd64_events_fam11h.h b/lib/events/amd64_events_fam11h.h index 5760e26..b53e7f6 100644 --- a/lib/events/amd64_events_fam11h.h +++ b/lib/events/amd64_events_fam11h.h @@ -1033,7 +1033,7 @@ static const amd64_entry_t amd64_fam11h_pe[]={ .umasks = amd64_fam11h_system_read_responses, }, { .name = "QUADWORDS_WRITTEN_TO_SYSTEM", - .desc = "Quawords Written to System", + .desc = "Quadwords Written to System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_quadwords_written_to_system), diff --git a/lib/events/amd64_events_fam14h.h b/lib/events/amd64_events_fam14h.h index 0cf11a9..905094a 100644 --- a/lib/events/amd64_events_fam14h.h +++ b/lib/events/amd64_events_fam14h.h @@ -1076,7 +1076,7 @@ static const amd64_entry_t amd64_fam14h_pe[]={ .code = 0x22, }, { .name = "RSQ_FULL", - .desc = "Number of cycles that the RSQ holds retired stores. This buffer holds the stores waiting to retired as well as requests that missed the data cacge and waiting on a refill", + .desc = "Number of cycles that the RSQ holds retired stores. 
This buffer holds the stores waiting to retired as well as requests that missed the data cache and waiting on a refill", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x23, }, diff --git a/lib/events/intel_coreduo_events.h b/lib/events/intel_coreduo_events.h index 9e0cb5e..33531cf 100644 --- a/lib/events/intel_coreduo_events.h +++ b/lib/events/intel_coreduo_events.h @@ -525,7 +525,7 @@ static const intel_x86_entry_t intel_coreduo_pe[]={ .umasks = coreduo_l2_ads, }, { .name = "DBUS_BUSY", - .desc = "Core cycle during which data buswas busy (increments by 4)", + .desc = "Core cycle during which data bus was busy (increments by 4)", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x22, diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index 78b2800..fee2a65 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -410,12 +410,12 @@ static const intel_x86_umask_t hsw_idq[]={ .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_DSB_UOPS", - .udesc = "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + .udesc = "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_MITE_UOPS", - .udesc = "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + .udesc = "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, @@ -446,14 +446,14 @@ static const intel_x86_umask_t hsw_idq[]={ .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_DSB_UOPS_CYCLES", - .udesc = "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + .udesc = "Cycles when uops 
initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x1000 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "MS_DSB_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_DSB_OCCUR", - .udesc = "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy", + .udesc = "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy", .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "MS_DSB_UOPS:c=1:e=1", .uflags = INTEL_X86_NCOMBO, diff --git a/lib/events/intel_ivbep_unc_cbo_events.h b/lib/events/intel_ivbep_unc_cbo_events.h index ee8959f..318521d 100644 --- a/lib/events/intel_ivbep_unc_cbo_events.h +++ b/lib/events/intel_ivbep_unc_cbo_events.h @@ -228,7 +228,7 @@ static const intel_x86_umask_t ivbep_unc_c_llc_victims[]={ static const intel_x86_umask_t ivbep_unc_c_ring_ad_used[]={ { .uname = "UP_VR0_EVEN", - .udesc = "Up and Even ring polarity filter on virutal ring 0", + .udesc = "Up and Even ring polarity filter on virtual ring 0", .ucode = 0x100, }, { .uname = "UP_VR0_ODD", @@ -244,7 +244,7 @@ static const intel_x86_umask_t ivbep_unc_c_ring_ad_used[]={ .ucode = 0x800, }, { .uname = "UP_VR1_EVEN", - .udesc = "Up and Even ring polarity filter on virutal ring 1", + .udesc = "Up and Even ring polarity filter on virtual ring 1", .ucode = 0x1000, }, { .uname = "UP_VR1_ODD", @@ -786,7 +786,7 @@ static const intel_x86_umask_t ivbep_unc_c_misc[]={ .uflags = INTEL_X86_NCOMBO, }, { .uname = "RFO_HIT_S", - .udesc = "Counts the number of times that an RFO hits in S state. This is usfeul for determining if it might be good for a workload to use RSPIWB instead of RSPSWB", + .udesc = "Counts the number of times that an RFO hits in S state. 
This is useful for determining if it might be good for a workload to use RSPIWB instead of RSPSWB", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, @@ -827,7 +827,7 @@ static const intel_x86_entry_t intel_ivbep_unc_c_pe[]={ .umasks = ivbep_unc_c_llc_victims, }, { .name = "UNC_C_MISC", - .desc = "Miscelleanous C-Box events", + .desc = "Miscellaneous C-Box events", .modmsk = IVBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x39, diff --git a/lib/events/intel_ivbep_unc_ha_events.h b/lib/events/intel_ivbep_unc_ha_events.h index 58f0e06..d92ca9e 100644 --- a/lib/events/intel_ivbep_unc_ha_events.h +++ b/lib/events/intel_ivbep_unc_ha_events.h @@ -568,11 +568,11 @@ static const intel_x86_umask_t ivbep_unc_h_snp_resp_recv_local[]={ static const intel_x86_umask_t ivbep_unc_h_tracker_cycles_full[]={ { .uname = "GP", - .udesc = "Number of cycles when the general purpose (GP) HA trakcer pool is completely used. It will not return valid count when BT is disabled", + .udesc = "Number of cycles when the general purpose (GP) HA tracker pool is completely used. It will not return valid count when BT is disabled", .ucode = 0x100, }, { .uname = "ALL", - .udesc = "Number of cycles when the general purpose (GP) HA trakcer pool is completely used including reserved HT entries. It will not return vaid count when BT is disabled", + .udesc = "Number of cycles when the general purpose (GP) HA tracker pool is completely used including reserved HT entries. 
It will not return vaid count when BT is disabled", .ucode = 0x200, .uflags = INTEL_X86_DFL, }, diff --git a/lib/events/intel_ivbep_unc_imc_events.h b/lib/events/intel_ivbep_unc_imc_events.h index ac8ef41..473afc4 100644 --- a/lib/events/intel_ivbep_unc_imc_events.h +++ b/lib/events/intel_ivbep_unc_imc_events.h @@ -80,7 +80,7 @@ static const intel_x86_umask_t ivbep_unc_m_dram_refresh[]={ static const intel_x86_umask_t ivbep_unc_m_major_modes[]={ { .uname = "ISOCH", - .udesc = "Counts cycles in ISOCH Major maode", + .udesc = "Counts cycles in ISOCH Major mode", .ucode = 0x800, }, { .uname = "PARTIAL", diff --git a/lib/events/intel_ivbep_unc_irp_events.h b/lib/events/intel_ivbep_unc_irp_events.h index 86b5168..19c8512 100644 --- a/lib/events/intel_ivbep_unc_irp_events.h +++ b/lib/events/intel_ivbep_unc_irp_events.h @@ -33,7 +33,7 @@ static const intel_x86_umask_t ivbep_unc_i_address_match[]={ .ucode = 0x100, }, { .uname = "MERGE_COUNT", - .udesc = "Number of times when two reuqests to the same address from the same source are received back to back, it is possible to merge them", + .udesc = "Number of times when two requests to the same address from the same source are received back to back, it is possible to merge them", .ucode = 0x200, }, }; diff --git a/lib/events/intel_ivbep_unc_r2pcie_events.h b/lib/events/intel_ivbep_unc_r2pcie_events.h index 59d96b9..081aaf2 100644 --- a/lib/events/intel_ivbep_unc_r2pcie_events.h +++ b/lib/events/intel_ivbep_unc_r2pcie_events.h @@ -188,7 +188,7 @@ static const intel_x86_entry_t intel_ivbep_unc_r2_pe[]={ .umasks = ivbep_unc_r2_rxr_ak_bounces }, { .name = "UNC_R2_RXR_OCCUPANCY", - .desc = "Ingress occpancy accumulator", + .desc = "Ingress occupancy accumulator", .code = 0x13, .cntmsk = 0x1, .modmsk = IVBEP_UNC_R2PCIE_ATTRS, diff --git a/lib/events/intel_p6_events.h b/lib/events/intel_p6_events.h index f8a83a8..6964eff 100644 --- a/lib/events/intel_p6_events.h +++ b/lib/events/intel_p6_events.h @@ -216,7 +216,7 @@ static const 
intel_x86_entry_t intel_p6_pe[]={ .code = 0x85, }, { .name = "IFU_MEM_STALL", - .desc = "Number of cycles instruction fetch is stalled for any reason. Includs IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", + .desc = "Number of cycles instruction fetch is stalled for any reason. Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, diff --git a/lib/events/intel_pii_events.h b/lib/events/intel_pii_events.h index 5846c64..5c614c0 100644 --- a/lib/events/intel_pii_events.h +++ b/lib/events/intel_pii_events.h @@ -180,7 +180,7 @@ static const intel_x86_entry_t intel_pii_pe[]={ .code = 0x85, }, { .name = "IFU_MEM_STALL", - .desc = "Number of cycles instruction fetch is stalled for any reason. Includs IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", + .desc = "Number of cycles instruction fetch is stalled for any reason. Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, diff --git a/lib/events/intel_pm_events.h b/lib/events/intel_pm_events.h index e32a883..1bbefdb 100644 --- a/lib/events/intel_pm_events.h +++ b/lib/events/intel_pm_events.h @@ -283,7 +283,7 @@ static const intel_x86_entry_t intel_pm_pe[]={ .code = 0x85, }, { .name = "IFU_MEM_STALL", - .desc = "Number of cycles instruction fetch is stalled for any reason. Includs IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", + .desc = "Number of cycles instruction fetch is stalled for any reason. 
Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, diff --git a/lib/events/intel_ppro_events.h b/lib/events/intel_ppro_events.h index 0555652..d0b9df6 100644 --- a/lib/events/intel_ppro_events.h +++ b/lib/events/intel_ppro_events.h @@ -121,7 +121,7 @@ static const intel_x86_entry_t intel_ppro_pe[]={ .code = 0x85, }, { .name = "IFU_MEM_STALL", - .desc = "Number of cycles instruction fetch is stalled for any reason. Includs IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", + .desc = "Number of cycles instruction fetch is stalled for any reason. Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, diff --git a/lib/events/intel_slm_events.h b/lib/events/intel_slm_events.h index 558dbf8..c7b5ce7 100644 --- a/lib/events/intel_slm_events.h +++ b/lib/events/intel_slm_events.h @@ -97,7 +97,7 @@ static const intel_x86_umask_t slm_inst_retired[]={ static const intel_x86_umask_t slm_l2_reject_xq[]={ { .uname = "ALL", - .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the IDI link. The XQ may reject transactions fro mthe L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", + .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the IDI link. 
The XQ may reject transactions from the L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", .ucode = 0x000, .uflags= INTEL_X86_DFL, }, diff --git a/lib/events/intel_snbep_unc_cbo_events.h b/lib/events/intel_snbep_unc_cbo_events.h index 40d3e37..ae10b61 100644 --- a/lib/events/intel_snbep_unc_cbo_events.h +++ b/lib/events/intel_snbep_unc_cbo_events.h @@ -580,7 +580,7 @@ static const intel_x86_umask_t snbep_unc_c_txr_inserts[]={ .uflags = INTEL_X86_NCOMBO, }, { .uname = "IV_CACHE", - .udesc = "Counts the number of ring transactions from Cachebo ton IV ring", + .udesc = "Counts the number of ring transactions from Cachebo to IV ring", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, @@ -701,7 +701,7 @@ static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ .code = 0x07, }, { .name = "UNC_C_RXR_EXT_STARVED", - .desc = "Ingress arbiter blockig cycles", + .desc = "Ingress arbiter blocking cycles", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x12, diff --git a/lib/events/intel_snbep_unc_imc_events.h b/lib/events/intel_snbep_unc_imc_events.h index e2494b2..b49487c 100644 --- a/lib/events/intel_snbep_unc_imc_events.h +++ b/lib/events/intel_snbep_unc_imc_events.h @@ -78,7 +78,7 @@ static const intel_x86_umask_t snbep_unc_m_dram_refresh[]={ static const intel_x86_umask_t snbep_unc_m_major_modes[]={ { .uname = "ISOCH", - .udesc = "Counts cycles in ISOCH Major maode", + .udesc = "Counts cycles in ISOCH Major mode", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, commit 42c3455dd1a51804c854d7ce651434df7701f4c2 Author: Stephane Eranian Date: Wed Apr 16 17:49:58 2014 +0200 add Intel Haswell event alias for MEM_LOAD_UOPS_L3_* To be more consistent with IvyBridge: MEM_LOAD_UOPS_LLC_HIT_RETIRED = MEM_LOAD_UOPS_L3_HIT_RETIRED MEM_LOAD_UOPS_LLC_MISS_RETIRED = MEM_LOAD_UOPS_L3_MISS_RETIRED Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index fee2a65..8706634 100644 --- 
a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -2095,6 +2095,17 @@ static const intel_x86_entry_t intel_hsw_pe[]={ .numasks = LIBPFM_ARRAY_SIZE(hsw_mem_load_uops_l3_hit_retired), .umasks = hsw_mem_load_uops_l3_hit_retired }, + { .name = "MEM_LOAD_UOPS_LLC_HIT_RETIRED", + .desc = "L3 hit load uops retired (Precise Event)", + .equiv = "MEM_LOAD_UOPS_L3_HIT_RETIRED", + .code = 0xd2, + .cntmsk = 0xf, + .ngrp = 1, + .flags = INTEL_X86_PEBS, + .modmsk = INTEL_V4_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(hsw_mem_load_uops_l3_hit_retired), + .umasks = hsw_mem_load_uops_l3_hit_retired + }, { .name = "MEM_LOAD_UOPS_L3_MISS_RETIRED", .desc = "Load uops retired that missed the L3 (Precise Event)", .code = 0xd3, @@ -2105,6 +2116,17 @@ static const intel_x86_entry_t intel_hsw_pe[]={ .numasks = LIBPFM_ARRAY_SIZE(hsw_mem_load_uops_l3_miss_retired), .umasks = hsw_mem_load_uops_l3_miss_retired }, + { .name = "MEM_LOAD_UOPS_LLC_MISS_RETIRED", + .desc = "Load uops retired that missed the L3 (Precise Event)", + .equiv = "MEM_LOAD_UOPS_L3_MISS_RETIRED", + .code = 0xd3, + .cntmsk = 0xf, + .ngrp = 1, + .flags = INTEL_X86_PEBS, + .modmsk = INTEL_V4_ATTRS, + .numasks = LIBPFM_ARRAY_SIZE(hsw_mem_load_uops_l3_miss_retired), + .umasks = hsw_mem_load_uops_l3_miss_retired + }, { .name = "MEM_LOAD_UOPS_RETIRED", .desc = "Retired load uops (Precise Event)", .code = 0xd1, commit 89daf8f981e8ba70447cd16fca4f5715dbb99b14 Author: Stephane Eranian Date: Thu Apr 17 16:39:43 2014 +0200 Add Intel Haswell L1D_PEND_MISS.EDGE alias Alias to L1D_PEND_MISS.OCCURRENCES to be consistent with IvyBridge Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index 8706634..8ec1165 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -578,6 +578,14 @@ static const intel_x86_umask_t hsw_l1d_pend_miss[]={ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, + { .uname = "EDGE", + .udesc = 
"Number L1D miss outstanding", + .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ + .uequiv = "PENDING:c=1:e=1", + .ucntmsk = 0x4, + .uflags = INTEL_X86_NCOMBO, + .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, + }, }; static const intel_x86_umask_t hsw_l2_demand_rqsts[]={ commit 1c324233cc97f99195557a1c4658c93f4939cd96 Author: Steve Kaufmann Date: Thu Apr 17 16:43:31 2014 +0200 more spelling fixes In man pages this time. Signed-off-by: Steve Kaufmann diff --git a/docs/man3/libpfm_amd64_fam15h.3 b/docs/man3/libpfm_amd64_fam15h.3 index 84ae455..ac594c2 100644 --- a/docs/man3/libpfm_amd64_fam15h.3 +++ b/docs/man3/libpfm_amd64_fam15h.3 @@ -12,7 +12,7 @@ libpfm_amd64_fam15h - support for AMD64 Family 15h processors .sp .SH DESCRIPTION The library supports AMD Family 15h processors core PMU in both 32 and 64-bit modes. The -uncore PMU (Northbridge) is also supported as a separate PMU model. +uncore (NorthBridge) PMU is also supported as a separate PMU model. .SH MODIFIERS The following modifiers are supported on AMD64 Family 15h core PMU: diff --git a/docs/man3/libpfm_intel_ivb_unc.3 b/docs/man3/libpfm_intel_ivb_unc.3 index 1e9921f..5176ee7 100644 --- a/docs/man3/libpfm_intel_ivb_unc.3 +++ b/docs/man3/libpfm_intel_ivb_unc.3 @@ -13,7 +13,7 @@ The library supports the Intel Ivy Bridge client part (model 58) uncore PMU. The support is currently limited to the Coherency Box, so called C-Box for up to 4 physical cores. -Each physical core has an associated C-Box which it uses to communictate with +Each physical core has an associated C-Box which it uses to communicate with the L3 cache. The C-boxes all support the same set of events. However, Core 0 C-box (snb_unc_cbo0) supports an additional uncore clock ticks event: \fBUNC_CLOCKTICKS\fR. 
diff --git a/docs/man3/libpfm_intel_ivbep_unc_cbo.3 b/docs/man3/libpfm_intel_ivbep_unc_cbo.3 index e417a46..b6023b7 100644 --- a/docs/man3/libpfm_intel_ivbep_unc_cbo.3 +++ b/docs/man3/libpfm_intel_ivbep_unc_cbo.3 @@ -36,11 +36,11 @@ the threshold. This is an integer modifier with values in the range [0:255]. Node filter. Certain events, such as UNC_C_LLC_LOOKUP, UNC_C_LLC_VICTIMS, provide a \fBNID\fR umask. Sometimes the \fBNID\fR is combined with other filtering capabilities, such as opcodes. The node filter is an 8-bit max bitmask. A node corresponds to a processor -socket. The legal values therefore depdend on the underlying hardware configuration. For +socket. The legal values therefore depend on the underlying hardware configuration. For dual-socket systems, the bitmask has two valid bits [0:1]. .TP .B cf -Core Filter. This is a 3-bit filter which is used to filter based on phyiscal core origin +Core Filter. This is a 3-bit filter which is used to filter based on physical core origin of the C-Box request. Possible values are 0-7. If the filter is not specified, then no filtering takes place. .TP @@ -53,7 +53,7 @@ specified, then no filtering takes place. Certain events, such as UNC_C_TOR_INSERTS supports opcode matching on the C-BOX transaction type. To use this feature, first an opcode matching umask must be selected, e.g., MISS_OPCODE. -Second, the opcode to match on must be selected via a second umasks amongs the OPC_* umasks. +Second, the opcode to match on must be selected via a second umask among the OPC_* umasks. For instance, UNC_C_TOR_INSERTS:OPCODE:OPC_RFO, counts the number of TOR insertions for RFO transactions. diff --git a/docs/man3/libpfm_intel_ivbep_unc_pcu.3 b/docs/man3/libpfm_intel_ivbep_unc_pcu.3 index 65ec6a3..399824c 100644 --- a/docs/man3/libpfm_intel_ivbep_unc_pcu.3 +++ b/docs/man3/libpfm_intel_ivbep_unc_pcu.3 @@ -26,7 +26,7 @@ the threshold. This is an integer modifier with values in the range [0:15]. 
.B ff Enable frequency band filtering. This modifier applies only to the UNC_P_FREQ_BANDx_CYCLES events, where x is [0-3]. The modifiers expects an integer in the range [0-255]. The value is interpreted as a frequency value to be -multipled by 100Mhz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are +multiplied by 100Mhz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are counted. .SH Frequency band filtering diff --git a/docs/man3/libpfm_intel_ivbep_unc_ubo.3 b/docs/man3/libpfm_intel_ivbep_unc_ubo.3 index 2c4a6c1..8b4024d 100644 --- a/docs/man3/libpfm_intel_ivbep_unc_ubo.3 +++ b/docs/man3/libpfm_intel_ivbep_unc_ubo.3 @@ -36,7 +36,7 @@ modifier (t) with a value greater or equal to one. This is a boolean modifier. .B ff Enable frequency band filtering. This modifier applies only to the UNC_P_FREQ_BANDx_CYCLES events, where x is [0-3]. The modifiers expects an integer in the range [0-255]. The value is interpreted as a frequency value to be -multipled by 100Mhz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are +multiplied by 100Mhz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are counted. .SH Frequency band filtering diff --git a/docs/man3/libpfm_intel_rapl.3 b/docs/man3/libpfm_intel_rapl.3 index cf7835c..d5e6227 100644 --- a/docs/man3/libpfm_intel_rapl.3 +++ b/docs/man3/libpfm_intel_rapl.3 @@ -10,7 +10,7 @@ libpfm_intel_rapl - support for Intel RAPL PMU .sp .SH DESCRIPTION The library supports the Intel Running Average Power Limit (RAPL) -energy consomption counters. This is a socket-level set of counters +energy consumption counters. This is a socket-level set of counters which reports energy consumption in Joules. There are up to 3 counters each measuring only one event. 
The following events are defined: .TP diff --git a/docs/man3/libpfm_intel_snb_unc.3 b/docs/man3/libpfm_intel_snb_unc.3 index 9591832..c0ba38d 100644 --- a/docs/man3/libpfm_intel_snb_unc.3 +++ b/docs/man3/libpfm_intel_snb_unc.3 @@ -13,7 +13,7 @@ The library supports the Intel Sandy Bridge client part (model 42) uncore PMU. The support is currently limited to the Coherency Box, so called C-Box for up to 4 physical cores. -Each physical core has an associated C-Box which it uses to communictate with +Each physical core has an associated C-Box which it uses to communicate with the L3 cache. The C-boxes all support the same set of events. However, Core 0 C-box (snb_unc_cbo0) supports an additional uncore clock ticks event: \fBUNC_CLOCKTICKS\fR. diff --git a/docs/man3/libpfm_intel_snbep_unc_cbo.3 b/docs/man3/libpfm_intel_snbep_unc_cbo.3 index 24fd517..6281ed1 100644 --- a/docs/man3/libpfm_intel_snbep_unc_cbo.3 +++ b/docs/man3/libpfm_intel_snbep_unc_cbo.3 @@ -40,11 +40,11 @@ the threshold. This is an integer modifier with values in the range [0:255]. Node filter. Certain events, such as UNC_C_LLC_LOOKUP, UNC_C_LLC_VICTIMS, provide a \fBNID\fR umask. Sometimes the \fBNID\fR is combined with other filtering capabilities, such as opcodes. The node filter is an 8-bit max bitmask. A node corresponds to a processor -socket. The legal values therefore depdend on the underlying hardware configuration. For +socket. The legal values therefore depend on the underlying hardware configuration. For dual-socket systems, the bitmask has two valid bits [0:1]. .TP .B cf -Core Filter. This is a 3-bit filter which is used to filter based on phyiscal core origin +Core Filter. This is a 3-bit filter which is used to filter based on physical core origin of the C-Box request. Possible values are 0-7. If the filter is not specified, then no filtering takes place. .TP @@ -57,7 +57,7 @@ specified, then no filtering takes place. 
Certain events, such as UNC_C_TOR_INSERTS supports opcode matching on the C-BOX transaction type. To use this feature, first an opcode matching umask must be selected, e.g., MISS_OPCODE. -Second, the opcode to match on must be selected via a second umasks amongs the OPC_* umasks. +Second, the opcode to match on must be selected via a second umask among the OPC_* umasks. For instance, UNC_C_TOR_INSERTS:OPCODE:OPC_RFO, counts the number of TOR insertions for RFO transactions. diff --git a/docs/man3/libpfm_intel_snbep_unc_pcu.3 b/docs/man3/libpfm_intel_snbep_unc_pcu.3 index 47ea498..9675cd5 100644 --- a/docs/man3/libpfm_intel_snbep_unc_pcu.3 +++ b/docs/man3/libpfm_intel_snbep_unc_pcu.3 @@ -31,7 +31,7 @@ the threshold. This is an integer modifier with values in the range [0:15]. .B ff Enable frequency band filtering. This modifier applies only to the UNC_P_FREQ_BANDx_CYCLES events, where x is [0-3]. The modifiers expects an integer in the range [0-255]. The value is interpreted as a frequency value to be -multipled by 100Mhz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are +multiplied by 100Mhz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are counted. .SH Frequency band filtering diff --git a/docs/man3/libpfm_intel_snbep_unc_ubo.3 b/docs/man3/libpfm_intel_snbep_unc_ubo.3 index afd246e..97b5fa8 100644 --- a/docs/man3/libpfm_intel_snbep_unc_ubo.3 +++ b/docs/man3/libpfm_intel_snbep_unc_ubo.3 @@ -41,7 +41,7 @@ modifier (t) with a value greater or equal to one. This is a boolean modifier. .B ff Enable frequency band filtering. This modifier applies only to the UNC_P_FREQ_BANDx_CYCLES events, where x is [0-3]. The modifiers expects an integer in the range [0-255]. The value is interpreted as a frequency value to be -multipled by 100Mhz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are +multiplied by 100Mhz. 
Thus if the value is 32, then all cycles where the processor is running at 3.2GHz and more are counted. .SH Frequency band filtering diff --git a/docs/man3/libpfm_perf_event_raw.3 b/docs/man3/libpfm_perf_event_raw.3 index 2f8b535..fa1f70f 100644 --- a/docs/man3/libpfm_perf_event_raw.3 +++ b/docs/man3/libpfm_perf_event_raw.3 @@ -22,7 +22,7 @@ The syntax is very simple: rX. X is the hexadecimal 64-bit value for the event. It may include event filters on some PMU models. The hexadecimal number is passed without the 0x prefix, e.g., r01c4. -The library's standard perf_events attributes are supported oby this PMU model. +The library's standard perf_events attributes are supported by this PMU model. They are separated with colons as is customary with the library. .SH MODIFIERS @@ -59,7 +59,7 @@ event should be sampled 100 times per second on average. There is no default val .TP .B excl The associated event is the only event measured on the PMU. This applies only to hardware -events. This atrtribute requires admin privileges. Default is off. +events. This attribute requires admin privileges. Default is off. .TP .B precise Enables precise sampling mode. This option is only valid on sampling events. This is an diff --git a/docs/man3/pfm_find_event.3 b/docs/man3/pfm_find_event.3 index 0ac2a42..7a52c41 100644 --- a/docs/man3/pfm_find_event.3 +++ b/docs/man3/pfm_find_event.3 @@ -16,7 +16,7 @@ and try to find the matching event. The event string is a structured string and it is composed as follows: .TP -.B [pmu_name::]event_name[:unit_mask][:modifer|:modifier=val] +.B [pmu_name::]event_name[:unit_mask][:modifier|:modifier=val] .PP The various components are separated by \fB:\fR or \fB::\fR, they @@ -24,7 +24,7 @@ are defined as follows: .TP .B pmu_name -This is an optional prefix to designte a specific PMU model. With the +This is an optional prefix to designate a specific PMU model. With the prefix the event which matches the event_name is used. 
In case multiple PMU models are activated, there may be conflict with identical event names to mean the same or different things. In that case, it is necessary diff --git a/docs/man3/pfm_get_event_next.3 b/docs/man3/pfm_get_event_next.3 index 3a14af6..386ac03 100644 --- a/docs/man3/pfm_get_event_next.3 +++ b/docs/man3/pfm_get_event_next.3 @@ -25,7 +25,7 @@ when encoding events. To iterate over a list of events for a given PMU model, all that is needed is an initial identifier for the PMU. The first event identifier -is usually obainted via \fBpfm_get_pmu_info()\fR. +is usually obtained via \fBpfm_get_pmu_info()\fR. The \fBpfm_get_event_next()\fR function returns the identifier of next supported event after the one passed in \fBidx\fR. This iterator commit 285a80941e04e17f66c3322a2741bb27766a0373 Author: Stephane Eranian Date: Fri Apr 18 14:54:54 2014 +0200 add more HSW umask aliases to match IVB This patch adds some more umask aliases to the following events to maintain compat with IvyBridge: - L2_RQSTS - BR_MISP_EXEC - BR_MISP_RETIRED - BR_INST_RETIRED - BR_INST_EXEC Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index 8ec1165..b4e7c00 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -38,11 +38,24 @@ static const intel_x86_umask_t hsw_br_inst_exec[]={ .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "NONTAKEN_COND", + .udesc = "All macro conditional nontaken branch instructions", + .ucode = 0x4100, + .uequiv = "NONTAKEN_CONDITIONAL", + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "TAKEN_CONDITIONAL", .udesc = "Taken speculative and retired macro-conditional branches", .ucode = 0x8100, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "TAKEN_COND", + .udesc = "Taken speculative and retired macro-conditional branches", + .ucode = 0x8100, + .uequiv = "TAKEN_CONDITIONAL", + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "TAKEN_DIRECT_JUMP", .udesc = "Taken speculative and
retired macro-conditional branch instructions excluding calls and indirects", .ucode = 0x8200, @@ -68,6 +81,18 @@ static const intel_x86_umask_t hsw_br_inst_exec[]={ .ucode = 0xc100, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "ALL_COND", + .udesc = "Speculative and retired macro-conditional branches", + .ucode = 0xc100, + .uequiv = "ALL_CONDITIONAL", + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ANY_COND", + .udesc = "Speculative and retired macro-conditional branches", + .ucode = 0xc100, + .uequiv = "ALL_CONDITIONAL", + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "ALL_DIRECT_JMP", .udesc = "Speculative and retired macro-unconditional branches excluding calls and indirects", .ucode = 0xc200, @@ -106,6 +131,12 @@ static const intel_x86_umask_t hsw_br_inst_retired[]={ .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, + { .uname = "COND", + .udesc = "Counts all taken and not taken macro conditional branch instructions", + .ucode = 0x100, + .uequiv = "CONDITIONAL", + .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, { .uname = "NEAR_CALL", .udesc = "Counts all macro direct and indirect near calls", .ucode = 0x200, @@ -144,11 +175,23 @@ static const intel_x86_umask_t hsw_br_misp_exec[]={ .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "NONTAKEN_COND", + .udesc = "Not taken speculative and retired mispredicted macro conditional branches", + .ucode = 0x4100, + .uequiv = "NONTAKEN_CONDITIONAL", + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "TAKEN_CONDITIONAL", .udesc = "Taken speculative and retired mispredicted macro conditional branches", .ucode = 0x8100, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "TAKEN_COND", + .udesc = "Taken speculative and retired mispredicted macro conditional branches", + .ucode = 0x8100, + .uequiv = "TAKEN_CONDITIONAL", + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "TAKEN_INDIRECT_JUMP_NON_CALL_RET", .udesc = "Taken speculative and retired mispredicted indirect branches excluding calls and returns", .ucode = 
0x8400, @@ -164,6 +207,12 @@ static const intel_x86_umask_t hsw_br_misp_exec[]={ .ucode = 0xc100, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "ANY_COND", + .udesc = "Speculative and retired mispredicted macro conditional branches", + .ucode = 0xc100, + .uequiv = "ALL_CONDITIONAL", + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "ALL_INDIRECT_JUMP_NON_CALL_RET", .udesc = "All mispredicted indirect branches that are not calls nor returns", .ucode = 0xc400, @@ -187,6 +236,12 @@ static const intel_x86_umask_t hsw_br_misp_retired[]={ .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, + { .uname = "COND", + .udesc = "All mispredicted macro conditional branch instructions", + .ucode = 0x100, + .uequiv = "CONDITIONAL", + .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, { .uname = "ALL_BRANCHES", .udesc = "All mispredicted macro branches (architectural event)", .ucode = 0x0, /* architectural encoding */ @@ -654,11 +709,23 @@ static const intel_x86_umask_t hsw_l2_rqsts[]={ .ucode = 0x2200, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "RFO_MISS", + .udesc = "RFO requests that miss L2 cache", + .ucode = 0x2200, + .uequiv = "DEMAND_RFO_MISS", + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "DEMAND_RFO_HIT", .udesc = "RFO requests that hit L2 cache", .ucode = 0x4200, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "RFO_HIT", + .udesc = "RFO requests that hit L2 cache", + .ucode = 0x4200, + .uequiv = "DEMAND_RFO_HIT", + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "CODE_RD_MISS", .udesc = "L2 cache misses when fetching instructions", .ucode = 0x2400, commit 92a7428d9153d652456d8728e6c8a67f0d375280 Author: Stephane Eranian Date: Mon May 12 21:19:58 2014 +0200 add missing Haswell cpu_clk_unhalted:ref_p Add missing cpu_clk_unhalted:ref_p as an alias cpu_clk_unhalted:ref_xclk. 
Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index b4e7c00..028f295 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -279,6 +279,12 @@ static const intel_x86_umask_t hsw_cpu_clk_thread_unhalted[]={ .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "REF_P", + .udesc = "Cycles when the core is unhalted (count at 100 Mhz)", + .ucode = 0x100, + .uequiv = "REF_XCLK", + .uflags= INTEL_X86_NCOMBO, + }, { .uname = "THREAD_P", .udesc = "Cycles when thread is not halted", .ucode = 0x000, commit 58e7a3c6b29f121e9eaf0faf35304957e1d48fdf Author: Stephane Eranian Date: Tue Jun 10 21:31:30 2014 +0200 remove duplicated ivbep_unc_pcu::UNC_P_DEMOTIONS_CORE14 Remove duplicated UNC_P_DEMOTIONS_CORE14 Reported-by: James Ralph Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_ivbep_unc_pcu_events.h b/lib/events/intel_ivbep_unc_pcu_events.h index 14188e5..05b0118 100644 --- a/lib/events/intel_ivbep_unc_pcu_events.h +++ b/lib/events/intel_ivbep_unc_pcu_events.h @@ -338,12 +338,6 @@ static const intel_x86_entry_t intel_ivbep_unc_p_pe[]={ .cntmsk = 0xf, .modmsk = IVBEP_UNC_PCU_ATTRS, }, - { .name = "UNC_P_DEMOTIONS_CORE14", - .desc = "Core 14 C State Demotions", - .code = 0x2d, - .cntmsk = 0xf, - .modmsk = IVBEP_UNC_PCU_ATTRS, - }, { .name = "UNC_P_FREQ_BAND0_CYCLES", .desc = "Frequency Residency", .code = 0xb, commit e1d5a2359c6b161a5e5242fe0bfd69f2bce77f2a Author: Gary Mohr Date: Sun Jun 15 09:40:09 2014 +0200 Fix NID filter for SNB-EP Cbox LLC_LOOKUP, LLC_VICTIMS This patch fixes NID management for SNB-EP Cbox LLC_VICTIMS and LLC_LOOKUP events. This patch also adds UNC_C_LLC_LOOKUP:ANY. The validation test suite is updated accordingly. 
Signed-off-by: Gary Mohr diff --git a/lib/events/intel_snbep_unc_cbo_events.h b/lib/events/intel_snbep_unc_cbo_events.h index ae10b61..430a47c 100644 --- a/lib/events/intel_snbep_unc_cbo_events.h +++ b/lib/events/intel_snbep_unc_cbo_events.h @@ -162,6 +162,12 @@ } static const intel_x86_umask_t snbep_unc_c_llc_lookup[]={ + { .uname = "ANY", + .udesc = "Any request", + .grpid = 0, + .uflags = INTEL_X86_NCOMBO, + .ucode = 0x1f00, + }, { .uname = "DATA_READ", .udesc = "Data read requests", .grpid = 0, @@ -181,7 +187,7 @@ static const intel_x86_umask_t snbep_unc_c_llc_lookup[]={ .ucode = 0x900, }, { .uname = "NID", - .udesc = "Match a given RTID destination NID", + .udesc = "Match a given RTID destination NID (must provide nf=X modifier)", .uflags = INTEL_X86_NCOMBO | INTEL_X86_GRP_DFL_NONE, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .grpid = 1, @@ -208,7 +214,9 @@ static const intel_x86_umask_t snbep_unc_c_llc_victims[]={ .ucode = 0x800, }, { .uname = "NID", - .udesc = "Victimized Lines matching the NID filter", + .udesc = "Victimized Lines matching the NID filter (must provide nf=X modifier)", + .uflags = INTEL_X86_NCOMBO, + .umodmsk_req = _SNBEP_UNC_ATTR_NF, .ucode = 0x4000, }, }; @@ -438,7 +446,7 @@ static const intel_x86_umask_t snbep_unc_c_tor_inserts[]={ .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_ALL", - .udesc = "Number of NID-matched transactions inserted into the TOR", + .udesc = "Number of NID-matched transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4800, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, @@ -633,9 +641,10 @@ static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ }, { .name = "UNC_C_LLC_VICTIMS", .desc = "Lines victimized", - .modmsk = SNBEP_UNC_CBO_ATTRS, + .modmsk = SNBEP_UNC_CBO_NID_ATTRS, .cntmsk = 0x3, .code = 0x37, + .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_llc_victims), .ngrp = 1, .umasks = snbep_unc_c_llc_victims, diff --git a/tests/validate_x86.c b/tests/validate_x86.c index 
2edacd5..b5da6a6 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -1254,6 +1254,14 @@ static const test_event_t x86_test_events[]={ .fstr = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:STATE_MESIF:e=0:i=0:t=0:tf=0", }, { SRC_LINE, + .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:ANY", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x1f34, + .codes[1] = 0x7c0000, + .fstr = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:ANY:STATE_MESIF:e=0:i=0:t=0:tf=0", + }, + { SRC_LINE, .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:DATA_READ:nf=1", .ret = PFM_ERR_ATTR, }, @@ -1262,6 +1270,10 @@ static const test_event_t x86_test_events[]={ .ret = PFM_ERR_ATTR, }, { SRC_LINE, + .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, .name = "snbep_unc_cbo0::UNC_C_LLC_LOOKUP:NID:STATE_M", .ret = PFM_ERR_ATTR, }, commit 9d96dfd265971c66ee7de876b3bcbaf1c05d6291 Author: Stephane Eranian Date: Sun Jun 15 09:47:44 2014 +0200 Fix IVB-EP Cbox descriptions for LLC_VICTIMS and LLC_LOOKUP To add explicit language to umasks depending on the nf=X filter. 
Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_ivbep_unc_cbo_events.h b/lib/events/intel_ivbep_unc_cbo_events.h index 318521d..3827e03 100644 --- a/lib/events/intel_ivbep_unc_cbo_events.h +++ b/lib/events/intel_ivbep_unc_cbo_events.h @@ -187,7 +187,7 @@ static const intel_x86_umask_t ivbep_unc_c_llc_lookup[]={ .ucode = 0x1100, }, { .uname = "NID", - .udesc = "Match a given RTID destination NID", + .udesc = "Match a given RTID destination NID (must provide nf=X modifier)", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .grpid = 1, .ucode = 0x4100, @@ -218,7 +218,7 @@ static const intel_x86_umask_t ivbep_unc_c_llc_victims[]={ .grpid = 0, }, { .uname = "NID", - .udesc = "Victimized Lines matching the NID filter", + .udesc = "Victimized Lines matching the NID filter (must provide nf=X modifier)", .ucode = 0x4000, .uflags = INTEL_X86_GRP_DFL_NONE, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, @@ -536,7 +536,7 @@ static const intel_x86_umask_t ivbep_unc_c_tor_inserts[]={ .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_ALL", - .udesc = "Number of NID-matched transactions inserted into the TOR", + .udesc = "Number of NID-matched transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4800, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, commit 9926c1c21c23c14b0ab496a871e65028aeb103d7 Author: Stephane Eranian Date: Mon Jun 23 22:41:38 2014 +0200 add Intel Haswell CYCLE_ACTIVITY:STALLS_L1D_PENDING Event missing from table. Count the number of stall cycles due to pending L1D load cache misses. See SDM Vol3b. 
Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index 028f295..c51120c 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -310,6 +310,12 @@ static const intel_x86_umask_t hsw_cycle_activity[]={ .ucntmsk= 0x4, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "STALL_L1D_PENDING", + .udesc = "Executions stalls due to pending L1D load cache misses", + .ucode = 0x0c00 | (0xc << INTEL_X86_CMASK_BIT), + .ucntmsk= 0x4, + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "STALLS_L2_PENDING", .udesc = "Execution stalls due to L2 pending loads (must use with HT off only)", .ucode = 0x0500 | (0x5 << INTEL_X86_CMASK_BIT), commit 69ef1478d2c68ed32b2709fefd1169e1ff93dd7f Author: Stephane Eranian Date: Tue Jul 22 23:06:24 2014 +0200 add ivb::BR_MISP_EXEC:TAKEN_RETURN_NEAR As an alias to ivb::BR_MISP_EXEC:TAKEN_NEAR_RETURN to be consistent with SNB. Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h index 4e1b600..dac8163 100644 --- a/lib/events/intel_ivb_events.h +++ b/lib/events/intel_ivb_events.h @@ -170,6 +170,12 @@ static const intel_x86_umask_t ivb_br_misp_exec[]={ .ucode = 0x8800, .uflags= INTEL_X86_NCOMBO, }, + { .uname = "TAKEN_RETURN_NEAR", + .udesc = "All taken mispredicted indirect branches that have a return mnemonic", + .ucode = 0x8800, + .uequiv ="TAKEN_NEAR_RETURN", + .uflags= INTEL_X86_NCOMBO, + }, { .uname = "TAKEN_DIRECT_NEAR_CALL", .udesc = "All taken mispredicted non-indirect calls", .ucode = 0x9000, commit 60cdc9bfae9e9c7d5294010fa135cccc9d014d10 Author: James Ralph Date: Wed Jul 23 09:52:26 2014 +0200 add RAPL support for more Haswell models This patch adds RAPL support for Haswell models: - 60 - 69 - 71 Signed-off-by: James Ralph diff --git a/lib/pfmlib_intel_rapl.c b/lib/pfmlib_intel_rapl.c index 1215731..4eb67ef 100644 --- a/lib/pfmlib_intel_rapl.c +++ b/lib/pfmlib_intel_rapl.c @@ -83,7 +83,10 @@ pfm_rapl_detect(void *this) 
switch(pfm_intel_x86_cfg.model) { case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ + case 60: /* Haswell */ + case 69: /* Haswell */ case 70: /* Haswell */ + case 71: /* Haswell */ /* already setup by default */ break; case 45: /* Sandy Bridg-EP */ commit 56976f3b1b419fd82016b815f32d62b456e7f121 Author: Gary Mohr Date: Wed Sep 3 12:13:22 2014 +0200 various fixes uncovered by Coverity This patch fixes: - AMD64: broken model detection for model 5 and 4 - common: useless assignment in event parsing code problems identified by running the coverity tool. Signed-off-by: Gary Mohr diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c index 31e2181..98f55b8 100644 --- a/lib/pfmlib_amd64.c +++ b/lib/pfmlib_amd64.c @@ -112,10 +112,14 @@ amd64_get_revision(pfm_amd64_config_t *cfg) if (cfg->family == 15) { switch (cfg->model >> 4) { case 0: - if (cfg->model == 5 && cfg->stepping < 2) + if (cfg->model == 5 && cfg->stepping < 2) { rev = PFM_PMU_AMD64_K8_REVB; - if (cfg->model == 4 && cfg->stepping == 0) + break; + } + if (cfg->model == 4 && cfg->stepping == 0) { rev = PFM_PMU_AMD64_K8_REVB; + break; + } rev = PFM_PMU_AMD64_K8_REVC; break; case 1: diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index f4bdb78..fbb420e 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -945,7 +945,7 @@ found_attr: if (type != PFM_ATTR_UMASK && type != PFM_ATTR_RAW_UMASK && !has_val) { if (type != PFM_ATTR_MOD_BOOL) return PFM_ERR_ATTR_VAL; - has_val = 1; s = yes; /* no const */ + s = yes; /* no const */ goto handle_bool; } commit 14b358a6c1f19bb5ab2697e9a76d84a7a8a19b0a Author: Stephane Eranian Date: Thu Sep 11 15:37:33 2014 +0200 update Intel Haswell event table Based on published HSW table from download.01.org: - add missing EPT_* umask to PAGE_WALKER_LOADS - fix counter constraint on PAGE_WALKER_LOADS - add EPT:CYCLES event - add MACHINE_CLEARS:CYCLES - add CYCLE_ACTIVITY:CYCLES_NO_EXECUTE - remove MEM_UOPS_RETIRED:LOCK_STORE Signed-off-by: Stephane Eranian diff --git 
a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index c51120c..0b8f524 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -322,6 +322,12 @@ static const intel_x86_umask_t hsw_cycle_activity[]={ .ucntmsk= 0xf, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "CYCLES_NO_EXECUTE", + .udesc = "Cycles during which no instructions were executed in the execution stage of the pipeline", + .ucode = 0x0400 | (0x4 << INTEL_X86_CMASK_BIT), + .ucntmsk= 0xf, + .uflags = INTEL_X86_NCOMBO, + }, }; static const intel_x86_umask_t hsw_dtlb_load_misses[]={ @@ -904,6 +910,11 @@ static const intel_x86_umask_t hsw_longest_lat_cache[]={ }; static const intel_x86_umask_t hsw_machine_clears[]={ + { .uname = "CYCLES", + .udesc = "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes", + .ucode = 0x100, + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "MEMORY_ORDERING", .udesc = "Number of Memory Ordering Machine Clears detected", .ucode = 0x200, @@ -1020,11 +1031,6 @@ static const intel_x86_umask_t hsw_mem_uops_retired[]={ .ucode = 0x2100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, - { .uname = "LOCK_STORES", - .udesc = "Store uops with locked access retired", - .ucode = 0x2200, - .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, - }, { .uname = "SPLIT_LOADS", .udesc = "Line-splitted load uops retired", .ucode = 0x4100, @@ -1804,6 +1810,36 @@ static const intel_x86_umask_t hsw_page_walker_loads[]={ .ucode = 0x2400, .uflags= INTEL_X86_NCOMBO, }, + { .uname = "EPT_DTLB_L1", + .udesc = "Number of extended page table walks from the DTLB that hit in the L1D and line fill buffer", + .ucode = 0x4100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "EPT_ITLB_L1", + .udesc = "Number of extended page table walks from the ITLB that hit in the L1D and line fill buffer", + .ucode = 0x8100, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "EPT_DTLB_L2", + .udesc = "Number of extended page table walks from the DTLB that hit in the L2", + .ucode = 
0x4200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "EPT_ITLB_L2", + .udesc = "Number of extended page table walks from the ITLB that hit in the L2", + .ucode = 0x8200, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "EPT_DTLB_L3", + .udesc = "Number of extended page table walks from the DTLB that hit in the L3", + .ucode = 0x4400, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "EPT_ITLB_L3", + .udesc = "Number of extended page table walks from the ITLB that hit in the L3", + .ucode = 0x8400, + .uflags= INTEL_X86_NCOMBO, + }, { .uname = "DTLB_MEMORY", .udesc = "Number of DTLB page walker loads that hit memory", .ucode = 0x1800, @@ -1814,6 +1850,16 @@ static const intel_x86_umask_t hsw_page_walker_loads[]={ .ucode = 0x2800, .uflags= INTEL_X86_NCOMBO, }, + { .uname = "EPT_DTLB_MEMORY", + .udesc = "Number of extended page table walks from the DTLB that hit memory", + .ucode = 0x4800, + .uflags= INTEL_X86_NCOMBO, + }, + { .uname = "EPT_ITLB_MEMORY", + .udesc = "Number of extended page table walks from the ITLB that hit memory", + .ucode = 0x8800, + .uflags= INTEL_X86_NCOMBO, + }, }; static const intel_x86_umask_t hsw_lsd[]={ @@ -1832,6 +1878,14 @@ static const intel_x86_umask_t hsw_dsb2mite_switches[]={ }, }; +static const intel_x86_umask_t hsw_ept[]={ + { .uname = "CYCLES", + .udesc = "Cycles for an extended page table walk", + .ucode = 0x0200, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, + }, +}; + static const intel_x86_entry_t intel_hsw_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", @@ -2412,21 +2466,30 @@ static const intel_x86_entry_t intel_hsw_pe[]={ { .name = "PAGE_WALKER_LOADS", .desc = "Page walker loads", .modmsk = INTEL_V4_ATTRS, - .cntmsk = 0xff, + .cntmsk = 0xf, .code = 0xbc, .numasks = LIBPFM_ARRAY_SIZE(hsw_page_walker_loads), .ngrp = 1, .umasks = hsw_page_walker_loads, }, -{ .name = "DSB2MITE_SWITCHES", - .desc = "Number of DSB to MITE switches", - 
.modmsk = INTEL_V4_ATTRS, - .cntmsk = 0xff, - .code = 0xab, - .numasks = LIBPFM_ARRAY_SIZE(hsw_dsb2mite_switches), - .ngrp = 1, - .umasks = hsw_dsb2mite_switches, -}, + { .name = "DSB2MITE_SWITCHES", + .desc = "Number of DSB to MITE switches", + .modmsk = INTEL_V4_ATTRS, + .cntmsk = 0xff, + .code = 0xab, + .numasks = LIBPFM_ARRAY_SIZE(hsw_dsb2mite_switches), + .ngrp = 1, + .umasks = hsw_dsb2mite_switches, + }, + { .name = "EPT", + .desc = "Extended page table", + .modmsk = INTEL_V4_ATTRS, + .cntmsk = 0xff, + .code = 0x4f, + .numasks = LIBPFM_ARRAY_SIZE(hsw_ept), + .ngrp = 1, + .umasks = hsw_ept, + }, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V4_ATTRS, commit 98c00b8e690bd0f317a169b57aa0e3a51495768c Author: Stephane Eranian Date: Thu Sep 11 17:30:07 2014 +0200 add support for Intel Haswell-EP This patch adds core PMU support for Intel Haswell-EP processors. Based on information from download.01.org/perfmon/HSX Signed-off-by: Stephane Eranian diff --git a/docs/man3/libpfm_intel_hsw.3 b/docs/man3/libpfm_intel_hsw.3 index fb36acf..f67db2b 100644 --- a/docs/man3/libpfm_intel_hsw.3 +++ b/docs/man3/libpfm_intel_hsw.3 @@ -7,9 +7,11 @@ libpfm_intel_hsw - support for Intel Haswell core PMU .sp .B PMU name: hsw .B PMU desc: Intel Haswell +.B PMU name: hsw_ep +.B PMU desc: Intel Haswell-EP .sp .SH DESCRIPTION -The library supports the Intel Haswell core PMU. It should be noted that +The library supports the Intel Haswell and Haswell-EP core PMU. It should be noted that this PMU model only covers each core's PMU and not the socket level PMU. 
diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index 0169511..43ec178 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -242,6 +242,8 @@ typedef enum { PFM_PMU_INTEL_IVBEP_UNC_R3QPI2, /* Intel IvyBridge-EP R3QPI 2 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IRP, /* Intel IvyBridge-EP IRP uncore */ + PFM_PMU_INTEL_HSW_EP, /* Intel Haswell EP */ + /* MUST ADD NEW PMU MODELS HERE */ PFM_PMU_MAX /* end marker */ diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index 0b8f524..a17d15f 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -961,6 +961,24 @@ static const intel_x86_umask_t hsw_mem_load_uops_l3_miss_retired[]={ .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, + { .uname = "REMOTE_DRAM", + .udesc = "Number of retired load uops that missed L3 but were service by remote RAM, snoop not needed, snoop miss, snoop hit data not forwarded (Precise Event)", + .ucode = 0x400, + .umodel = PFM_PMU_INTEL_HSW_EP, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "REMOTE_HITM", + .udesc = "Number of retired load uops whose data sources was remote HITM (Precise Event)", + .ucode = 0x1000, + .umodel = PFM_PMU_INTEL_HSW_EP, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "REMOTE_FWD", + .udesc = "Load uops that miss in the L3 whose data source was forwarded from a remote cache (Precise Event)", + .ucode = 0x2000, + .umodel = PFM_PMU_INTEL_HSW_EP, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, }; static const intel_x86_umask_t hsw_mem_load_uops_retired[]={ @@ -1500,34 +1518,157 @@ static const intel_x86_umask_t hsw_offcore_response[]={ { .uname = "LLC_HITM", .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", .ucode = 1ULL << (18+8), + .umodel = PFM_PMU_INTEL_HSW, .grpid = 1, }, { .uname = "LLC_HITE", .udesc = "Supplier: counts L3 hits in E-state", .ucode = 1ULL << (19+8), + .umodel = PFM_PMU_INTEL_HSW, .grpid = 1, }, { 
.uname = "LLC_HITS", .udesc = "Supplier: counts L3 hits in S-state", .ucode = 1ULL << (20+8), + .umodel = PFM_PMU_INTEL_HSW, .grpid = 1, }, { .uname = "LLC_HITF", .udesc = "Supplier: counts L3 hits in F-state", .ucode = 1ULL << (21+8), - .grpid = 1, - }, - { .uname = "LLC_MISS_LOCAL", - .udesc = "Supplier: counts L3 misses to local DRAM", - .ucode = 1ULL << (22+8), + .umodel = PFM_PMU_INTEL_HSW, .grpid = 1, }, { .uname = "LLC_HITMESF", .udesc = "Supplier: counts L3 hits in any state (M, E, S, F)", .ucode = 0xfULL << (18+8), .uequiv = "LLC_HITM:LLC_HITE:LLC_HITS:LLC_HITF", + .umodel = PFM_PMU_INTEL_HSW, .grpid = 1, }, + { .uname = "LLC_HIT", + .udesc = "Alias for LLC_HITMESF", + .ucode = 0xfULL << (18+8), + .uequiv = "LLC_HITM:LLC_HITE:LLC_HITS:LLC_HITF", + .umodel = PFM_PMU_INTEL_HSW, + .grpid = 1, + }, + { .uname = "LLC_MISS_LOCAL", + .udesc = "Supplier: counts L3 misses to local DRAM", + .ucode = 1ULL << (22+8), + .umodel = PFM_PMU_INTEL_HSW, + .grpid = 1, + }, + { .uname = "L3_HITM", + .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", + .ucode = 1ULL << (18+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_HITE", + .udesc = "Supplier: counts L3 hits in E-state", + .ucode = 1ULL << (19+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_HITS", + .udesc = "Supplier: counts L3 hits in S-state", + .ucode = 1ULL << (20+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_HITF", + .udesc = "Supplier: counts L3 hits in F-state", + .ucode = 1ULL << (21+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_HIT", + .udesc = "Supplier: counts L3 hits in M, E, S, F state", + .ucode = 0xfULL << (18+8), + .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L4_HIT_LOCAL_L4", + .udesc = "Supplier: counts L4 hits to local L4 cache", + .ucode = 0x1ULL << (22+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { 
.uname = "L4_HIT_REMOTE_HOP0_L4", + .udesc = "Supplier: counts L4 hits to remote L4 cache with 0 hop", + .ucode = 0x1ULL << (23+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L4_HIT_REMOTE_HOP1_L4", + .udesc = "Supplier: counts L4 hits to remote L4 cache with 1 hop", + .ucode = 0x1ULL << (24+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L4_HIT_REMOTE_HOP2P_L4", + .udesc = "Supplier: counts L4 hits to remote L4 cache with 2P hops", + .ucode = 0x1ULL << (25+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L4_HIT", + .udesc = "Supplier: counts L4 hits", + .ucode = 0xfULL << (22+8), + .uequiv = "L4_HIT_LOCAL_L4:L4_HIT_REMOTE_HOP0_L4:L4_HIT_REMOTE_HOP1_L4:L4_HIT_REMOTE_HOP2P_L4", + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "LLC_MISS_LOCAL", + .udesc = "Supplier: counts L3 misses to local DRAM", + .ucode = 1ULL << (22+8), + .umodel = PFM_PMU_INTEL_HSW, + .grpid = 1, + }, + { .uname = "L3_MISS_LOCAL", + .udesc = "Supplier: counts L3 misses to local DRAM", + .ucode = 1ULL << (26+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_MISS_REMOTE_HOP0_DRAM", + .udesc = "Supplier: counts L3 misses to remote DRAM with 0 hop", + .ucode = 0x1ULL << (27+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_MISS_REMOTE_HOP1_DRAM", + .udesc = "Supplier: counts L3 misses to remote DRAM with 1 hop", + .ucode = 0x1ULL << (28+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_MISS_REMOTE_HOP2P_DRAM", + .udesc = "Supplier: counts L3 misses to remote DRAM with 2P hops", + .ucode = 0x1ULL << (29+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_MISS", + .udesc = "Supplier: counts L3 misses to local or remote DRAM", + .ucode = 0xfULL << (26+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "L3_MISS_REMOTE_DRAM", + .udesc = "Supplier: counts L3 misses to remote DRAM", + .ucode = 0x3ULL << 
(27+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, + { .uname = "SPL_HIT", + .udesc = "Supplier: counts L3 misses to remote DRAM", + .ucode = 0x1ULL << (30+8), + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, { .uname = "SNP_NONE", .udesc = "Snoop: counts number of times no snoop-related information is available", .ucode = 1ULL << (31+8), diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index fbb420e..81903ff 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -98,6 +98,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &intel_ivb_unc_cbo3_support, &intel_ivb_ep_support, &intel_hsw_support, + &intel_hsw_ep_support, &intel_rapl_support, &intel_snbep_unc_cb0_support, &intel_snbep_unc_cb1_support, diff --git a/lib/pfmlib_intel_hsw.c b/lib/pfmlib_intel_hsw.c index f4975f6..1feae63 100644 --- a/lib/pfmlib_intel_hsw.c +++ b/lib/pfmlib_intel_hsw.c @@ -40,7 +40,6 @@ pfm_hsw_detect(void *this) switch (pfm_intel_x86_cfg.model) { case 60: /* Haswell */ - case 63: /* Haswell */ case 69: /* Haswell */ case 70: /* Haswell */ case 71: /* Haswell */ @@ -52,6 +51,27 @@ pfm_hsw_detect(void *this) } static int +pfm_hsw_ep_detect(void *this) +{ + int ret; + + ret = pfm_intel_x86_detect(); + if (ret != PFM_SUCCESS) + return ret; + + if (pfm_intel_x86_cfg.family != 6) + return PFM_ERR_NOTSUPP; + + switch (pfm_intel_x86_cfg.model) { + case 63: /* Haswell EP */ + break; + default: + return PFM_ERR_NOTSUPP; + } + return PFM_SUCCESS; +} + +static int pfm_hsw_init(void *this) { pfm_intel_x86_cfg.arch_version = 4; @@ -86,3 +106,32 @@ pfmlib_pmu_t intel_hsw_support={ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .can_auto_encode = pfm_intel_x86_can_auto_encode, }; + +pfmlib_pmu_t intel_hsw_ep_support={ + .desc = "Intel Haswell EP", + .name = "hsw_ep", + .pmu = PFM_PMU_INTEL_HSW_EP, + .pme_count = LIBPFM_ARRAY_SIZE(intel_hsw_pe), + .type = PFM_PMU_TYPE_CORE, + .supported_plm = INTEL_X86_PLM, + .num_cntrs = 8, /* consider with HT off by default */ + .num_fixed_cntrs = 
3, + .max_encoding = 2, /* offcore_response */ + .pe = intel_hsw_pe, + .atdesc = intel_x86_mods, + .flags = PFMLIB_PMU_FL_RAW_UMASK + | INTEL_X86_PMU_FL_ECMASK, + .pmu_detect = pfm_hsw_ep_detect, + .pmu_init = pfm_hsw_init, + .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, + PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), + .get_event_first = pfm_intel_x86_get_event_first, + .get_event_next = pfm_intel_x86_get_event_next, + .event_is_valid = pfm_intel_x86_event_is_valid, + .validate_table = pfm_intel_x86_validate_table, + .get_event_info = pfm_intel_x86_get_event_info, + .get_event_attr_info = pfm_intel_x86_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, + .can_auto_encode = pfm_intel_x86_can_auto_encode, +}; diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h index aa974e0..129a815 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -246,6 +246,7 @@ extern pfmlib_pmu_t intel_ivb_unc_cbo2_support; extern pfmlib_pmu_t intel_ivb_unc_cbo3_support; extern pfmlib_pmu_t intel_ivb_ep_support; extern pfmlib_pmu_t intel_hsw_support; +extern pfmlib_pmu_t intel_hsw_ep_support; extern pfmlib_pmu_t intel_rapl_support; extern pfmlib_pmu_t intel_snbep_unc_cb0_support; extern pfmlib_pmu_t intel_snbep_unc_cb1_support; diff --git a/tests/validate_x86.c b/tests/validate_x86.c index b5da6a6..743cc14 100644 --- a/tests/validate_x86.c +++ b/tests/validate_x86.c @@ -2702,8 +2702,110 @@ static const test_event_t x86_test_events[]={ .codes[0] = 0x25, .fstr = "ivbep_unc_r3qpi1::UNC_R3_TXR_CYCLES_FULL:e=0:t=0", }, + { SRC_LINE, + .name = "hsw_ep::mem_trans_retired:latency_above_threshold:ldlat=3:u", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5101cd, + .codes[1] = 3, + .fstr = "hsw_ep::MEM_TRANS_RETIRED:LOAD_LATENCY:k=0:u=1:e=0:i=0:c=0:t=0:ldlat=3:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::mem_trans_retired:latency_above_threshold:ldlat=1000000", + .ret = 
PFM_ERR_ATTR_VAL, + }, + { SRC_LINE, + .name = "hsw_ep::mem_trans_retired:load_latency", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301cd, + .codes[1] = 3, + .fstr = "hsw_ep::MEM_TRANS_RETIRED:LOAD_LATENCY:k=1:u=1:e=0:i=0:c=0:t=0:ldlat=3:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::mem_trans_retired:load_latency:ldlat=1000000", + .ret = PFM_ERR_ATTR_VAL, + }, + { SRC_LINE, + .name = "hsw_ep::mem_trans_retired:latency_above_threshold:ldlat=2:intx=0:intxcp=0", + .ret = PFM_ERR_ATTR_VAL, + }, + { SRC_LINE, + .name = "hsw_ep::inst_Retired:any_p:intx", + .count = 1, + .codes[0] = 0x1005300c0, + .fstr = "hsw_ep::INST_RETIRED:ANY_P:k=1:u=1:e=0:i=0:c=0:t=0:intx=1:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::inst_Retired:any_p:intx:intxcp", + .count = 1, + .codes[0] = 0x3005300c0, + .fstr = "hsw_ep::INST_RETIRED:ANY_P:k=1:u=1:e=0:i=0:c=0:t=0:intx=1:intxcp=1", + }, + { SRC_LINE, + .name = "hsw_ep::inst_Retired:any_p:intx=0:intxcp", + .count = 1, + .codes[0] = 0x2005300c0, + .fstr = "hsw_ep::INST_RETIRED:ANY_P:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=1", + }, + { SRC_LINE, + .name = "hsw_ep::cycle_activity:cycles_l2_pending", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x15301a3, + .fstr = "hsw_ep::CYCLE_ACTIVITY:CYCLES_L2_PENDING:k=1:u=1:e=0:i=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::cycle_activity:cycles_l2_pending:c=8", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "hsw_ep::hle_retired:aborted", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x5304c8, + .fstr = "hsw_ep::HLE_RETIRED:ABORTED:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::mem_load_uops_l3_miss_retired:remote_dram", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x5304d3, + .fstr = "hsw_ep::MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::offcore_response_0:any_data:L3_miss_local", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, + 
.codes[1] = 0x3f84000091ull, + .fstr = "hsw_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_DATA_RD:PF_LLC_DATA_RD:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::offcore_response_0:any_data:LLC_miss_local", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "hsw_ep::offcore_response_0:any_data:LLC_miss_remote", + .ret = PFM_ERR_ATTR, + }, + { SRC_LINE, + .name = "hsw_ep::offcore_response_0:any_data:L3_HIT", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] =0x5301b7, + .codes[1] =0x3f803c0091ull, + .fstr = "hsw_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_DATA_RD:PF_LLC_DATA_RD:L3_HITM:L3_HITE:L3_HITS:L3_HITF:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "hsw_ep::offcore_response_0:any_data:L4_HIT", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, + .codes[1] =0x3f83c00091ull, + .fstr = "hsw_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_DATA_RD:PF_LLC_DATA_RD:L4_HIT_LOCAL_L4:L4_HIT_REMOTE_HOP0_L4:L4_HIT_REMOTE_HOP1_L4:L4_HIT_REMOTE_HOP2P_L4:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, }; - #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) static int commit 70ecfb02bf0e8c94b6ee5694626ca269bfbed4d2 Author: Stephane Eranian Date: Thu Sep 11 17:50:32 2014 +0200 remove duplicated offcore_Response:LLC_MISS_LOCAL from Haswell offcore The umask was present twice in the event table. 
Signed-off-by: Stephane Eranian diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h index a17d15f..0087f01 100644 --- a/lib/events/intel_hsw_events.h +++ b/lib/events/intel_hsw_events.h @@ -1553,12 +1553,6 @@ static const intel_x86_umask_t hsw_offcore_response[]={ .umodel = PFM_PMU_INTEL_HSW, .grpid = 1, }, - { .uname = "LLC_MISS_LOCAL", - .udesc = "Supplier: counts L3 misses to local DRAM", - .ucode = 1ULL << (22+8), - .umodel = PFM_PMU_INTEL_HSW, - .grpid = 1, - }, { .uname = "L3_HITM", .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", .ucode = 1ULL << (18+8),