diff --git a/SOURCES/libpfm-rhbz1440249.patch b/SOURCES/libpfm-rhbz1440249.patch new file mode 100644 index 0000000..9326b02 --- /dev/null +++ b/SOURCES/libpfm-rhbz1440249.patch @@ -0,0 +1,1421 @@ +From b9709a7866498a84dc4ab60fb006631569bedbf0 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 3 Apr 2017 22:48:31 -0700 +Subject: [PATCH 1/7] Revert "fix struct validation for pfm_event_attr_info_t" + +This reverts commit 06b296c72838be44d8950dc03227fe0dc8ca1fb1. + +Break ABI compatibility from 4.7 to 4.8. + +Signed-off-by: Stephane Eranian +--- + include/perfmon/pfmlib.h | 5 ++--- + tests/validate.c | 3 +-- + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index 0e370ba50318..d9be4453accf 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -490,7 +490,6 @@ typedef struct { + size_t size; /* struct sizeof */ + uint64_t code; /* attribute code */ + pfm_attr_t type; /* attribute type */ +- int pad; /* padding */ + uint64_t idx; /* attribute opaque index */ + pfm_attr_ctrl_t ctrl; /* what is providing attr */ + struct { +@@ -520,13 +519,13 @@ typedef struct { + #if __WORDSIZE == 64 + #define PFM_PMU_INFO_ABI0 56 + #define PFM_EVENT_INFO_ABI0 64 +-#define PFM_ATTR_INFO_ABI0 72 ++#define PFM_ATTR_INFO_ABI0 64 + + #define PFM_RAW_ENCODE_ABI0 32 + #else + #define PFM_PMU_INFO_ABI0 44 + #define PFM_EVENT_INFO_ABI0 48 +-#define PFM_ATTR_INFO_ABI0 56 ++#define PFM_ATTR_INFO_ABI0 48 + + #define PFM_RAW_ENCODE_ABI0 20 + #endif +diff --git a/tests/validate.c b/tests/validate.c +index 0da0adc4995a..522a6ab7140d 100644 +--- a/tests/validate.c ++++ b/tests/validate.c +@@ -201,7 +201,6 @@ static const struct_desc_t pfmlib_structs[]={ + FIELD(code, pfm_event_attr_info_t), + FIELD(type, pfm_event_attr_info_t), + FIELD(idx, pfm_event_attr_info_t), +- FIELD(pad, pfm_event_attr_info_t), /* padding */ + FIELD(ctrl, pfm_event_attr_info_t), + LAST_FIELD + }, +@@ -271,7 +270,7 @@ 
validate_structs(void) + } + + if (sz != d->sz) { +- printf("Failed (invisible padding of %zu bytes, total struct size %zu bytes)\n", d->sz - sz, d->sz); ++ printf("Failed (invisible padding of %zu bytes)\n", d->sz - sz); + errors++; + continue; + } +-- +2.7.4 + +From 01c24ef2c781c614544eeb5ce3922313118e3053 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 3 Apr 2017 22:49:18 -0700 +Subject: [PATCH 2/7] Revert "Fix pfmlib_parse_event_attr() parsing of raw + umask for 32-bit" + +This reverts commit bfb9baf1c8a9533fde271d0436ecd465934dfa17. + +support for 32-bit umask as implemented breaks ABI between 4.7 and 4.8. + +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_common.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index cff4d2ecbd2c..c88e2aaae274 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -1011,10 +1011,10 @@ pfmlib_parse_event_attr(char *str, pfmlib_event_desc_t *d) + ainfo->name = "RAW_UMASK"; + ainfo->type = PFM_ATTR_RAW_UMASK; + ainfo->ctrl = PFM_ATTR_CTRL_PMU; +- ainfo->idx = strtoull(s, &endptr, 0); ++ ainfo->idx = strtoul(s, &endptr, 0); + ainfo->equiv= NULL; + if (*endptr) { +- DPRINT("raw umask (%s) is not a number\n", s); ++ DPRINT("raw umask (%s) is not a number\n"); + return PFM_ERR_ATTR; + } + +@@ -1368,9 +1368,9 @@ pfmlib_parse_event(const char *event, pfmlib_event_desc_t *d) + for (i = 0; i < d->nattrs; i++) { + pfm_event_attr_info_t *a = attr(d, i); + if (a->type != PFM_ATTR_RAW_UMASK) +- DPRINT("%d %d %"PRIu64" %s\n", d->event, i, a->idx, d->pattrs[d->attrs[i].id].name); ++ DPRINT("%d %d %d %s\n", d->event, i, a->idx, d->pattrs[d->attrs[i].id].name); + else +- DPRINT("%d %d RAW_UMASK (0x%"PRIx64")\n", d->event, i, a->idx); ++ DPRINT("%d %d RAW_UMASK (0x%x)\n", d->event, i, a->idx); + } + error: + free(str); +-- +2.7.4 + +From e206315c36e39409b7fc1e4cdd72caa5040b45c4 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 3 Apr 2017 
22:52:22 -0700 +Subject: [PATCH 3/7] Revert "Allow raw umask for OFFCORE_RESPONSE on Intel + core PMUs" + +This reverts commit 4dc4c6ada254f30eee8cd2ae27bb0869a111b613. + +32-bit raw umask support break ABI between 4.7 and 4.8, so remove +for now. + +Signed-off-by: Stephane Eranian +--- + include/perfmon/pfmlib.h | 4 +- + lib/pfmlib_intel_x86.c | 16 ++-- + tests/validate_x86.c | 232 ----------------------------------------------- + 3 files changed, 9 insertions(+), 243 deletions(-) + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index d9be4453accf..6904c1c79b68 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -490,8 +490,8 @@ typedef struct { + size_t size; /* struct sizeof */ + uint64_t code; /* attribute code */ + pfm_attr_t type; /* attribute type */ +- uint64_t idx; /* attribute opaque index */ +- pfm_attr_ctrl_t ctrl; /* what is providing attr */ ++ int idx; /* attribute opaque index */ ++ pfm_attr_ctrl_t ctrl; /* what is providing attr */ + struct { + unsigned int is_dfl:1; /* is default umask */ + unsigned int is_precise:1; /* Intel X86: supports PEBS */ +diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c +index b698144f1da4..497cf1b9246a 100644 +--- a/lib/pfmlib_intel_x86.c ++++ b/lib/pfmlib_intel_x86.c +@@ -481,18 +481,16 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) + reg.sel_event_select = last_ucode; + } + } else if (a->type == PFM_ATTR_RAW_UMASK) { +- uint64_t rmask; ++ + /* there can only be one RAW_UMASK per event */ +- if (intel_x86_eflag(this, e->event, INTEL_X86_NHM_OFFCORE)) { +- rmask = (1ULL << 38) - 1; +- } else { +- rmask = 0xff; +- } +- if (a->idx & ~rmask) { +- DPRINT("raw umask is too wide\n"); ++ ++ /* sanity check */ ++ if (a->idx & ~0xff) { ++ DPRINT("raw umask is 8-bit wide\n"); + return PFM_ERR_ATTR; + } +- umask2 = a->idx & rmask; ++ /* override umask */ ++ umask2 = a->idx & 0xff; + ugrpmsk = grpmsk; + } else { + uint64_t ival = e->attrs[k].ival; +diff --git 
a/tests/validate_x86.c b/tests/validate_x86.c +index 790ba585d8e7..906afba636e1 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -4057,238 +4057,6 @@ static const test_event_t x86_test_events[]={ + .fstr = "hsw::CYCLE_ACTIVITY:CYCLES_L2_PENDING:k=1:u=1:e=0:i=0:c=1:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +- .name = "wsm::offcore_response_0:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xf, +- .fstr = "wsm::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "wsm::offcore_response_0:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffffull, +- .fstr = "wsm::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "wsm::offcore_response_0:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "snb::offcore_response_0:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xf, +- .fstr = "snb::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "snb::offcore_response_0:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffffull, +- .fstr = "snb::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "snb::offcore_response_0:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "ivb_ep::offcore_response_0:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xf, +- .fstr = "ivb_ep::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "ivb_ep::offcore_response_0:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffffull, +- .fstr = "ivb_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "ivb_ep::offcore_response_0:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = 
"hsw::offcore_response_0:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xf, +- .fstr = "hsw::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "hsw::offcore_response_0:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffffull, +- .fstr = "hsw::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "hsw::offcore_response_0:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "bdw_ep::offcore_response_0:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xf, +- .fstr = "bdw_ep::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "bdw_ep::offcore_response_0:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffffull, +- .fstr = "bdw_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "bdw_ep::offcore_response_0:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "skl::offcore_response_0:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xf, +- .fstr = "skl::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "skl::offcore_response_0:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffffull, +- .fstr = "skl::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "skl::offcore_response_0:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "wsm::offcore_response_1:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffffull, +- .fstr = "wsm::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = 
"wsm::offcore_response_1:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "snb::offcore_response_1:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xf, +- .fstr = "snb::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "snb::offcore_response_1:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffffull, +- .fstr = "snb::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "snb::offcore_response_1:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "ivb_ep::offcore_response_1:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xf, +- .fstr = "ivb_ep::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "ivb_ep::offcore_response_1:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffffull, +- .fstr = "ivb_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", +- }, +- { SRC_LINE, +- .name = "ivb_ep::offcore_response_1:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "hsw::offcore_response_1:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xf, +- .fstr = "hsw::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "hsw::offcore_response_1:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffffull, +- .fstr = "hsw::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "hsw::offcore_response_1:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "bdw_ep::offcore_response_1:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xf, +- .fstr = "bdw_ep::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { 
SRC_LINE, +- .name = "bdw_ep::offcore_response_1:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffffull, +- .fstr = "bdw_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "bdw_ep::offcore_response_1:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, +- .name = "skl::offcore_response_1:0xf", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xf, +- .fstr = "skl::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "skl::offcore_response_1:0xfffffffff", +- .ret = PFM_SUCCESS, +- .count = 2, +- .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffffull, +- .fstr = "skl::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", +- }, +- { SRC_LINE, +- .name = "skl::offcore_response_1:0x7fffffffff", +- .ret = PFM_ERR_ATTR, +- }, +- { SRC_LINE, + .name = "glm::offcore_response_1:any_request", + .ret = PFM_SUCCESS, + .count = 2, +-- +2.7.4 + +From 1e01aa2112461ecb67ddc58750316cadd19a8612 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 3 Apr 2017 22:55:16 -0700 +Subject: [PATCH 4/7] improve error message in validate.c + +Add more detailed info in case of size mismatch. 
+ +Signed-off-by: Stephane Eranian +--- + tests/validate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/validate.c b/tests/validate.c +index 522a6ab7140d..e4a8025f3f14 100644 +--- a/tests/validate.c ++++ b/tests/validate.c +@@ -270,7 +270,7 @@ validate_structs(void) + } + + if (sz != d->sz) { +- printf("Failed (invisible padding of %zu bytes)\n", d->sz - sz); ++ printf("Failed (invisible padding of %zu bytes, total struct size %zu bytes)\n", d->sz - sz, d->sz); + errors++; + continue; + } +-- +2.7.4 + +From 321133e1486084ea2b1494bc67b38ee085b31f71 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 3 Apr 2017 23:32:50 -0700 +Subject: [PATCH 5/7] create internal type for perf_event_attr_info_t + +This patch creates an internal version of the ABI +pfm_event_attr_info structure called pfmlib_event_attr_info_t. +The advantage is that we can change the internal version without +ABI changes. The new struct is just a clone of the external version. +But it can be customized for internal needs. + +The pfm_get_event_attr_info() converts the internal version into +the external version. + +This patch changes internal interface to use pfmlib_event_attr_info_t +for all architectures. 
+ +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_amd64.c | 4 ++-- + lib/pfmlib_amd64_priv.h | 2 +- + lib/pfmlib_arm.c | 4 ++-- + lib/pfmlib_arm_priv.h | 2 +- + lib/pfmlib_common.c | 32 ++++++++++++++++++++------------ + lib/pfmlib_intel_netburst.c | 4 ++-- + lib/pfmlib_intel_nhm_unc.c | 2 +- + lib/pfmlib_intel_snbep_unc.c | 4 ++-- + lib/pfmlib_intel_snbep_unc_priv.h | 2 +- + lib/pfmlib_intel_x86.c | 10 +++++----- + lib/pfmlib_intel_x86_perf_event.c | 6 +++--- + lib/pfmlib_intel_x86_priv.h | 2 +- + lib/pfmlib_mips.c | 4 ++-- + lib/pfmlib_mips_priv.h | 2 +- + lib/pfmlib_perf_event.c | 4 ++-- + lib/pfmlib_perf_event_pmu.c | 6 +++--- + lib/pfmlib_perf_event_raw.c | 2 +- + lib/pfmlib_power_priv.h | 2 +- + lib/pfmlib_powerpc.c | 2 +- + lib/pfmlib_priv.h | 26 ++++++++++++++++++++++++-- + lib/pfmlib_sparc.c | 4 ++-- + lib/pfmlib_sparc_priv.h | 2 +- + lib/pfmlib_torrent.c | 2 +- + 23 files changed, 80 insertions(+), 50 deletions(-) + +diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c +index 13838040b55a..be2a4ef86faf 100644 +--- a/lib/pfmlib_amd64.c ++++ b/lib/pfmlib_amd64.c +@@ -426,7 +426,7 @@ pfm_amd64_get_encoding(void *this, pfmlib_event_desc_t *e) + { + const amd64_entry_t *pe = this_pe(this); + pfm_amd64_reg_t reg; +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + uint64_t umask = 0; + unsigned int plmmsk = 0; + int k, ret, grpid; +@@ -661,7 +661,7 @@ pfm_amd64_event_is_valid(void *this, int pidx) + } + + int +-pfm_amd64_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_amd64_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) + { + const amd64_entry_t *pe = this_pe(this); + int numasks, idx; +diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h +index 66ca49ef1b1d..c3caae514f52 100644 +--- a/lib/pfmlib_amd64_priv.h ++++ b/lib/pfmlib_amd64_priv.h +@@ -202,7 +202,7 @@ extern int pfm_amd64_get_encoding(void *this, pfmlib_event_desc_t *e); + extern int 
pfm_amd64_get_event_first(void *this); + extern int pfm_amd64_get_event_next(void *this, int idx); + extern int pfm_amd64_event_is_valid(void *this, int idx); +-extern int pfm_amd64_get_event_attr_info(void *this, int idx, int attr_idx, pfm_event_attr_info_t *info); ++extern int pfm_amd64_get_event_attr_info(void *this, int idx, int attr_idx, pfmlib_event_attr_info_t *info); + extern int pfm_amd64_get_event_info(void *this, int idx, pfm_event_info_t *info); + extern int pfm_amd64_validate_table(void *this, FILE *fp); + extern int pfm_amd64_detect(void *this); +diff --git a/lib/pfmlib_arm.c b/lib/pfmlib_arm.c +index a49ca4504644..91c35c670ebe 100644 +--- a/lib/pfmlib_arm.c ++++ b/lib/pfmlib_arm.c +@@ -180,7 +180,7 @@ pfm_arm_get_encoding(void *this, pfmlib_event_desc_t *e) + { + + const arm_entry_t *pe = this_pe(this); +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + pfm_arm_reg_t reg; + unsigned int plm = 0; + int i, idx, has_plm = 0; +@@ -305,7 +305,7 @@ pfm_arm_validate_table(void *this, FILE *fp) + } + + int +-pfm_arm_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_arm_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) + { + int idx; + +diff --git a/lib/pfmlib_arm_priv.h b/lib/pfmlib_arm_priv.h +index 81a9df9afdc7..4fc2e74955e4 100644 +--- a/lib/pfmlib_arm_priv.h ++++ b/lib/pfmlib_arm_priv.h +@@ -66,7 +66,7 @@ extern int pfm_arm_get_event_first(void *this); + extern int pfm_arm_get_event_next(void *this, int idx); + extern int pfm_arm_event_is_valid(void *this, int pidx); + extern int pfm_arm_validate_table(void *this, FILE *fp); +-extern int pfm_arm_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info); ++extern int pfm_arm_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info); + extern int pfm_arm_get_event_info(void *this, int idx, pfm_event_info_t *info); + extern unsigned int 
pfm_arm_get_event_nattrs(void *this, int pidx); + +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index c88e2aaae274..f3c6dfa23e55 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -504,7 +504,7 @@ pfmlib_compact_attrs(pfmlib_event_desc_t *e, int i) + static inline int + pfmlib_same_attr(pfmlib_event_desc_t *d, int i, int j) + { +- pfm_event_attr_info_t *a1, *a2; ++ pfmlib_event_attr_info_t *a1, *a2; + pfmlib_attr_t *b1, *b2; + + a1 = attr(d, i); +@@ -967,7 +967,7 @@ pfmlib_sanitize_event(pfmlib_event_desc_t *d) + static int + pfmlib_parse_event_attr(char *str, pfmlib_event_desc_t *d) + { +- pfm_event_attr_info_t *ainfo; ++ pfmlib_event_attr_info_t *ainfo; + char *s, *p, *q, *endptr; + char yes[2] = "y"; + pfm_attr_t type; +@@ -1366,7 +1366,7 @@ pfmlib_parse_event(const char *event, pfmlib_event_desc_t *d) + ret = pfmlib_sanitize_event(d); + + for (i = 0; i < d->nattrs; i++) { +- pfm_event_attr_info_t *a = attr(d, i); ++ pfmlib_event_attr_info_t *a = attr(d, i); + if (a->type != PFM_ATTR_RAW_UMASK) + DPRINT("%d %d %d %s\n", d->event, i, a->idx, d->pattrs[d->attrs[i].id].name); + else +@@ -1549,7 +1549,7 @@ static int + pfmlib_pmu_validate_encoding(pfmlib_pmu_t *pmu, FILE *fp) + { + pfm_event_info_t einfo; +- pfm_event_attr_info_t ainfo; ++ pfmlib_event_attr_info_t ainfo; + char *buf; + size_t maxlen = 0, len; + int i, u, n = 0, um; +@@ -1811,7 +1811,7 @@ pfm_get_event_info(int idx, pfm_os_t os, pfm_event_info_t *uinfo) + int + pfm_get_event_attr_info(int idx, int attr_idx, pfm_os_t os, pfm_event_attr_info_t *uinfo) + { +- pfm_event_attr_info_t info; ++ pfmlib_event_attr_info_t info; + pfmlib_event_desc_t e; + pfmlib_pmu_t *pmu; + size_t sz = sizeof(info); +@@ -1857,17 +1857,25 @@ pfm_get_event_attr_info(int idx, int attr_idx, pfm_os_t os, pfm_event_attr_info_ + info = e.pattrs[attr_idx]; + + /* +- * rewrite size to reflect what we are returning +- */ +- info.size = sz; +- /* + * info.idx = private, namespace specific index, + * should not be 
visible externally, so override + * with public index ++ * ++ * cannot memcpy() info into uinfo as they do not ++ * have the same size, cf. idx field (uint64 vs, uint32) + */ +- info.idx = attr_idx; +- +- memcpy(uinfo, &info, sz); ++ uinfo->name = info.name; ++ uinfo->desc = info.desc; ++ uinfo->equiv = info.equiv; ++ uinfo->size = sz; ++ uinfo->code = info.code; ++ uinfo->type = info.type; ++ uinfo->idx = attr_idx; ++ uinfo->ctrl = info.ctrl; ++ uinfo->is_dfl= info.is_dfl; ++ uinfo->is_precise = info.is_precise; ++ uinfo->reserved_bits = 0; ++ uinfo->dfl_val64 = info.dfl_val64; + + ret = PFM_SUCCESS; + error: +diff --git a/lib/pfmlib_intel_netburst.c b/lib/pfmlib_intel_netburst.c +index 9d8f22b7705d..9b4960583523 100644 +--- a/lib/pfmlib_intel_netburst.c ++++ b/lib/pfmlib_intel_netburst.c +@@ -110,7 +110,7 @@ netburst_add_defaults(pfmlib_event_desc_t *e, unsigned int *evmask) + int + pfm_netburst_get_encoding(void *this, pfmlib_event_desc_t *e) + { +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + netburst_escr_value_t escr; + netburst_cccr_value_t cccr; + unsigned int evmask = 0; +@@ -322,7 +322,7 @@ pfm_netburst_event_is_valid(void *this, int pidx) + } + + static int +-pfm_netburst_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_netburst_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) + { + const netburst_entry_t *pe = this_pe(this); + int numasks, idx; +diff --git a/lib/pfmlib_intel_nhm_unc.c b/lib/pfmlib_intel_nhm_unc.c +index 4c27b070f2d6..6731f4045332 100644 +--- a/lib/pfmlib_intel_nhm_unc.c ++++ b/lib/pfmlib_intel_nhm_unc.c +@@ -82,7 +82,7 @@ static int + pfm_nhm_unc_get_encoding(void *this, pfmlib_event_desc_t *e) + { + pfm_intel_x86_reg_t reg; +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + const intel_x86_entry_t *pe = this_pe(this); + unsigned int grpmsk, ugrpmsk = 0; + int umodmsk = 0, modmsk_r = 0; +diff --git a/lib/pfmlib_intel_snbep_unc.c 
b/lib/pfmlib_intel_snbep_unc.c +index 075ae33b3a57..1e80147fc1a3 100644 +--- a/lib/pfmlib_intel_snbep_unc.c ++++ b/lib/pfmlib_intel_snbep_unc.c +@@ -281,7 +281,7 @@ pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) + pfm_snbep_unc_reg_t reg; + pfm_snbep_unc_reg_t filters[INTEL_X86_MAX_FILTERS]; + pfm_snbep_unc_reg_t addr; +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + uint64_t val, umask1, umask2; + int k, ret; + int has_cbo_tid = 0; +@@ -641,7 +641,7 @@ pfm_intel_snbep_unc_can_auto_encode(void *this, int pidx, int uidx) + } + + int +-pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) + { + const intel_x86_entry_t *pe = this_pe(this); + const pfmlib_attr_desc_t *atdesc = this_atdesc(this); +diff --git a/lib/pfmlib_intel_snbep_unc_priv.h b/lib/pfmlib_intel_snbep_unc_priv.h +index 500ff84cc123..4984242c35bb 100644 +--- a/lib/pfmlib_intel_snbep_unc_priv.h ++++ b/lib/pfmlib_intel_snbep_unc_priv.h +@@ -329,7 +329,7 @@ extern int pfm_intel_hswep_unc_detect(void *this); + extern int pfm_intel_knl_unc_detect(void *this); + extern int pfm_intel_snbep_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); + extern int pfm_intel_snbep_unc_can_auto_encode(void *this, int pidx, int uidx); +-extern int pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info); ++extern int pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info); + + static inline int + is_cbo_filt_event(void *this, pfm_intel_x86_reg_t reg) +diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c +index 497cf1b9246a..09a0f50a3a4e 100644 +--- a/lib/pfmlib_intel_x86.c ++++ b/lib/pfmlib_intel_x86.c +@@ -296,7 +296,7 @@ static int + intel_x86_check_pebs(void *this, pfmlib_event_desc_t *e) + { + const 
intel_x86_entry_t *pe = this_pe(this); +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + int numasks = 0, pebs = 0; + int i; + +@@ -340,7 +340,7 @@ static int + intel_x86_check_max_grpid(void *this, pfmlib_event_desc_t *e, int max_grpid) + { + const intel_x86_entry_t *pe; +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + int i, grpid; + + DPRINT("check: max_grpid=%d\n", max_grpid); +@@ -366,7 +366,7 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) + + { + pfmlib_pmu_t *pmu = this; +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + const intel_x86_entry_t *pe; + pfm_intel_x86_reg_t reg, reg2; + unsigned int grpmsk, ugrpmsk = 0; +@@ -964,7 +964,7 @@ pfm_intel_x86_validate_table(void *this, FILE *fp) + } + + int +-pfm_intel_x86_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_intel_x86_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) + { + const intel_x86_entry_t *pe = this_pe(this); + const pfmlib_attr_desc_t *atdesc = this_atdesc(this); +@@ -1029,7 +1029,7 @@ pfm_intel_x86_get_event_info(void *this, int idx, pfm_event_info_t *info) + int + pfm_intel_x86_valid_pebs(pfmlib_event_desc_t *e) + { +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + int i, npebs = 0, numasks = 0; + + /* first check at the event level */ +diff --git a/lib/pfmlib_intel_x86_perf_event.c b/lib/pfmlib_intel_x86_perf_event.c +index f346d4f92be5..0735ef9d88c1 100644 +--- a/lib/pfmlib_intel_x86_perf_event.c ++++ b/lib/pfmlib_intel_x86_perf_event.c +@@ -60,7 +60,7 @@ find_pmu_type_by_name(const char *name) + static int + has_ldlat(void *this, pfmlib_event_desc_t *e) + { +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + int i; + + for (i = 0; i < e->nattrs; i++) { +@@ -217,7 +217,7 @@ pfm_intel_nhm_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) + int + pfm_intel_x86_requesting_pebs(pfmlib_event_desc_t *e) + { +- pfm_event_attr_info_t 
*a; ++ pfmlib_event_attr_info_t *a; + int i; + + for (i = 0; i < e->nattrs; i++) { +@@ -233,7 +233,7 @@ pfm_intel_x86_requesting_pebs(pfmlib_event_desc_t *e) + static int + intel_x86_event_has_pebs(void *this, pfmlib_event_desc_t *e) + { +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + int i; + + /* first check at the event level */ +diff --git a/lib/pfmlib_intel_x86_priv.h b/lib/pfmlib_intel_x86_priv.h +index 963b41a8a766..e2dfbf3d9b40 100644 +--- a/lib/pfmlib_intel_x86_priv.h ++++ b/lib/pfmlib_intel_x86_priv.h +@@ -335,7 +335,7 @@ extern int pfm_intel_x86_get_event_next(void *this, int idx); + extern int pfm_intel_x86_get_event_umask_first(void *this, int idx); + extern int pfm_intel_x86_get_event_umask_next(void *this, int idx, int attr); + extern int pfm_intel_x86_validate_table(void *this, FILE *fp); +-extern int pfm_intel_x86_get_event_attr_info(void *this, int idx, int attr_idx, pfm_event_attr_info_t *info); ++extern int pfm_intel_x86_get_event_attr_info(void *this, int idx, int attr_idx, pfmlib_event_attr_info_t *info); + extern int pfm_intel_x86_get_event_info(void *this, int idx, pfm_event_info_t *info); + extern int pfm_intel_x86_valid_pebs(pfmlib_event_desc_t *e); + extern int pfm_intel_x86_perf_event_encoding(pfmlib_event_desc_t *e, void *data); +diff --git a/lib/pfmlib_mips.c b/lib/pfmlib_mips.c +index 8357ea515045..61db613be433 100644 +--- a/lib/pfmlib_mips.c ++++ b/lib/pfmlib_mips.c +@@ -174,7 +174,7 @@ pfm_mips_get_encoding(void *this, pfmlib_event_desc_t *e) + + pfmlib_pmu_t *pmu = this; + const mips_entry_t *pe = this_pe(this); +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + pfm_mips_sel_reg_t reg; + uint64_t ival, cntmask = 0; + int plmmsk = 0, code; +@@ -333,7 +333,7 @@ pfm_mips_get_event_nattrs(void *this, int pidx) + } + + int +-pfm_mips_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_mips_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t 
*info) + { + /* no umasks, so all attrs are modifiers */ + +diff --git a/lib/pfmlib_mips_priv.h b/lib/pfmlib_mips_priv.h +index c5112f510acf..1ed2bcba28c8 100644 +--- a/lib/pfmlib_mips_priv.h ++++ b/lib/pfmlib_mips_priv.h +@@ -107,7 +107,7 @@ extern int pfm_mips_get_event_first(void *this); + extern int pfm_mips_get_event_next(void *this, int idx); + extern int pfm_mips_event_is_valid(void *this, int pidx); + extern int pfm_mips_validate_table(void *this, FILE *fp); +-extern int pfm_mips_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info); ++extern int pfm_mips_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info); + extern int pfm_mips_get_event_info(void *this, int idx, pfm_event_info_t *info); + extern unsigned int pfm_mips_get_event_nattrs(void *this, int pidx); + +diff --git a/lib/pfmlib_perf_event.c b/lib/pfmlib_perf_event.c +index 8618d6070968..df18821a540d 100644 +--- a/lib/pfmlib_perf_event.c ++++ b/lib/pfmlib_perf_event.c +@@ -82,7 +82,7 @@ pfmlib_perf_event_encode(void *this, const char *str, int dfl_plm, void *data) + struct perf_event_attr my_attr, *attr; + pfmlib_pmu_t *pmu; + pfmlib_event_desc_t e; +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + size_t orig_sz, asz, sz = sizeof(arg); + uint64_t ival; + int has_plm = 0, has_vmx_plm = 0; +@@ -357,7 +357,7 @@ static int + perf_get_os_attr_info(void *this, pfmlib_event_desc_t *e) + { + pfmlib_os_t *os = this; +- pfm_event_attr_info_t *info; ++ pfmlib_event_attr_info_t *info; + int i, k, j = e->npattrs; + + for (i = k = 0; os->atdesc[i].name; i++) { +diff --git a/lib/pfmlib_perf_event_pmu.c b/lib/pfmlib_perf_event_pmu.c +index 5b2d8104696a..5c81552da71e 100644 +--- a/lib/pfmlib_perf_event_pmu.c ++++ b/lib/pfmlib_perf_event_pmu.c +@@ -569,7 +569,7 @@ static int + pfmlib_perf_encode_tp(pfmlib_event_desc_t *e) + { + perf_umask_t *um; +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + int i, nu = 0; + + e->fstr[0] = 
'\0'; +@@ -607,7 +607,7 @@ pfmlib_perf_encode_tp(pfmlib_event_desc_t *e) + static int + pfmlib_perf_encode_hw_cache(pfmlib_event_desc_t *e) + { +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + perf_event_t *ent; + unsigned int msk, grpmsk; + uint64_t umask = 0; +@@ -733,7 +733,7 @@ pfm_perf_event_is_valid(void *this, int idx) + } + + static int +-pfm_perf_get_event_attr_info(void *this, int idx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_perf_get_event_attr_info(void *this, int idx, int attr_idx, pfmlib_event_attr_info_t *info) + { + perf_umask_t *um; + +diff --git a/lib/pfmlib_perf_event_raw.c b/lib/pfmlib_perf_event_raw.c +index e10d215912ea..71d944334876 100644 +--- a/lib/pfmlib_perf_event_raw.c ++++ b/lib/pfmlib_perf_event_raw.c +@@ -91,7 +91,7 @@ pfm_perf_raw_event_is_valid(void *this, int idx) + } + + static int +-pfm_perf_raw_get_event_attr_info(void *this, int idx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_perf_raw_get_event_attr_info(void *this, int idx, int attr_idx, pfmlib_event_attr_info_t *info) + { + return PFM_ERR_ATTR; + } +diff --git a/lib/pfmlib_power_priv.h b/lib/pfmlib_power_priv.h +index 8b5c3ac0ffcf..3b72d326e3bb 100644 +--- a/lib/pfmlib_power_priv.h ++++ b/lib/pfmlib_power_priv.h +@@ -101,7 +101,7 @@ typedef struct { + #define POWER8_PLM (POWER_PLM|PFM_PLMH) + + extern int pfm_gen_powerpc_get_event_info(void *this, int pidx, pfm_event_info_t *info); +-extern int pfm_gen_powerpc_get_event_attr_info(void *this, int pidx, int umask_idx, pfm_event_attr_info_t *info); ++extern int pfm_gen_powerpc_get_event_attr_info(void *this, int pidx, int umask_idx, pfmlib_event_attr_info_t *info); + extern int pfm_gen_powerpc_get_encoding(void *this, pfmlib_event_desc_t *e); + extern int pfm_gen_powerpc_get_event_first(void *this); + extern int pfm_gen_powerpc_get_event_next(void *this, int idx); +diff --git a/lib/pfmlib_powerpc.c b/lib/pfmlib_powerpc.c +index f025dede599d..f32080d63b5e 100644 +--- a/lib/pfmlib_powerpc.c ++++ 
b/lib/pfmlib_powerpc.c +@@ -56,7 +56,7 @@ pfm_gen_powerpc_get_event_info(void *this, int pidx, pfm_event_info_t *info) + } + + int +-pfm_gen_powerpc_get_event_attr_info(void *this, int pidx, int umask_idx, pfm_event_attr_info_t *info) ++pfm_gen_powerpc_get_event_attr_info(void *this, int pidx, int umask_idx, pfmlib_event_attr_info_t *info) + { + /* No attributes are supported */ + return PFM_ERR_ATTR; +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index 33d7fdf2013d..2f4d2b9d494b 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -56,6 +56,28 @@ typedef struct { + pfm_attr_t type; /* used to validate value (if any) */ + } pfmlib_attr_desc_t; + ++typedef struct { ++ const char *name; /* attribute symbolic name */ ++ const char *desc; /* attribute description */ ++ const char *equiv; /* attribute is equivalent to */ ++ size_t size; /* struct sizeof */ ++ uint64_t code; /* attribute code */ ++ pfm_attr_t type; /* attribute type */ ++ int idx; /* attribute opaque index */ ++ pfm_attr_ctrl_t ctrl; /* what is providing attr */ ++ struct { ++ unsigned int is_dfl:1; /* is default umask */ ++ unsigned int is_precise:1; /* Intel X86: supports PEBS */ ++ unsigned int reserved_bits:30; ++ }; ++ union { ++ uint64_t dfl_val64; /* default 64-bit value */ ++ const char *dfl_str; /* default string value */ ++ int dfl_bool; /* default boolean value */ ++ int dfl_int; /* default integer value */ ++ }; ++} pfmlib_event_attr_info_t; ++ + /* + * attribute description passed to model-specific layer + */ +@@ -90,7 +112,7 @@ typedef struct { + int count; /* number of entries in codes[] */ + pfmlib_attr_t attrs[PFMLIB_MAX_ATTRS]; /* list of requested attributes */ + +- pfm_event_attr_info_t *pattrs; /* list of possible attributes */ ++ pfmlib_event_attr_info_t *pattrs; /* list of possible attributes */ + char fstr[PFMLIB_EVT_MAX_NAME_LEN]; /* fully qualified event string */ + uint64_t codes[PFMLIB_MAX_ENCODING]; /* event encoding */ + void *os_data; +@@ -129,7 +151,7 @@ 
typedef struct pfmlib_pmu { + int (*event_is_valid)(void *this, int pidx); + int (*can_auto_encode)(void *this, int pidx, int uidx); + +- int (*get_event_attr_info)(void *this, int pidx, int umask_idx, pfm_event_attr_info_t *info); ++ int (*get_event_attr_info)(void *this, int pidx, int umask_idx, pfmlib_event_attr_info_t *info); + int (*get_event_encoding[PFM_OS_MAX])(void *this, pfmlib_event_desc_t *e); + + void (*validate_pattrs[PFM_OS_MAX])(void *this, pfmlib_event_desc_t *e); +diff --git a/lib/pfmlib_sparc.c b/lib/pfmlib_sparc.c +index f88b5512a5f4..fe8da0618d31 100644 +--- a/lib/pfmlib_sparc.c ++++ b/lib/pfmlib_sparc.c +@@ -165,7 +165,7 @@ int + pfm_sparc_get_encoding(void *this, pfmlib_event_desc_t *e) + { + const sparc_entry_t *pe = this_pe(this); +- pfm_event_attr_info_t *a; ++ pfmlib_event_attr_info_t *a; + pfm_sparc_reg_t reg; + int i; + +@@ -260,7 +260,7 @@ pfm_sparc_validate_table(void *this, FILE *fp) + } + + int +-pfm_sparc_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) ++pfm_sparc_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) + { + const sparc_entry_t *pe = this_pe(this); + int idx; +diff --git a/lib/pfmlib_sparc_priv.h b/lib/pfmlib_sparc_priv.h +index 7de9b3dc327a..332651ff051e 100644 +--- a/lib/pfmlib_sparc_priv.h ++++ b/lib/pfmlib_sparc_priv.h +@@ -45,7 +45,7 @@ extern int pfm_sparc_get_event_first(void *this); + extern int pfm_sparc_get_event_next(void *this, int idx); + extern int pfm_sparc_event_is_valid(void *this, int pidx); + extern int pfm_sparc_validate_table(void *this, FILE *fp); +-extern int pfm_sparc_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info); ++extern int pfm_sparc_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info); + extern int pfm_sparc_get_event_info(void *this, int idx, pfm_event_info_t *info); + extern unsigned int pfm_sparc_get_event_nattrs(void *this, int pidx); + +diff 
--git a/lib/pfmlib_torrent.c b/lib/pfmlib_torrent.c +index b8d697aa27ac..72991e7ec98a 100644 +--- a/lib/pfmlib_torrent.c ++++ b/lib/pfmlib_torrent.c +@@ -104,7 +104,7 @@ pfm_torrent_get_event_info(void *this, int pidx, pfm_event_info_t *info) + + static int + pfm_torrent_get_event_attr_info(void *this, int idx, int attr_idx, +- pfm_event_attr_info_t *info) ++ pfmlib_event_attr_info_t *info) + { + int m; + +-- +2.7.4 + +From 39d4b76fa96825ec65724eb94939a3b534a62fd0 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 3 Apr 2017 23:41:10 -0700 +Subject: [PATCH 6/7] enable generic support for 64-bit raw umask + +This patch modifies the generic code to handle 64-bit raw umasks +passed by users. + +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_common.c | 3 ++- + lib/pfmlib_priv.h | 4 ++-- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index f3c6dfa23e55..6ff44994203b 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -1011,7 +1011,8 @@ pfmlib_parse_event_attr(char *str, pfmlib_event_desc_t *d) + ainfo->name = "RAW_UMASK"; + ainfo->type = PFM_ATTR_RAW_UMASK; + ainfo->ctrl = PFM_ATTR_CTRL_PMU; +- ainfo->idx = strtoul(s, &endptr, 0); ++ /* can handle up to 64-bit raw umask */ ++ ainfo->idx = strtoull(s, &endptr, 0); + ainfo->equiv= NULL; + if (*endptr) { + DPRINT("raw umask (%s) is not a number\n"); +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index 2f4d2b9d494b..b7503a76de01 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -63,8 +63,8 @@ typedef struct { + size_t size; /* struct sizeof */ + uint64_t code; /* attribute code */ + pfm_attr_t type; /* attribute type */ +- int idx; /* attribute opaque index */ +- pfm_attr_ctrl_t ctrl; /* what is providing attr */ ++ pfm_attr_ctrl_t ctrl; /* what is providing attr */ ++ uint64_t idx; /* attribute opaque index */ + struct { + unsigned int is_dfl:1; /* is default umask */ + unsigned int is_precise:1; /* Intel X86: supports 
PEBS */ +-- +2.7.4 + +From 088a1806676382e1a0324ba4c2d59b9d07a96caf Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 4 Apr 2017 09:42:25 -0700 +Subject: [PATCH 7/7] enable 38-bit raw umask for Intel offcore_response event + +This patch enables support for passing and encoding of 38-bit +offcore_response matrix umask. Without the patch, the raw umask +was limited to 32-bit which is not enough to cover all the possible +bits of the offcore_response event available since Intel SandyBridge. + +$ examples/check_events offcore_response_0:0xffffff +Requested Event: offcore_response_0:0xffffff +Actual Event: ivb::OFFCORE_RESPONSE_0:0xffffff:k=1:u=1:e=0:i=0:c=0:t=0 +PMU : Intel Ivy Bridge +IDX : 155189325 +Codes : 0x5301b7 0xffffff + +The patch also adds tests to the validation code. + +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_intel_x86.c | 20 +++-- + tests/validate_x86.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 246 insertions(+), 6 deletions(-) + +diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c +index 09a0f50a3a4e..8fe93115dfa9 100644 +--- a/lib/pfmlib_intel_x86.c ++++ b/lib/pfmlib_intel_x86.c +@@ -481,16 +481,24 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) + reg.sel_event_select = last_ucode; + } + } else if (a->type == PFM_ATTR_RAW_UMASK) { ++ int ofr_bits = 8; ++ uint64_t rmask; ++ ++ /* set limit on width of raw umask */ ++ if (intel_x86_eflag(this, e->event, INTEL_X86_NHM_OFFCORE)) { ++ ofr_bits = 38; ++ if (e->pmu->pmu == PFM_PMU_INTEL_WSM || e->pmu->pmu == PFM_PMU_INTEL_WSM_DP) ++ ofr_bits = 16; ++ } ++ rmask = (1ULL << ofr_bits) - 1; + +- /* there can only be one RAW_UMASK per event */ +- +- /* sanity check */ +- if (a->idx & ~0xff) { +- DPRINT("raw umask is 8-bit wide\n"); ++ if (a->idx & ~rmask) { ++ DPRINT("raw umask is too wide max %d bits\n", ofr_bits); + return PFM_ERR_ATTR; + } ++ + /* override umask */ +- umask2 = a->idx & 0xff; ++ umask2 = a->idx & rmask; + ugrpmsk = 
grpmsk; + } else { + uint64_t ival = e->attrs[k].ival; +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 906afba636e1..aa0aaa114d0d 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -4523,6 +4523,238 @@ static const test_event_t x86_test_events[]={ + .codes[0] = 0x0825, + .fstr = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_FULL:AD_1", + }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "wsm::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_0:0xffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xffff, ++ .fstr = "wsm::OFFCORE_RESPONSE_0:0xffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "snb::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "snb::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, 
++ { SRC_LINE, ++ .name = "hsw::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "hsw::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "hsw::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "skl::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "skl::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_1:0xfff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfff, ++ .fstr = "wsm::OFFCORE_RESPONSE_1:0xfff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = 
"wsm::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "snb::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "snb::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "hsw::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "hsw::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ 
.name = "bdw_ep::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "skl::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "skl::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, + }; + + #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) +-- +2.7.4 + +From 1eac17750c99cc29156d3cf2815b4bf0cdf1a1be Mon Sep 17 00:00:00 2001 +From: William Cohen +Date: Tue, 11 Apr 2017 11:22:59 -0400 +Subject: [PATCH] Also convert s390 to use the internal + pfmlib_event_attr_info_t + +Commit 321133e converted most of the architectures to use the internal +pfmlib_event_attr_info_t type. However, the s390 was missed in that +previous commit. This patch corrects the issue so libpfm compiles on +s390. 
+--- + lib/pfmlib_s390x_cpumf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/pfmlib_s390x_cpumf.c b/lib/pfmlib_s390x_cpumf.c +index db2a215..b5444ef 100644 +--- a/lib/pfmlib_s390x_cpumf.c ++++ b/lib/pfmlib_s390x_cpumf.c +@@ -254,7 +254,7 @@ static int pfm_cpumf_get_event_info(void *this, int idx, + } + + static int pfm_cpumf_get_event_attr_info(void *this, int idx, int umask_idx, +- pfm_event_attr_info_t *info) ++ pfmlib_event_attr_info_t *info) + { + /* Attributes are not supported */ + return PFM_ERR_ATTR; +-- +2.9.3 + diff --git a/SOURCES/libpfm-updates.patch b/SOURCES/libpfm-updates.patch new file mode 100644 index 0000000..95ed8b2 --- /dev/null +++ b/SOURCES/libpfm-updates.patch @@ -0,0 +1,10351 @@ +From 756658bff2e346b72d54ae569a68ae4028cf541e Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Fri, 19 Feb 2016 20:12:23 +0100 +Subject: [PATCH] fix encoding of UNC_M_PRE_COUNT for HSW-EP and IVB-EP + +The encoding of RD, WR, BYP umasks were wrong. +Added a couple of tests to check the encodings of this event. 
+ +Signed-off-by: Stephane Eranian +--- + lib/events/intel_hswep_unc_imc_events.h | 6 +++--- + lib/events/intel_ivbep_unc_imc_events.h | 6 +++--- + tests/validate_x86.c | 14 ++++++++++++++ + 3 files changed, 20 insertions(+), 6 deletions(-) + +diff --git a/lib/events/intel_hswep_unc_imc_events.h b/lib/events/intel_hswep_unc_imc_events.h +index 8b52be4..7f77615 100644 +--- a/lib/events/intel_hswep_unc_imc_events.h ++++ b/lib/events/intel_hswep_unc_imc_events.h +@@ -162,15 +162,15 @@ static const intel_x86_umask_t hswep_unc_m_pre_count[]={ + }, + { .uname = "RD", + .udesc = "Precharge due to read", +- .ucode = 0x100, ++ .ucode = 0x400, + }, + { .uname = "WR", + .udesc = "Precharhe due to write", +- .ucode = 0x200, ++ .ucode = 0x800, + }, + { .uname = "BYP", + .udesc = "Precharge due to bypass", +- .ucode = 0x800, ++ .ucode = 0x1000, + }, + }; + +diff --git a/lib/events/intel_ivbep_unc_imc_events.h b/lib/events/intel_ivbep_unc_imc_events.h +index 473afc4..ba60c7e 100644 +--- a/lib/events/intel_ivbep_unc_imc_events.h ++++ b/lib/events/intel_ivbep_unc_imc_events.h +@@ -162,15 +162,15 @@ static const intel_x86_umask_t ivbep_unc_m_pre_count[]={ + }, + { .uname = "RD", + .udesc = "Precharge due to read", +- .ucode = 0x100, ++ .ucode = 0x400, + }, + { .uname = "WR", + .udesc = "Precharhe due to write", +- .ucode = 0x200, ++ .ucode = 0x800, + }, + { .uname = "BYP", + .udesc = "Precharge due to bypass", +- .ucode = 0x800, ++ .ucode = 0x1000, + }, + }; + +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index a29b031..4bf8604 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -2664,6 +2664,13 @@ static const test_event_t x86_test_events[]={ + .fstr = "ivbep_unc_imc0::UNC_M_CAS_COUNT:RD:e=0:t=0", + }, + { SRC_LINE, ++ .name = "ivbep_unc_imc0::UNC_M_PRE_COUNT:WR", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0802, ++ .fstr = "ivbep_unc_imc0::UNC_M_PRE_COUNT:WR:e=0:t=0", ++ }, ++ { SRC_LINE, + .name = 
"ivbep_unc_imc0::UNC_M_POWER_CKE_CYCLES:RANK0", + .ret = PFM_SUCCESS, + .count = 1, +@@ -3607,6 +3614,13 @@ static const test_event_t x86_test_events[]={ + .fstr = "hswep_unc_imc0::UNC_M_CAS_COUNT:RD:e=0:i=0:t=0", + }, + { SRC_LINE, ++ .name = "hswep_unc_imc0::UNC_M_PRE_COUNT:WR", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0802, ++ .fstr = "hswep_unc_imc0::UNC_M_PRE_COUNT:WR:e=0:i=0:t=0", ++ }, ++ { SRC_LINE, + .name = "hswep_unc_imc0::UNC_M_POWER_CKE_CYCLES:RANK0", + .ret = PFM_SUCCESS, + .count = 1, +-- +2.9.3 + + +From 1fc70406adb18233251c31848a6fc372813599b2 Mon Sep 17 00:00:00 2001 +From: Will Schmidt +Date: Thu, 10 Mar 2016 13:43:58 -0600 +Subject: [PATCH] Update the POWER8 PVR values + +Update the POWER8 PVR values to include additional flavors +of the POWER8 processor. +The existing POWER8 entry is now POWER8E, this is to be +consistent with changes made on the kernel side. +(arch/powerpc/kernel/cputable.c) + +Signed-off-by: Will Schmidt +--- + lib/pfmlib_power8.c | 6 ++++-- + lib/pfmlib_power_priv.h | 4 +++- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/lib/pfmlib_power8.c b/lib/pfmlib_power8.c +index d274b59..ea964b7 100644 +--- a/lib/pfmlib_power8.c ++++ b/lib/pfmlib_power8.c +@@ -1,7 +1,7 @@ + /* + * pfmlib_power8.c : IBM Power8 support + * +- * Copyright (C) IBM Corporation, 2013. All rights reserved. ++ * Copyright (C) IBM Corporation, 2013-2016. All rights reserved. 
+ * Contributed by Carl Love (carll@us.ibm.com) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy +@@ -29,7 +29,9 @@ + static int + pfm_power8_detect(void* this) + { +- if (__is_processor(PV_POWER8)) ++ if (__is_processor(PV_POWER8) || ++ __is_processor(PV_POWER8E) || ++ __is_processor(PV_POWER8NVL)) + return PFM_SUCCESS; + return PFM_ERR_NOTSUPP; + } +diff --git a/lib/pfmlib_power_priv.h b/lib/pfmlib_power_priv.h +index 0d8b473..e66e7e9 100644 +--- a/lib/pfmlib_power_priv.h ++++ b/lib/pfmlib_power_priv.h +@@ -93,7 +93,9 @@ typedef struct { + #define PV_POWER7p 0x004a + #define PV_970MP 0x0044 + #define PV_970GX 0x0045 +-#define PV_POWER8 0x004b ++#define PV_POWER8E 0x004b ++#define PV_POWER8NVL 0x004c ++#define PV_POWER8 0x004d + + extern int pfm_gen_powerpc_get_event_info(void *this, int pidx, pfm_event_info_t *info); + extern int pfm_gen_powerpc_get_event_attr_info(void *this, int pidx, int umask_idx, pfm_event_attr_info_t *info); +-- +2.9.3 + + +From c6f8a4db1b83eb6ad6dee81e33124d259e37c2c5 Mon Sep 17 00:00:00 2001 +From: Will Schmidt +Date: Thu, 10 Mar 2016 13:44:02 -0600 +Subject: [PATCH] A small assortment of cosmetic touch-ups. + +A small assortment of cosmetic touch-ups. 
+ +Signed-off-by: Will Schmidt +--- + lib/events/power8_events.h | 2 +- + lib/pfmlib_power7.c | 2 +- + lib/pfmlib_power_priv.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/events/power8_events.h b/lib/events/power8_events.h +index 2aee218..92337f8 100644 +--- a/lib/events/power8_events.h ++++ b/lib/events/power8_events.h +@@ -8,7 +8,7 @@ + /* + * File: power8_events.h + * CVS: +-Author: Carl Love ++* Author: Carl Love + * carll.ibm.com + * Mods: + * +diff --git a/lib/pfmlib_power7.c b/lib/pfmlib_power7.c +index ceab517..a32977c 100644 +--- a/lib/pfmlib_power7.c ++++ b/lib/pfmlib_power7.c +@@ -1,5 +1,5 @@ + /* +- * pfmlib_power7.c : IBM Power6 support ++ * pfmlib_power7.c : IBM Power7 support + * + * Copyright (C) IBM Corporation, 2009. All rights reserved. + * Contributed by Corey Ashford (cjashfor@us.ibm.com) +diff --git a/lib/pfmlib_power_priv.h b/lib/pfmlib_power_priv.h +index e66e7e9..04f1437 100644 +--- a/lib/pfmlib_power_priv.h ++++ b/lib/pfmlib_power_priv.h +@@ -77,7 +77,7 @@ typedef struct { + /* Processor Version Register (PVR) field extraction */ + + #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +-#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ ++#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */ + + #define __is_processor(pv) (PVR_VER(mfspr(SPRN_PVR)) == (pv)) + +-- +2.9.3 + + +From f191f9048a3adb191bbde3dac1bddec5436250dc Mon Sep 17 00:00:00 2001 +From: Will Cohen +Date: Thu, 24 Mar 2016 07:11:35 +0100 +Subject: [PATCH] Limit functions visibility in libpfm + +Limiting functions and data structures visibility in libpfm.so +so they are hidden from other code linked with the library can allow +the compiler to generate better quality code and reduce linking +overhead on startup. Hiding the internal functions and data +structures also allows more flexibility in changing internal +implementation while keeping compatibility with previous versions of +the library. 
+ +This patch limits libpfm to making visible the function listed in the +header files it provides. The llvm clang compiler honor the gcc +visibility option and pragmas. According to the libabigail tool +abidiff 59 functions and 154 variables were hidden as a result of this +change. The patch reduces the size of the shared library by about 14KB +(0.8%) on x86_64. + +Signed-off-by: William Cohen +--- + include/perfmon/perf_event.h | 4 ++++ + include/perfmon/pfmlib.h | 4 ++++ + include/perfmon/pfmlib_perf_event.h | 4 ++++ + lib/Makefile | 2 +- + 4 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/include/perfmon/perf_event.h b/include/perfmon/perf_event.h +index cadcec7..a11a8cd 100644 +--- a/include/perfmon/perf_event.h ++++ b/include/perfmon/perf_event.h +@@ -22,6 +22,8 @@ + #ifndef __PERFMON_PERF_EVENT_H__ + #define __PERFMON_PERF_EVENT_H__ + ++#pragma GCC visibility push(default) ++ + #include + #include /* for syscall numbers */ + #include +@@ -588,4 +590,6 @@ union perf_mem_data_src { + } + #endif + ++#pragma GCC visibility pop ++ + #endif /* __PERFMON_PERF_EVENT_H__ */ +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index a548be2..b05754b 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -26,6 +26,8 @@ + #ifndef __PFMLIB_H__ + #define __PFMLIB_H__ + ++#pragma GCC visibility push(default) ++ + #ifdef __cplusplus + extern "C" { + #endif +@@ -534,4 +536,6 @@ extern pfm_err_t pfm_get_event_encoding(const char *str, int dfl_plm, char **fst + } + #endif + ++#pragma GCC visibility pop ++ + #endif /* __PFMLIB_H__ */ +diff --git a/include/perfmon/pfmlib_perf_event.h b/include/perfmon/pfmlib_perf_event.h +index 8b3dae2..0516277 100644 +--- a/include/perfmon/pfmlib_perf_event.h ++++ b/include/perfmon/pfmlib_perf_event.h +@@ -25,6 +25,8 @@ + #include + #include + ++#pragma GCC visibility push(default) ++ + #ifdef __cplusplus + extern "C" { + #endif +@@ -61,4 +63,6 @@ extern pfm_err_t 
pfm_get_perf_event_encoding(const char *str, + } + #endif + ++#pragma GCC visibility pop ++ + #endif /* __PFMLIB_PERF_EVENT_H__ */ +diff --git a/lib/Makefile b/lib/Makefile +index a2c5818..f035307 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -33,7 +33,7 @@ ifeq ($(SYS),Linux) + SRCS += pfmlib_perf_event_pmu.c pfmlib_perf_event.c pfmlib_perf_event_raw.c + endif + +-CFLAGS+=-D_REENTRANT -I. ++CFLAGS+=-D_REENTRANT -I. -fvisibility=hidden + + # + # list all library support modules +-- +2.9.3 + + +From 4f9fc8b50b761807b12b739372af48b22a46ad28 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Thu, 24 Mar 2016 07:35:31 +0100 +Subject: [PATCH] update Intel Skylake event table + +To match V18 published on download.01.org. +Basically adding missing: + ITLB_MISSES.WALK_COMPLETED_4K + ITLB_MISSES.WALK_COMPLETED_2M_4M + + DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + + DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_skl_events.h | 20 ++++++++++++++++++++ + tests/validate_x86.c | 12 ++++++++++++ + 2 files changed, 32 insertions(+) + +diff --git a/lib/events/intel_skl_events.h b/lib/events/intel_skl_events.h +index d48e87e..4980164 100644 +--- a/lib/events/intel_skl_events.h ++++ b/lib/events/intel_skl_events.h +@@ -223,6 +223,16 @@ static const intel_x86_umask_t skl_dtlb_load_misses[]={ + .ucode = 0xe00, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "WALK_COMPLETED_4K", ++ .udesc = "Misses in all TLB levels causes a page walk of 4KB page size that completes", ++ .ucode = 0x200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "WALK_COMPLETED_2M_4M", ++ .udesc = "Misses in all TLB levels causes a page walk of 2MB/4MB page size that completes", ++ .ucode = 0x400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { .uname = "WALK_ACTIVE", + .udesc = "Cycles with at least one hardware walker active for a load", + .ucode = 0x1000 | (0x1 << INTEL_X86_CMASK_BIT), 
+@@ -257,6 +267,16 @@ static const intel_x86_umask_t skl_itlb_misses[]={ + .ucode = 0xe00, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "WALK_COMPLETED_4K", ++ .udesc = "Misses in all TLB levels causes a page walk of 4KB page size that completes", ++ .ucode = 0x200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "WALK_COMPLETED_2M_4M", ++ .udesc = "Misses in all TLB levels causes a page walk of 2MB/4MB page size that completes", ++ .ucode = 0x400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { .uname = "WALK_DURATION", + .udesc = "Cycles when PMH is busy with page walks", + .ucode = 0x1000, +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 4bf8604..84e08b2 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -3921,6 +3921,18 @@ static const test_event_t x86_test_events[]={ + .fstr = "skl::CYCLE_ACTIVITY:0x6:k=1:u=1:e=0:i=0:c=6:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, ++ .name = "skl::dtlb_store_misses:walk_completed_2m_4m:c=1", ++ .count = 1, ++ .codes[0] = 0x1530449, ++ .fstr = "skl::DTLB_STORE_MISSES:WALK_COMPLETED_2M_4M:k=1:u=1:e=0:i=0:c=1:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::rob_misc_events:lbr_inserts", ++ .count = 1, ++ .codes[0] = 0x5320cc, ++ .fstr = "skl::ROB_MISC_EVENTS:LBR_INSERTS:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, + .name = "skl::cycle_activity:stalls_mem_any:c=6", + .ret = PFM_ERR_ATTR_SET, + }, +-- +2.9.3 + + +From ec6289ddde0a8826f16158e00fb45636e25f0d06 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Thu, 24 Mar 2016 07:48:30 +0100 +Subject: [PATCH] update Intel Broadwell event table + +Based on V13 from download.01.org + +Following events added: + + ITLB_MISSES.WALK_COMPLETED_4K + ITLB_MISSES.WALK_COMPLETED_2M_4M + ITLB_MISSES.WALK_COMPLETED_1G + ITLB_MISSES.STLB_HIT_2M + + DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G + DTLB_LOAD_MISSES.STLB_HIT_2M + + DTLB_STORE_MISSES.WALK_COMPLETED_4K + 
DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G + DTLB_STORE_MISSES.STLB_HIT_2M + LOAD_HIT_PRE.SW_PREF + + BR_MISP_EXEC:TAKEN_RETURN_NEAR + + Signed-off-by: Stephane Eranian +--- + lib/events/intel_bdw_events.h | 46 +++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 42 insertions(+), 4 deletions(-) + +diff --git a/lib/events/intel_bdw_events.h b/lib/events/intel_bdw_events.h +index 3d21a04..e59d61a 100644 +--- a/lib/events/intel_bdw_events.h ++++ b/lib/events/intel_bdw_events.h +@@ -223,6 +223,11 @@ static const intel_x86_umask_t bdw_br_misp_exec[]={ + .ucode = 0xa000, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "TAKEN_RETURN_NEAR", ++ .udesc = "Taken speculative and retired mispredicted direct returns", ++ .ucode = 0x8800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + }; + + static const intel_x86_umask_t bdw_br_misp_retired[]={ +@@ -381,6 +386,16 @@ static const intel_x86_umask_t bdw_dtlb_load_misses[]={ + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "WALK_COMPLETED_2M_4M", ++ .udesc = "Misses in all TLB levels causes a page walk of 2MB/4MB page sizes that completes", ++ .ucode = 0x400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "WALK_COMPLETED_1G", ++ .udesc = "Misses in all TLB levels causes a page walk of 1GB page sizes that completes", ++ .ucode = 0x800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { .uname = "WALK_COMPLETED", + .udesc = "Misses in all TLB levels causes a page walk of any page size that completes", + .ucode = 0xe00, +@@ -392,10 +407,15 @@ static const intel_x86_umask_t bdw_dtlb_load_misses[]={ + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "STLB_HIT_4K", +- .udesc = "Misses that miss the DTLB and hit the STLB (4K)", ++ .udesc = "Misses that miss the DTLB and hit the STLB (4KB)", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "STLB_HIT_2M", ++ .udesc = "Misses that miss the DTLB and hit the STLB (2MB)", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { 
.uname = "STLB_HIT", + .udesc = "Number of cache load STLB hits. No page walk", + .ucode = 0x6000, +@@ -410,10 +430,20 @@ static const intel_x86_umask_t bdw_itlb_misses[]={ + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WALK_COMPLETED_4K", +- .udesc = "Misses in all TLB levels causes a page walk that completes (4K)", ++ .udesc = "Misses in all TLB levels causes a page walk that completes (4KB)", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "WALK_COMPLETED_2M_4M", ++ .udesc = "Misses in all TLB levels causes a page walk that completes (2MB/4MB)", ++ .ucode = 0x400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "WALK_COMPLETED_1G", ++ .udesc = "Misses in all TLB levels causes a page walk that completes (1GB)", ++ .ucode = 0x800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { .uname = "WALK_COMPLETED", + .udesc = "Misses in all TLB levels causes a page walk of any page size that completes", + .ucode = 0xe00, +@@ -425,10 +455,15 @@ static const intel_x86_umask_t bdw_itlb_misses[]={ + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "STLB_HIT_4K", +- .udesc = "Misses that miss the DTLB and hit the STLB (4K)", ++ .udesc = "Misses that miss the DTLB and hit the STLB (4KB)", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "STLB_HIT_2M", ++ .udesc = "Misses that miss the DTLB and hit the STLB (2MB)", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { .uname = "STLB_HIT", + .udesc = "Number of cache load STLB hits. 
No page walk", + .ucode = 0x6000, +@@ -969,7 +1004,10 @@ static const intel_x86_umask_t bdw_load_hit_pre[]={ + { .uname = "HW_PF", + .udesc = "Non software-prefetch load dispatches that hit FB allocated for hardware prefetch", + .ucode = 0x200, +- .uflags = INTEL_X86_DFL, ++ }, ++ { .uname = "SW_PF", ++ .udesc = "Non software-prefetch load dispatches that hit FB allocated for software prefetch", ++ .ucode = 0x100, + }, + }; + +-- +2.9.3 + + +From 9603a098df47994a03ffb0c4fdaed5a94fbf1c6f Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sat, 16 Apr 2016 05:26:11 +0200 +Subject: [PATCH] enable Intel Broadwell EP core PMU support + +This file enables full support for Intel Broadwell EP core PMU. +Prior, it was based on Broadwell desktop. This patch adds the +remote memory events. + +Signed-off-by: Stephane Eranian +--- + include/perfmon/pfmlib.h | 3 +- + lib/events/intel_bdw_events.h | 159 ++++++++++++++++++++++++++++-------------- + lib/pfmlib_common.c | 1 + + lib/pfmlib_intel_bdw.c | 37 +++++++++- + lib/pfmlib_priv.h | 1 + + tests/validate_x86.c | 26 ++++++- + 6 files changed, 171 insertions(+), 56 deletions(-) + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index b05754b..24a2a60 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -245,7 +245,7 @@ typedef enum { + PFM_PMU_ARM_CORTEX_A7, /* ARM Cortex A7 */ + + PFM_PMU_INTEL_HSW_EP, /* Intel Haswell EP */ +- PFM_PMU_INTEL_BDW, /* Intel Broadwell EP */ ++ PFM_PMU_INTEL_BDW, /* Intel Broadwell */ + + PFM_PMU_ARM_XGENE, /* Applied Micro X-Gene (ARMv8) */ + +@@ -296,6 +296,7 @@ typedef enum { + + PFM_PMU_INTEL_SKL, /* Intel Skylake */ + ++ PFM_PMU_INTEL_BDW_EP, /* Intel Broadwell EP */ + /* MUST ADD NEW PMU MODELS HERE */ + + PFM_PMU_MAX /* end marker */ +diff --git a/lib/events/intel_bdw_events.h b/lib/events/intel_bdw_events.h +index e59d61a..439d3c6 100644 +--- a/lib/events/intel_bdw_events.h ++++ b/lib/events/intel_bdw_events.h +@@ -1092,10 +1092,28 @@ static const 
intel_x86_umask_t bdw_mem_load_uops_l3_hit_retired[]={ + + static const intel_x86_umask_t bdw_mem_load_uops_l3_miss_retired[]={ + { .uname = "LOCAL_DRAM", +- .udesc = "Retired load uops missing L3 cache but hitting local memory", ++ .udesc = "Retired load uops missing L3 cache but hitting local memory (Precise Event)", + .ucode = 0x100, +- .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS , + }, ++ { .uname = "REMOTE_DRAM", ++ .udesc = "Number of retired load uops that missed L3 but were service by remote RAM, snoop not needed, snoop miss, snoop hit data not forwarded (Precise Event)", ++ .ucode = 0x400, ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "REMOTE_HITM", ++ .udesc = "Number of retired load uops whose data sources was remote HITM (Precise Event)", ++ .ucode = 0x1000, ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "REMOTE_FWD", ++ .udesc = "Load uops that miss in the L3 whose data source was forwarded from a remote cache (Precise Event)", ++ .ucode = 0x2000, ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, + }; + + static const intel_x86_umask_t bdw_mem_load_uops_retired[]={ +@@ -1785,96 +1803,135 @@ static const intel_x86_umask_t bdw_offcore_response[]={ + .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, +- { .uname = "L4_HIT_LOCAL_L4", +- .udesc = "Supplier: L4 local hit", +- .ucode = 0x1ULL << (22+8), ++ { .uname = "L3_MISS_LOCAL", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 1ULL << (26+8), + .umodel = PFM_PMU_INTEL_BDW, +- .grpid = 1, ++ .grpid = 1, + }, +- { .uname = "L4_HIT_REMOTE_HOP0_L4", +- .udesc = "Supplier: L4 hit on L4 from same socket (hop0)", +- .ucode = 0x1ULL << (23+8), ++ { .uname = "LLC_MISS_LOCAL", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 1ULL << (26+8), ++ .uequiv = "L3_MISS_LOCAL", 
+ .umodel = PFM_PMU_INTEL_BDW, +- .grpid = 1, ++ .grpid = 1, + }, +- { .uname = "L4_HIT_REMOTE_HOP1_L4", +- .udesc = "Supplier: L4 hit on remote L4 with 1 hop", +- .ucode = 0x1ULL << (24+8), ++ { .uname = "LLC_MISS_LOCAL_DRAM", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 1ULL << (26+8), ++ .uequiv = "L3_MISS_LOCAL", + .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, +- { .uname = "L4_HIT_REMOTE_HOP2P_L4", +- .udesc = "Supplier: L4 hit on remote L4 with 2 hops", +- .ucode = 0x1ULL << (25+8), +- .umodel = PFM_PMU_INTEL_BDW, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses", ++ .ucode = 0xfULL << (26+8), + .grpid = 1, + }, +- { .uname = "L4_HIT", +- .udesc = "Supplier: L4 hits (covers all L4 hits)", +- .ucode = 0xfULL << (22+8), +- .umodel = PFM_PMU_INTEL_BDW, ++ { .uname = "L3_MISS_REMOTE_HOP0", ++ .udesc = "Supplier: counts L3 misses to remote DRAM with 0 hop", ++ .ucode = 0x1ULL << (27+8), ++ .umodel = PFM_PMU_INTEL_BDW_EP, + .grpid = 1, + }, +- { .uname = "L3_MISS_LOCAL", +- .udesc = "Supplier: counts L3 misses to local DRAM", +- .ucode = 1ULL << (26+8), +- .umodel = PFM_PMU_INTEL_BDW, +- .grpid = 1, ++ { .uname = "L3_MISS_REMOTE_HOP0_DRAM", ++ .udesc = "Supplier: counts L3 misses to remote DRAM with 0 hop", ++ .ucode = 0x1ULL << (27+8), ++ .uequiv = "L3_MISS_REMOTE_HOP0", ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, + }, +- { .uname = "LLC_MISS_LOCAL", +- .udesc = "Supplier: counts L3 misses to local DRAM", +- .ucode = 1ULL << (26+8), +- .uequiv = "L3_MISS_LOCAL", +- .umodel = PFM_PMU_INTEL_BDW, +- .grpid = 1, ++ { .uname = "L3_MISS_REMOTE_HOP1", ++ .udesc = "Supplier: counts L3 misses to remote DRAM with 1 hop", ++ .ucode = 0x1ULL << (28+8), ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, ++ }, ++ { .uname = "L3_MISS_REMOTE_HOP1_DRAM", ++ .udesc = "Supplier: counts L3 misses to remote DRAM with 1 hop", ++ .ucode = 0x1ULL << (28+8), ++ .uequiv = "L3_MISS_REMOTE_HOP1", ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, 
++ }, ++ { .uname = "L3_MISS_REMOTE_HOP2P", ++ .udesc = "Supplier: counts L3 misses to remote DRAM with 2P hops", ++ .ucode = 0x1ULL << (29+8), ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, ++ }, ++ { .uname = "L3_MISS_REMOTE_HOP2P_DRAM", ++ .udesc = "Supplier: counts L3 misses to remote DRAM with 2P hops", ++ .ucode = 0x1ULL << (29+8), ++ .uequiv = "L3_MISS_REMOTE_HOP2P", ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, ++ }, ++ { .uname = "L3_MISS_REMOTE", ++ .udesc = "Supplier: counts L3 misses to remote node", ++ .ucode = 0x7ULL << (26+8), ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, ++ }, ++ { .uname = "L3_MISS_REMOTE_DRAM", ++ .udesc = "Supplier: counts L3 misses to remote node", ++ .ucode = 0x7ULL << (26+8), ++ .uequiv = "L3_MISS_REMOTE", ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, ++ }, ++ { .uname = "SPL_HIT", ++ .udesc = "Supplier: counts L3 supplier hit", ++ .ucode = 0x1ULL << (30+8), ++ .grpid = 1, + }, +- { .uname = "SNP_NONE", +- .udesc = "Snoop: counts number of times no snoop-related information is available", ++ { .uname = "SNP_NONE", ++ .udesc = "Snoop: counts number of times no snoop-related information is available", + .ucode = 1ULL << (31+8), + .grpid = 2, + }, +- { .uname = "SNP_NOT_NEEDED", +- .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", ++ { .uname = "SNP_NOT_NEEDED", ++ .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", + .ucode = 1ULL << (32+8), + .grpid = 2, + }, +- { .uname = "SNP_MISS", +- .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", ++ { .uname = "SNP_MISS", ++ .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", + .ucode = 1ULL << (33+8), + .grpid = 2, + }, +- { .uname = "SNP_NO_FWD", +- .udesc = "Snoop: counts number of times a snoop was needed and it hit in at leas one snooped cache", ++ { .uname = "SNP_NO_FWD", ++ .udesc = "Snoop: counts number of 
times a snoop was needed and it hit in at leas one snooped cache", + .ucode = 1ULL << (34+8), + .grpid = 2, + }, +- { .uname = "SNP_FWD", +- .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", ++ { .uname = "SNP_FWD", ++ .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", + .ucode = 1ULL << (35+8), + .grpid = 2, + }, + { .uname = "HITM", + .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", +- .ucode = 1ULL << (36+8), ++ .ucode = 1ULL << (36+8), + .uequiv = "SNP_HITM", +- .grpid = 2, ++ .grpid = 2, + }, + { .uname = "SNP_HITM", + .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", +- .ucode = 1ULL << (36+8), +- .grpid = 2, ++ .ucode = 1ULL << (36+8), ++ .grpid = 2, + }, + { .uname = "NON_DRAM", + .udesc = "Snoop: counts number of times target was a non-DRAM system address. This includes MMIO transactions", +- .ucode = 1ULL << (37+8), +- .grpid = 2, ++ .ucode = 1ULL << (37+8), ++ .grpid = 2, + }, + { .uname = "SNP_ANY", + .udesc = "Snoop: any snoop reason", +- .ucode = 0x7fULL << (31+8), ++ .ucode = 0x7fULL << (31+8), + .uequiv = "SNP_NONE:SNP_NOT_NEEDED:SNP_MISS:SNP_NO_FWD:SNP_FWD:HITM:NON_DRAM", +- .uflags= INTEL_X86_DFL, +- .grpid = 2, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 2, + }, + }; + +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index f9012fe..50d48fb 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -100,6 +100,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= + &intel_hsw_support, + &intel_hsw_ep_support, + &intel_bdw_support, ++ &intel_bdw_ep_support, + &intel_skl_support, + &intel_rapl_support, + &intel_snbep_unc_cb0_support, +diff --git a/lib/pfmlib_intel_bdw.c b/lib/pfmlib_intel_bdw.c +index ea3b7be..1de8438 100644 +--- a/lib/pfmlib_intel_bdw.c ++++ b/lib/pfmlib_intel_bdw.c +@@ -28,9 +28,13 @@ + + static const int bdw_models[] = { + 
61, /* Broadwell Core-M */ ++ 71, /* Broadwell + GT3e (Iris Pro graphics) */ ++ 0 ++}; ++ ++static const int bdwep_models[] = { + 79, /* Broadwell-EP, Xeon */ + 86, /* Broadwell-EP, Xeon D */ +- 71, /* Broadwell + GT3e (Iris Pro graphics) */ + 0 + }; + +@@ -71,3 +75,34 @@ pfmlib_pmu_t intel_bdw_support={ + .get_event_nattrs = pfm_intel_x86_get_event_nattrs, + .can_auto_encode = pfm_intel_x86_can_auto_encode, + }; ++ ++pfmlib_pmu_t intel_bdw_ep_support={ ++ .desc = "Intel Broadwell EP", ++ .name = "bdw_ep", ++ .pmu = PFM_PMU_INTEL_BDW_EP, ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_bdw_pe), ++ .type = PFM_PMU_TYPE_CORE, ++ .supported_plm = INTEL_X86_PLM, ++ .num_cntrs = 8, /* consider with HT off by default */ ++ .num_fixed_cntrs = 3, ++ .max_encoding = 2, /* offcore_response */ ++ .pe = intel_bdw_pe, ++ .atdesc = intel_x86_mods, ++ .flags = PFMLIB_PMU_FL_RAW_UMASK ++ | INTEL_X86_PMU_FL_ECMASK, ++ .cpu_family = 6, ++ .cpu_models = bdwep_models, ++ .pmu_detect = pfm_intel_x86_model_detect, ++ .pmu_init = pfm_bdw_init, ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, ++ PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), ++ .get_event_first = pfm_intel_x86_get_event_first, ++ .get_event_next = pfm_intel_x86_get_event_next, ++ .event_is_valid = pfm_intel_x86_event_is_valid, ++ .validate_table = pfm_intel_x86_validate_table, ++ .get_event_info = pfm_intel_x86_get_event_info, ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), ++ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, ++ .can_auto_encode = pfm_intel_x86_can_auto_encode, ++}; +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index 4c075a2..2c760ea 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -252,6 +252,7 @@ extern pfmlib_pmu_t intel_ivb_ep_support; + extern pfmlib_pmu_t intel_hsw_support; + extern pfmlib_pmu_t intel_hsw_ep_support; + extern pfmlib_pmu_t intel_bdw_support; ++extern pfmlib_pmu_t 
intel_bdw_ep_support; + extern pfmlib_pmu_t intel_skl_support; + extern pfmlib_pmu_t intel_rapl_support; + extern pfmlib_pmu_t intel_snbep_unc_cb0_support; +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 84e08b2..57d2ce0 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -3042,12 +3042,12 @@ static const test_event_t x86_test_events[]={ + .fstr = "bdw::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +- .name = "bdw::offcore_response_0:l4_hit", ++ .name = "bdw::offcore_response_0:l3_miss", + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] =0x5301b7, +- .codes[1] = 0x3f83c08fffull, +- .fstr = "bdw::OFFCORE_RESPONSE_0:ANY_REQUEST:L4_HIT:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ .codes[1] = 0x3fbc008fffull, ++ .fstr = "bdw::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "bdw::offcore_response_1:any_data", +@@ -3058,6 +3058,26 @@ static const test_event_t x86_test_events[]={ + .fstr = "bdw::OFFCORE_RESPONSE_1:DMND_DATA_RD:PF_DATA_RD:PF_LLC_DATA_RD:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3fbc008fffull, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:L3_MISS_REMOTE_HOP0_DRAM", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3f88008fffull, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_REMOTE_HOP0:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw::offcore_response_0:L3_MISS_REMOTE_HOP0_DRAM", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, + .name = "hswep_unc_cbo1::UNC_C_CLOCKTICKS:u", + .ret = PFM_ERR_ATTR, + }, +-- +2.9.3 + + +From 
f009e5b7e06c611321c553aed3c0864d59536f32 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 25 Apr 2016 17:22:54 +0200 +Subject: [PATCH] add Intel Broadwell/Skylake RAPL support + +Add model numbers for Intel Broadwell and Skylake processors. + +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_intel_rapl.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/lib/pfmlib_intel_rapl.c b/lib/pfmlib_intel_rapl.c +index cdbf178..1413b5f 100644 +--- a/lib/pfmlib_intel_rapl.c ++++ b/lib/pfmlib_intel_rapl.c +@@ -102,7 +102,10 @@ pfm_rapl_detect(void *this) + case 60: /* Haswell */ + case 69: /* Haswell */ + case 70: /* Haswell */ +- case 71: /* Haswell */ ++ case 61: /* Broadwell */ ++ case 71: /* Broadwell */ ++ case 78: /* Skylake */ ++ case 94: /* Skylake H/S */ + /* already setup by default */ + break; + case 45: /* Sandy Bridg-EP */ +@@ -111,6 +114,8 @@ pfm_rapl_detect(void *this) + intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_srv_pe); + break; + case 63: /* Haswell-EP */ ++ case 79: /* Broadwell-EP */ ++ case 86: /* Broadwell D */ + intel_rapl_support.pe = intel_rapl_hswep_pe; + intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_hswep_pe); + break; +-- +2.9.3 + + +From 4dc4c6ada254f30eee8cd2ae27bb0869a111b613 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sat, 28 May 2016 03:49:04 +0200 +Subject: [PATCH] Allow raw umask for OFFCORE_RESPONSE on Intel core PMUs + +This patch makes it possible to specify the raw umask as +hexadecimal for the Intel core PMU OFFCORE_RESPONSE_* event. +This makes it possible to encode a umask which could have been +omitted by mistake from the library or not yet supported. + +$ examples/check_events offcore_response_0:0xffff + +Added validation tests for this new support. 
+ +Signed-off-by: Stephane Eranian +--- + include/perfmon/pfmlib.h | 4 +- + lib/pfmlib_intel_x86.c | 16 ++-- + tests/validate_x86.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 243 insertions(+), 9 deletions(-) + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index 24a2a60..8921164 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -420,8 +420,8 @@ typedef struct { + size_t size; /* struct sizeof */ + uint64_t code; /* attribute code */ + pfm_attr_t type; /* attribute type */ +- int idx; /* attribute opaque index */ +- pfm_attr_ctrl_t ctrl; /* what is providing attr */ ++ uint64_t idx; /* attribute opaque index */ ++ pfm_attr_ctrl_t ctrl; /* what is providing attr */ + struct { + unsigned int is_dfl:1; /* is default umask */ + unsigned int is_precise:1; /* Intel X86: supports PEBS */ +diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c +index bb671bd..031de0d 100644 +--- a/lib/pfmlib_intel_x86.c ++++ b/lib/pfmlib_intel_x86.c +@@ -471,16 +471,18 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) + reg.sel_event_select = last_ucode; + } + } else if (a->type == PFM_ATTR_RAW_UMASK) { +- ++ uint64_t rmask; + /* there can only be one RAW_UMASK per event */ +- +- /* sanity check */ +- if (a->idx & ~0xff) { +- DPRINT("raw umask is 8-bit wide\n"); ++ if (intel_x86_eflag(this, e->event, INTEL_X86_NHM_OFFCORE)) { ++ rmask = (1ULL << 38) - 1; ++ } else { ++ rmask = 0xff; ++ } ++ if (a->idx & ~rmask) { ++ DPRINT("raw umask is too wide\n"); + return PFM_ERR_ATTR; + } +- /* override umask */ +- umask2 = a->idx & 0xff; ++ umask2 = a->idx & rmask; + ugrpmsk = grpmsk; + } else { + uint64_t ival = e->attrs[k].ival; +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 57d2ce0..0fce00c 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -3970,6 +3970,238 @@ static const test_event_t x86_test_events[]={ + .codes[0] = 0x15301a3, + .fstr = 
"hsw::CYCLE_ACTIVITY:CYCLES_L2_PENDING:k=1:u=1:e=0:i=0:c=1:t=0:intx=0:intxcp=0", + }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "wsm::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "wsm::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "snb::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "snb::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "hsw::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = 
"hsw::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "hsw::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_0:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xf, ++ .fstr = "skl::OFFCORE_RESPONSE_0:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_0:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0xfffffffff, ++ .fstr = "skl::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_0:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "wsm::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "wsm::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = 
"snb::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "snb::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "snb::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "ivb_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, ++ .name = "ivb_ep::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "hsw::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "hsw::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "hsw::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = 
"bdw_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "bdw_ep::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_1:0xf", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xf, ++ .fstr = "skl::OFFCORE_RESPONSE_1:0xf:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_1:0xfffffffff", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0xfffffffff, ++ .fstr = "skl::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::offcore_response_1:0x7fffffffff", ++ .ret = PFM_ERR_ATTR, ++ }, + }; + #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) + +-- +2.9.3 + + +From 36a34982dafcf784e7d5636c8c4186fca6457c3d Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sat, 28 May 2016 04:08:36 +0200 +Subject: [PATCH] Fix offcore_response raw umask encodings for 32-bit in test + suite + +Constants with more than 32 bits must have the ull suffix in 32-bit mode. 
+ +Signed-off-by: Stephane Eranian +--- + tests/validate_x86.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 0fce00c..09152f7 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -3983,7 +3983,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "wsm::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, +@@ -4003,7 +4003,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "snb::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, +@@ -4023,7 +4023,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "ivb_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, +@@ -4043,7 +4043,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "hsw::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +@@ -4063,7 +4063,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "bdw_ep::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +@@ -4083,7 +4083,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301b7, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = 
"skl::OFFCORE_RESPONSE_0:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +@@ -4095,7 +4095,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "wsm::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, +@@ -4115,7 +4115,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "snb::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, +@@ -4135,7 +4135,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "ivb_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, +@@ -4155,7 +4155,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "hsw::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +@@ -4175,7 +4175,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "bdw_ep::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +@@ -4195,7 +4195,7 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] = 0x5301bb, +- .codes[1] = 0xfffffffff, ++ .codes[1] = 0xfffffffffull, + .fstr = "skl::OFFCORE_RESPONSE_1:0xffffffff:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, +-- +2.9.3 + + +From bfb9baf1c8a9533fde271d0436ecd465934dfa17 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sat, 28 May 2016 04:20:14 +0200 +Subject: [PATCH] Fix 
pfmlib_parse_event_attr() parsing of raw umask for 32-bit + +This function was using strtoul() instead of strtoull() now +that a->idx is uint64_t. That was causing bogus encodings +in 32-bit mode when the raw umask was larger than 32-bit. + +Also fix a few other bugs in debug prints related to this. + +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_common.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index 50d48fb..05ce1c0 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -937,10 +937,10 @@ pfmlib_parse_event_attr(char *str, pfmlib_event_desc_t *d) + ainfo->name = "RAW_UMASK"; + ainfo->type = PFM_ATTR_RAW_UMASK; + ainfo->ctrl = PFM_ATTR_CTRL_PMU; +- ainfo->idx = strtoul(s, &endptr, 0); ++ ainfo->idx = strtoull(s, &endptr, 0); + ainfo->equiv= NULL; + if (*endptr) { +- DPRINT("raw umask (%s) is not a number\n"); ++ DPRINT("raw umask (%s) is not a number\n", s); + return PFM_ERR_ATTR; + } + +@@ -1291,9 +1291,9 @@ found: + for (i = 0; i < d->nattrs; i++) { + pfm_event_attr_info_t *a = attr(d, i); + if (a->type != PFM_ATTR_RAW_UMASK) +- DPRINT("%d %d %d %s\n", d->event, i, a->idx, d->pattrs[d->attrs[i].id].name); ++ DPRINT("%d %d %"PRIu64" %s\n", d->event, i, a->idx, d->pattrs[d->attrs[i].id].name); + else +- DPRINT("%d %d RAW_UMASK (0x%x)\n", d->event, i, a->idx); ++ DPRINT("%d %d RAW_UMASK (0x%"PRIx64")\n", d->event, i, a->idx); + } + error: + free(str); +-- +2.9.3 + + +From 487937da54654c699c932c6938484ddcdb91a297 Mon Sep 17 00:00:00 2001 +From: Phil Mucci +Date: Tue, 21 Jun 2016 09:20:42 -0700 +Subject: [PATCH] IBM Power8 add missing supported_plm mask initialization + +Without this patch, there was no way to encode priv level +at the perf_events OS level. They would come out as zero +because libpfm4 did not know hardware supports filtering. 
+ +Signed-off-by: Phil Mucci +--- + lib/pfmlib_power8.c | 1 + + lib/pfmlib_power_priv.h | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/lib/pfmlib_power8.c b/lib/pfmlib_power8.c +index ea964b7..d30f036 100644 +--- a/lib/pfmlib_power8.c ++++ b/lib/pfmlib_power8.c +@@ -42,6 +42,7 @@ pfmlib_pmu_t power8_support={ + .pmu = PFM_PMU_POWER8, + .pme_count = LIBPFM_ARRAY_SIZE(power8_pe), + .type = PFM_PMU_TYPE_CORE, ++ .supported_plm = POWER8_PLM, + .num_cntrs = 4, + .num_fixed_cntrs = 2, + .max_encoding = 1, +diff --git a/lib/pfmlib_power_priv.h b/lib/pfmlib_power_priv.h +index 04f1437..8b5c3ac 100644 +--- a/lib/pfmlib_power_priv.h ++++ b/lib/pfmlib_power_priv.h +@@ -97,6 +97,9 @@ typedef struct { + #define PV_POWER8NVL 0x004c + #define PV_POWER8 0x004d + ++#define POWER_PLM (PFM_PLM0|PFM_PLM3) ++#define POWER8_PLM (POWER_PLM|PFM_PLMH) ++ + extern int pfm_gen_powerpc_get_event_info(void *this, int pidx, pfm_event_info_t *info); + extern int pfm_gen_powerpc_get_event_attr_info(void *this, int pidx, int umask_idx, pfm_event_attr_info_t *info); + extern int pfm_gen_powerpc_get_encoding(void *this, pfmlib_event_desc_t *e); +-- +2.9.3 + + +From a31c90ed0aecdc3da5b47611d0068448cac38e5b Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Fri, 8 Jul 2016 15:05:32 -0700 +Subject: [PATCH] fix/add offcore_response:l3_miss alias for Intel + SNB/IVB/HSW/BDW/SKL + +This patch adds a L3_MISS alias for Intel Snb/IVB/HSW/BDW/SKL processors. +L3_MISS counts local and remote misses (if any). + +Adds the corresponding validation tests. 
+ +Signed-off-by: Stephane Eranian +--- + lib/events/intel_bdw_events.h | 16 +++++++---- + lib/events/intel_hsw_events.h | 11 ++++++-- + lib/events/intel_ivb_events.h | 14 ++++++++++ + lib/events/intel_skl_events.h | 5 ++-- + lib/events/intel_snb_events.h | 21 ++++++++++++++ + tests/validate_x86.c | 64 +++++++++++++++++++++++++++++++++++++++++-- + 6 files changed, 119 insertions(+), 12 deletions(-) + +diff --git a/lib/events/intel_bdw_events.h b/lib/events/intel_bdw_events.h +index 439d3c6..c22755e 100644 +--- a/lib/events/intel_bdw_events.h ++++ b/lib/events/intel_bdw_events.h +@@ -1806,27 +1806,33 @@ static const intel_x86_umask_t bdw_offcore_response[]={ + { .uname = "L3_MISS_LOCAL", + .udesc = "Supplier: counts L3 misses to local DRAM", + .ucode = 1ULL << (26+8), +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "LLC_MISS_LOCAL", + .udesc = "Supplier: counts L3 misses to local DRAM", + .ucode = 1ULL << (26+8), + .uequiv = "L3_MISS_LOCAL", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "LLC_MISS_LOCAL_DRAM", + .udesc = "Supplier: counts L3 misses to local DRAM", + .ucode = 1ULL << (26+8), + .uequiv = "L3_MISS_LOCAL", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "L3_MISS", +- .udesc = "Supplier: counts L3 misses", +- .ucode = 0xfULL << (26+8), ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 1ULL << (26+8), ++ .uequiv = "L3_MISS_LOCAL", + .grpid = 1, ++ .umodel = PFM_PMU_INTEL_BDW, ++ }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local or remote DRAM", ++ .ucode = 0xfULL << (26+8), ++ .uequiv = "L3_MISS_LOCAL:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P", ++ .umodel = PFM_PMU_INTEL_BDW_EP, ++ .grpid = 1, + }, + { .uname = "L3_MISS_REMOTE_HOP0", + .udesc = "Supplier: counts L3 misses to remote DRAM with 0 hop", +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index e4546cf..426119b 100644 +--- a/lib/events/intel_hsw_events.h 
++++ b/lib/events/intel_hsw_events.h +@@ -1784,12 +1784,19 @@ static const intel_x86_umask_t hsw_offcore_response[]={ + .grpid = 1, + }, + { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 0x1ULL << (22+8), ++ .uequiv = "L3_MISS_LOCAL", ++ .grpid = 1, ++ .umodel = PFM_PMU_INTEL_HSW, ++ }, ++ { .uname = "L3_MISS", + .udesc = "Supplier: counts L3 misses to local or remote DRAM", +- .ucode = 0xfULL << (26+8), ++ .ucode = 0x7ULL << (27+8) | 0x1ULL << (22+8), ++ .uequiv = "L3_MISS_LOCAL:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P", + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, +- + { .uname = "SPL_HIT", + .udesc = "Supplier: counts L3 supplier hit", + .ucode = 0x1ULL << (30+8), +diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h +index cf5059e..fa29dcb 100644 +--- a/lib/events/intel_ivb_events.h ++++ b/lib/events/intel_ivb_events.h +@@ -1732,6 +1732,20 @@ static const intel_x86_umask_t ivb_offcore_response[]={ + .umodel = PFM_PMU_INTEL_IVB_EP, + .grpid = 1, + }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 0x1ULL << (22+8), ++ .grpid = 1, ++ .uequiv = "LLC_MISS_LOCAL", ++ .umodel = PFM_PMU_INTEL_IVB, ++ }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local or remote DRAM", ++ .ucode = 0x3ULL << (22+8), ++ .uequiv = "LLC_MISS_LOCAL:LLC_MISS_REMOTE", ++ .umodel = PFM_PMU_INTEL_IVB_EP, ++ .grpid = 1, ++ }, + { .uname = "LLC_MISS_REMOTE_DRAM", + .udesc = "Supplier: counts L3 misses to remote DRAM", + .ucode = 0xffULL << (23+8), +diff --git a/lib/events/intel_skl_events.h b/lib/events/intel_skl_events.h +index 4980164..3a107f3 100644 +--- a/lib/events/intel_skl_events.h ++++ b/lib/events/intel_skl_events.h +@@ -1471,12 +1471,13 @@ static const intel_x86_umask_t skl_offcore_response[]={ + { .uname = "L3_MISS_LOCAL", + .udesc = "Supplier: counts L3 misses to local DRAM", + .ucode = 1ULL << (26+8), +- .umodel = 
PFM_PMU_INTEL_SKL, + .grpid = 1, + }, + { .uname = "L3_MISS", + .udesc = "Supplier: counts L3 misses", +- .ucode = 0xfULL << (26+8), ++ .ucode = 0x1ULL << (26+8), ++ .uequiv = "L3_MISS_LOCAL", ++ .umodel = PFM_PMU_INTEL_SKL, + .grpid = 1, + }, + { .uname = "SPL_HIT", +diff --git a/lib/events/intel_snb_events.h b/lib/events/intel_snb_events.h +index 829f710..0d448b7 100644 +--- a/lib/events/intel_snb_events.h ++++ b/lib/events/intel_snb_events.h +@@ -1765,6 +1765,13 @@ static const intel_x86_umask_t snb_offcore_response[]={ + .uequiv = "LLC_MISS_LOCAL_DRAM", + .grpid = 1, + }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 0x1ULL << (22+8), ++ .grpid = 1, ++ .uequiv = "LLC_MISS_LOCAL", ++ .umodel = PFM_PMU_INTEL_SNB, ++ }, + { .uname = "LLC_MISS_REMOTE", + .udesc = "Supplier: counts L3 misses to remote DRAM", + .ucode = 0xffULL << (23+8), +@@ -1778,6 +1785,20 @@ static const intel_x86_umask_t snb_offcore_response[]={ + .grpid = 1, + .umodel = PFM_PMU_INTEL_SNB_EP, + }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local or remote DRAM", ++ .ucode = 0x3ULL << (22+8), ++ .uequiv = "LLC_MISS_LOCAL:LLC_MISS_REMOTE", ++ .umodel = PFM_PMU_INTEL_SNB_EP, ++ .grpid = 1, ++ }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local or remote DRAM", ++ .ucode = 0x3ULL << (22+8), ++ .uequiv = "LLC_MISS_LOCAL:LLC_MISS_REMOTE", ++ .umodel = PFM_PMU_INTEL_SNB_EP, ++ .grpid = 1, ++ }, + { .uname = "LLC_HITMESF", + .udesc = "Supplier: counts L3 hits in any state (M, E, S, F)", + .ucode = 0xfULL << (18+8), +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 09152f7..876453f 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -998,6 +998,14 @@ static const test_event_t x86_test_events[]={ + .fstr = "snb::OFFCORE_RESPONSE_0:ANY_REQUEST:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, ++ .name = "snb::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 
2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3f80408fffull, ++ .fstr = "snb::OFFCORE_RESPONSE_0:ANY_REQUEST:LLC_MISS_LOCAL_DRAM:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, + .name = "amd64_fam11h_turion::MAB_REQUESTS:DC_BUFFER_0", + .ret = PFM_ERR_NOTFOUND, + }, +@@ -1155,6 +1163,14 @@ static const test_event_t x86_test_events[]={ + .fstr = "ivb::OFFCORE_RESPONSE_0:ANY_REQUEST:LLC_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, ++ .name = "ivb::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3f80408fffull, ++ .fstr = "ivb::OFFCORE_RESPONSE_0:ANY_REQUEST:LLC_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, + .name = "ivb::DTLB_LOAD_MISSES:STLB_HIT", + .ret = PFM_SUCCESS, + .count = 1, +@@ -1777,6 +1793,15 @@ static const test_event_t x86_test_events[]={ + .fstr = "snb_ep::OFFCORE_RESPONSE_1:DMND_DATA_RD:DMND_RFO:DMND_IFETCH:WB:PF_DATA_RD:PF_RFO:PF_IFETCH:PF_LLC_DATA_RD:PF_LLC_RFO:PF_LLC_IFETCH:BUS_LOCKS:STRM_ST:OTHER:LLC_MISS_REMOTE_DRAM:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0", + }, + { SRC_LINE, ++ .name = "snb_ep::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3fffc08fffull, ++ .fstr = "snb_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:LLC_MISS_LOCAL_DRAM:LLC_MISS_REMOTE_DRAM:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ ++ { SRC_LINE, + .name = "snb_ep::mem_trans_retired:latency_above_threshold", + .ret = PFM_SUCCESS, + .count = 2, +@@ -2028,6 +2053,14 @@ static const test_event_t x86_test_events[]={ + .fstr = "ivb_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:DMND_RFO:DMND_IFETCH:WB:PF_DATA_RD:PF_RFO:PF_IFETCH:PF_LLC_DATA_RD:PF_LLC_RFO:PF_LLC_IFETCH:BUS_LOCKS:STRM_ST:OTHER:LLC_MISS_REMOTE_DRAM:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0" + }, + { SRC_LINE, ++ .name = "ivb_ep::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3fffc08fffull, ++ .fstr = 
"ivb_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:LLC_MISS_LOCAL:LLC_MISS_REMOTE_DRAM:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0", ++ }, ++ { SRC_LINE, + .name = "hsw::mem_trans_retired:latency_above_threshold:ldlat=3:u", + .ret = PFM_SUCCESS, + .count = 2, +@@ -2167,6 +2200,14 @@ static const test_event_t x86_test_events[]={ + .fstr = "hsw::OFFCORE_RESPONSE_0:DMND_DATA_RD:L3_HITS:SNP_FWD:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, ++ .name = "hsw::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3f80408fffull, ++ .fstr = "hsw::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, + .name = "ivb_unc_cbo0::unc_clockticks", + .ret = PFM_SUCCESS, + .count = 1, +@@ -2906,6 +2947,14 @@ static const test_event_t x86_test_events[]={ + .fstr = "hsw_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD:L3_HITM:L3_HITE:L3_HITS:L3_HITF:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, ++ .name = "hsw_ep::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3fb8408fffull, ++ .fstr = "hsw_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, + .name = "bdw::mem_trans_retired:latency_above_threshold:ldlat=3:u", + .ret = PFM_SUCCESS, + .count = 2, +@@ -3046,8 +3095,8 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_SUCCESS, + .count = 2, + .codes[0] =0x5301b7, +- .codes[1] = 0x3fbc008fffull, +- .fstr = "bdw::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ .codes[1] = 0x3f84008fffull, ++ .fstr = "bdw::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "bdw::offcore_response_1:any_data", +@@ -3063,7 +3112,7 @@ static const test_event_t 
x86_test_events[]={ + .count = 2, + .codes[0] =0x5301b7, + .codes[1] = 0x3fbc008fffull, +- .fstr = "bdw_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, + .name = "bdw_ep::offcore_response_0:L3_MISS_REMOTE_HOP0_DRAM", +@@ -3935,6 +3984,15 @@ static const test_event_t x86_test_events[]={ + .fstr = "skl::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, ++ .name = "skl::offcore_response_0:l3_miss", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] =0x5301b7, ++ .codes[1] = 0x3f84008fffull, ++ .fstr = "skl::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ ++ { SRC_LINE, + .name = "skl::cycle_activity:0x6:c=6", + .count = 1, + .codes[0] = 0x65306a3, +-- +2.9.3 + + +From b74653d106613015632d865e5e934bf20137f3a7 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Fri, 1 Jul 2016 17:12:19 -0700 +Subject: [PATCH] add support for Intel Goldmont processor + +Enable support for Intel Goldmont processor core PMU. + +Based on official event table from download.01.org +version V6. 
+ +Signed-off-by: Stephane Eranian +--- + include/perfmon/pfmlib.h | 1 + + lib/Makefile | 2 + + lib/events/intel_glm_events.h | 1476 +++++++++++++++++++++++++++++++++++++++++ + lib/pfmlib_common.c | 1 + + lib/pfmlib_intel_glm.c | 73 ++ + lib/pfmlib_priv.h | 1 + + 6 files changed, 1554 insertions(+) + create mode 100644 lib/events/intel_glm_events.h + create mode 100644 lib/pfmlib_intel_glm.c + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index 8921164..ba3a54f 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -297,6 +297,7 @@ typedef enum { + PFM_PMU_INTEL_SKL, /* Intel Skylake */ + + PFM_PMU_INTEL_BDW_EP, /* Intel Broadwell EP */ ++ PFM_PMU_INTEL_GLM, /* Intel Goldmont */ + /* MUST ADD NEW PMU MODELS HERE */ + + PFM_PMU_MAX /* end marker */ +diff --git a/lib/Makefile b/lib/Makefile +index f035307..bd74d50 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -93,6 +93,7 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ + pfmlib_intel_hswep_unc_sbo.c \ + pfmlib_intel_knc.c \ + pfmlib_intel_slm.c \ ++ pfmlib_intel_glm.c \ + pfmlib_intel_netburst.c \ + pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c \ + pfmlib_amd64_fam11h.c pfmlib_amd64_fam12h.c \ +@@ -238,6 +239,7 @@ INC_X86= pfmlib_intel_x86_priv.h \ + events/intel_hsw_events.h \ + events/intel_bdw_events.h \ + events/intel_skl_events.h \ ++ events/intel_glm_events.h \ + pfmlib_intel_snbep_unc_priv.h \ + events/intel_snbep_unc_cbo_events.h \ + events/intel_snbep_unc_ha_events.h \ +diff --git a/lib/events/intel_glm_events.h b/lib/events/intel_glm_events.h +new file mode 100644 +index 0000000..fd0b27c +--- /dev/null ++++ b/lib/events/intel_glm_events.h +@@ -0,0 +1,1476 @@ ++/* ++ * Contributed by Stephane Eranian ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without 
limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * This file is part of libpfm, a performance monitoring support library for ++ * applications on Linux. ++ * FILE AUTOMATICALLY GENERATED from download.01.org/perfmon/GLM/Goldmont_core_V6.json ++ * PMU: glm (Intel Goldmont) ++ */ ++static const intel_x86_umask_t glm_icache[]={ ++ { .uname = "HIT", ++ .udesc = "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "MISSES", ++ .udesc = "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "ACCESSES", ++ .udesc = "References per ICache line. 
This event counts differently than Intel processors based on Silvermont microarchitecture", ++ .ucode = 0x0300, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_l2_reject_xq[]={ ++ { .uname = "ALL", ++ .udesc = "Requests rejected by the XQ", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_hw_interrupts[]={ ++ { .uname = "RECEIVED", ++ .udesc = "Hardware interrupts received", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "PENDING_AND_MASKED", ++ .udesc = "Cycles pending interrupts are masked", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_br_misp_retired[]={ ++ { .uname = "ALL_BRANCHES", ++ .udesc = "Retired mispredicted branch instructions (Precise Event)", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "JCC", ++ .udesc = "Retired mispredicted conditional branch instructions (Precise Event)", ++ .ucode = 0x7e00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "TAKEN_JCC", ++ .udesc = "Retired mispredicted conditional branch instructions that were taken (Precise Event)", ++ .ucode = 0xfe00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "IND_CALL", ++ .udesc = "Retired mispredicted near indirect call instructions (Precise Event)", ++ .ucode = 0xfb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Retired mispredicted near return instructions (Precise Event)", ++ .ucode = 0xf700, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { 
.uname = "NON_RETURN_IND", ++ .udesc = "Retired mispredicted instructions of near indirect Jmp or near indirect call (Precise Event)", ++ .ucode = 0xeb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_decode_restriction[]={ ++ { .uname = "PREDECODE_WRONG", ++ .udesc = "Decode restrictions due to predicting wrong instruction length", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_misalign_mem_ref[]={ ++ { .uname = "LOAD_PAGE_SPLIT", ++ .udesc = "Load uops that split a page (Precise Event)", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "STORE_PAGE_SPLIT", ++ .udesc = "Store uops that split a page (Precise Event)", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_inst_retired[]={ ++ { .uname = "ANY_P", ++ .udesc = "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. 
Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_issue_slots_not_consumed[]={ ++ { .uname = "RESOURCE_FULL", ++ .udesc = "Unfilled issue slots per cycle because of a full resource in the backend", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "RECOVERY", ++ .udesc = "Unfilled issue slots per cycle to recover", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "ANY", ++ .udesc = "Unfilled issue slots per cycle", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_itlb[]={ ++ { .uname = "MISS", ++ .udesc = "ITLB misses", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_longest_lat_cache[]={ ++ { .uname = "REFERENCE", ++ .udesc = "L2 cache requests", ++ .ucode = 0x4f00, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "MISS", ++ .udesc = "L2 cache request misses", ++ .ucode = 0x4100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_mem_load_uops_retired[]={ ++ { .uname = "L1_HIT", ++ .udesc = "Load uops retired that hit L1 data cache (Precise Event)", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "L1_MISS", ++ .udesc = "Load uops retired that missed L1 data cache (Precise Event)", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "L2_HIT", ++ .udesc = "Load uops retired 
that hit L2 (Precise Event)", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "L2_MISS", ++ .udesc = "Load uops retired that missed L2 (Precise Event)", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "HITM", ++ .udesc = "Memory uop retired where cross core or cross module HITM occured (Precise Event)", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "WCB_HIT", ++ .udesc = "Loads retired that hit WCB (Precise Event)", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "DRAM_HIT", ++ .udesc = "Loads retired that came from DRAM (Precise Event)", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_ld_blocks[]={ ++ { .uname = "ALL_BLOCK", ++ .udesc = "Loads blocked (Precise Event)", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "UTLB_MISS", ++ .udesc = "Loads blocked because adress in not in the UTLB (Precise Event)", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "STORE_FORWARD", ++ .udesc = "Loads blocked due to store forward restriction (Precise Event)", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "DATA_UNKNOWN", ++ .udesc = "Loads blocked due to store data not ready (Precise Event)", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "4K_ALIAS", ++ .udesc = "Loads blocked because address has 4k partial address false dependence (Precise Event)", ++ .ucode = 
0x0400, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_dl1[]={ ++ { .uname = "DIRTY_EVICTION", ++ .udesc = "L1 Cache evictions for dirty data", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_cycles_div_busy[]={ ++ { .uname = "ALL", ++ .udesc = "Cycles a divider is busy", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "IDIV", ++ .udesc = "Cycles the integer divide unit is busy", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "FPDIV", ++ .udesc = "Cycles the FP divide unit is busy", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_ms_decoded[]={ ++ { .uname = "MS_ENTRY", ++ .udesc = "MS decode starts", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_uops_retired[]={ ++ { .uname = "ANY", ++ .udesc = "Uops retired (Precise Event)", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "MS", ++ .udesc = "MS uops retired (Precise Event)", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_offcore_response_1[]={ ++ { .uname = "DMND_DATA_RD", ++ .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. 
Does not count L2 data read prefetches or instruction fetches", ++ .ucode = 1ULL << (0 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "DMND_RFO", ++ .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", ++ .ucode = 1ULL << (1 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "DMND_CODE_RD", ++ .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. Does not count L2 code read prefetches", ++ .ucode = 1ULL << (2 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "WB", ++ .udesc = "Request: number of writebacks (modified to exclusive) transactions", ++ .ucode = 1ULL << (3 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_DATA_RD", ++ .udesc = "Request: number of data cacheline reads generated by L2 prefetcher", ++ .ucode = 1ULL << (4 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_RFO", ++ .udesc = "Request: number of RFO requests generated by L2 prefetcher", ++ .ucode = 1ULL << (5 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PARTIAL_READS", ++ .udesc = "Request: number of partil reads", ++ .ucode = 1ULL << (7 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PARTIAL_WRITES", ++ .udesc = "Request: number of partial writes", ++ .ucode = 1ULL << (8 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "PF_CODE_RD", ++ }, ++ { .uname = "UC_CODE_READS", ++ .udesc = "Request: number of uncached code reads", ++ .ucode = 1ULL << (9 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "BUS_LOCKS", ++ .udesc = "Request: number of bus lock and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "STRM_ST", ++ .udesc = "Request: number of streaming store requests for full cacheline", ++ .ucode = 1ULL << (11 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { 
.uname = "SW_PF", ++ .udesc = "Request: number of cacheline requests due to software prefetch", ++ .ucode = 1ULL << (12 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_L1_DATA_RD", ++ .udesc = "Request: number of data cacheline reads generated by L1 data prefetcher", ++ .ucode = 1ULL << (13 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PARTIAL_STRM_ST", ++ .udesc = "Request: number of streaming store requests for partial cacheline", ++ .ucode = 1ULL << (11 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "OTHER", ++ .udesc = "Request: counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", ++ .ucode = 1ULL << (15 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "ANY_CODE_RD", ++ .udesc = "Request: combination of PF_CODE_RD | DMND_CODE_RD | PF_L3_CODE_RD", ++ .ucode = 0x24400, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "PF_CODE_RD:DMND_CODE_RD:PF_L3_CODE_RD", ++ }, ++ { .uname = "ANY_IFETCH", ++ .udesc = "Request: combination of PF_CODE_RD | PF_L3_CODE_RD", ++ .ucode = 0x24000, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "PF_CODE_RD:PF_L3_CODE_RD", ++ }, ++ { .uname = "ANY_REQUEST", ++ .udesc = "Request: combination of all request umasks", ++ .ucode = 0x8fff00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:WB:PF_DATA_RD:PF_RFO:PF_CODE_RD:PF_L3_DATA_RD:PF_L3_RFO:PF_L3_CODE_RD:SPLIT_LOCK_UC_LOCK:STRM_ST:OTHER", ++ }, ++ { .uname = "ANY_DATA", ++ .udesc = "Request: combination of DMND_DATA | PF_DATA_RD | PF_L3_DATA_RD", ++ .ucode = 0x9100, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD", ++ }, ++ { .uname = "ANY_RFO", ++ .udesc = "Request: combination of DMND_RFO | PF_RFO | PF_L3_RFO", ++ .ucode = 0x12200, ++ .grpid = 0, ++ .ucntmsk = 0xffull, 
++ .uequiv = "DMND_RFO:PF_RFO:PF_L3_RFO", ++ }, ++ { .uname = "ANY_RESPONSE", ++ .udesc = "Response: any response type", ++ .ucode = 1ULL << (16 + 8), ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "NO_SUPP", ++ .udesc = "Supplier: counts number of times supplier information is not available", ++ .ucode = 1ULL << (17 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HITM", ++ .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", ++ .ucode = 1ULL << (18 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HITE", ++ .udesc = "Supplier: counts L3 hits in E-state", ++ .ucode = 1ULL << (19 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HITS", ++ .udesc = "Supplier: counts L3 hits in S-state", ++ .ucode = 1ULL << (20 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HIT", ++ .udesc = "Supplier: counts L3 hits in any state (M, E, S)", ++ .ucode = 7ULL << (18 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ .umodel = PFM_PMU_INTEL_GLM, ++ .uequiv = "L3_HITM:L3_HITE:L3_HITS", ++ }, ++ { .uname = "L3_MISS_LOCAL_DRAM", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 1ULL << (22 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_MISS_REMOTE_DRAM", ++ .udesc = "Supplier: counts L3 misses to remote DRAM", ++ .ucode = 0x7fULL << (23 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local or remote DRAM", ++ .ucode = 0xffULL << (22 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ .uequiv = "L3_MISS_REMOTE_DRAM:L3_MISS_LOCAL_DRAM", ++ }, ++ { .uname = "SPL_HIT", ++ .udesc = "Supplier: counts L3 supplier hit", ++ .ucode = 1ULL << (30 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NONE", ++ .udesc = "Snoop: counts number of times no snoop-related information is available", ++ .ucode = 1ULL << (31 + 
8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NOT_NEEDED", ++ .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", ++ .ucode = 1ULL << (32 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_MISS", ++ .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", ++ .ucode = 1ULL << (33 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NO_FWD", ++ .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", ++ .ucode = 1ULL << (34 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_FWD", ++ .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", ++ .ucode = 1ULL << (35 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_HITM", ++ .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", ++ .ucode = 1ULL << (36 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NON_DRAM", ++ .udesc = "Snoop: counts number of times target was a non-DRAM system address. 
This includes MMIO transactions", ++ .ucode = 1ULL << (37 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_ANY", ++ .udesc = "Snoop: any snoop reason", ++ .ucode = 0x7ULL << (31 + 8), ++ .uflags = INTEL_X86_DFL, ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ .uequiv = "SNP_NONE:SNP_NOT_NEEDED:SNP_MISS:SNP_NO_FWD:SNP_FWD:SNP_HITM:SNP_NON_DRAM", ++ }, ++}; ++ ++static const intel_x86_umask_t glm_machine_clears[]={ ++ { .uname = "SMC", ++ .udesc = "Self-Modifying Code detected", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "MEMORY_ORDERING", ++ .udesc = "Machine cleas due to memory ordering issue", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "FP_ASSIST", ++ .udesc = "Machine clears due to FP assists", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "DISAMBIGUATION", ++ .udesc = "Machine clears due to memory disambiguation", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "ALL", ++ .udesc = "All machine clears", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_br_inst_retired[]={ ++ { .uname = "ALL_BRANCHES", ++ .udesc = "Retired branch instructions (Precise Event)", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "JCC", ++ .udesc = "Retired conditional branch instructions (Precise Event)", ++ .ucode = 0x7e00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "TAKEN_JCC", ++ .udesc = "Retired conditional branch instructions that were taken (Precise Event)", ++ .ucode = 0xfe00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = 
"CALL", ++ .udesc = "Retired near call instructions (Precise Event)", ++ .ucode = 0xf900, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "REL_CALL", ++ .udesc = "Retired near relative call instructions (Precise Event)", ++ .ucode = 0xfd00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "IND_CALL", ++ .udesc = "Retired near indirect call instructions (Precise Event)", ++ .ucode = 0xfb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Retired near return instructions (Precise Event)", ++ .ucode = 0xf700, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "NON_RETURN_IND", ++ .udesc = "Retired instructions of near indirect Jmp or call (Precise Event)", ++ .ucode = 0xeb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "FAR_BRANCH", ++ .udesc = "Retired far branch instructions (Precise Event)", ++ .ucode = 0xbf00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_fetch_stall[]={ ++ { .uname = "ICACHE_FILL_PENDING_CYCLES", ++ .udesc = "Cycles where code-fetch is stalled and an ICache miss is outstanding. 
This is not the same as an ICache Miss", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_uops_not_delivered[]={ ++ { .uname = "ANY", ++ .udesc = "Uops requested but not-delivered to the back-end per cycle", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_mem_uops_retired[]={ ++ { .uname = "ALL_LOADS", ++ .udesc = "Load uops retired (Precise Event)", ++ .ucode = 0x8100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "ALL_STORES", ++ .udesc = "Store uops retired (Precise Event)", ++ .ucode = 0x8200, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "ALL", ++ .udesc = "Memory uops retired (Precise Event)", ++ .ucode = 0x8300, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "DTLB_MISS_LOADS", ++ .udesc = "Load uops retired that missed the DTLB (Precise Event)", ++ .ucode = 0x1100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "DTLB_MISS_STORES", ++ .udesc = "Store uops retired that missed the DTLB (Precise Event)", ++ .ucode = 0x1200, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "DTLB_MISS", ++ .udesc = "Memory uops retired that missed the DTLB (Precise Event)", ++ .ucode = 0x1300, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "LOCK_LOADS", ++ .udesc = "Locked load uops retired (Precise Event)", ++ .ucode = 0x2100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "SPLIT_LOADS", ++ .udesc = "Load uops retired that split a cache-line (Precise Event)", ++ .ucode = 0x4100, ++ .uflags = INTEL_X86_NCOMBO | 
INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "SPLIT_STORES", ++ .udesc = "Stores uops retired that split a cache-line (Precise Event)", ++ .ucode = 0x4200, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "SPLIT", ++ .udesc = "Memory uops retired that split a cache-line (Precise Event)", ++ .ucode = 0x4300, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_uops_issued[]={ ++ { .uname = "ANY", ++ .udesc = "Uops issued to the back end per cycle", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_offcore_response_0[]={ ++ { .uname = "DMND_DATA_RD", ++ .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", ++ .ucode = 1ULL << (0 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "DMND_RFO", ++ .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", ++ .ucode = 1ULL << (1 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "DMND_CODE_RD", ++ .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. Does not count L2 code read prefetches", ++ .ucode = 1ULL << (2 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "DMND_IFETCH", ++ .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. 
Does not count L2 code read prefetches", ++ .ucode = 1ULL << (2 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "DMND_CODE_RD", ++ }, ++ { .uname = "WB", ++ .udesc = "Request: number of writebacks (modified to exclusive) transactions", ++ .ucode = 1ULL << (3 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_DATA_RD", ++ .udesc = "Request: number of data cacheline reads generated by L2 prefetchers", ++ .ucode = 1ULL << (4 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_RFO", ++ .udesc = "Request: number of RFO requests generated by L2 prefetchers", ++ .ucode = 1ULL << (5 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_CODE_RD", ++ .udesc = "Request: number of code reads generated by L2 prefetchers", ++ .ucode = 1ULL << (6 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_IFETCH", ++ .udesc = "Request: number of code reads generated by L2 prefetchers", ++ .ucode = 1ULL << (6 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "PF_CODE_RD", ++ }, ++ { .uname = "PF_L3_DATA_RD", ++ .udesc = "Request: number of L2 prefetcher requests to L3 for loads", ++ .ucode = 1ULL << (7 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_L3_RFO", ++ .udesc = "Request: number of RFO requests generated by L2 prefetcher", ++ .ucode = 1ULL << (8 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_L3_CODE_RD", ++ .udesc = "Request: number of L2 prefetcher requests to L3 for instruction fetches", ++ .ucode = 1ULL << (9 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_L3_IFETCH", ++ .udesc = "Request: number of L2 prefetcher requests to L3 for instruction fetches", ++ .ucode = 1ULL << (9 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "PF_L3_CODE_RD", ++ }, ++ { .uname = "SPLIT_LOCK_UC_LOCK", ++ .udesc = "Request: number of bus lock and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname 
= "BUS_LOCKS", ++ .udesc = "Request: number of bus lock and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "SPLIT_LOCK_UC_LOCK", ++ }, ++ { .uname = "BUS_LOCK", ++ .udesc = "Request: number of bus lock and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "SPLIT_LOCK_UC_LOCK", ++ }, ++ { .uname = "STRM_ST", ++ .udesc = "Request: number of streaming store requests", ++ .ucode = 1ULL << (11 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "OTHER", ++ .udesc = "Request: counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", ++ .ucode = 1ULL << (15 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "ANY_CODE_RD", ++ .udesc = "Request: combination of PF_CODE_RD | DMND_CODE_RD | PF_L3_CODE_RD", ++ .ucode = 0x24400, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "PF_CODE_RD:DMND_CODE_RD:PF_L3_CODE_RD", ++ }, ++ { .uname = "ANY_IFETCH", ++ .udesc = "Request: combination of PF_CODE_RD | PF_L3_CODE_RD", ++ .ucode = 0x24000, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "PF_CODE_RD:PF_L3_CODE_RD", ++ }, ++ { .uname = "ANY_REQUEST", ++ .udesc = "Request: combination of all request umasks", ++ .ucode = 0x8fff00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:WB:PF_DATA_RD:PF_RFO:PF_CODE_RD:PF_L3_DATA_RD:PF_L3_RFO:PF_L3_CODE_RD:SPLIT_LOCK_UC_LOCK:STRM_ST:OTHER", ++ }, ++ { .uname = "ANY_DATA", ++ .udesc = "Request: combination of DMND_DATA | PF_DATA_RD | PF_L3_DATA_RD", ++ .ucode = 0x9100, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD", ++ }, ++ { .uname = "ANY_RFO", ++ .udesc = "Request: combination of DMND_RFO | PF_RFO | PF_L3_RFO", ++ .ucode = 0x12200, ++ .grpid = 0, ++ .ucntmsk = 0xffull, 
++ .uequiv = "DMND_RFO:PF_RFO:PF_L3_RFO", ++ }, ++ { .uname = "ANY_RESPONSE", ++ .udesc = "Response: any response type", ++ .ucode = 1ULL << (16 + 8), ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "NO_SUPP", ++ .udesc = "Supplier: counts number of times supplier information is not available", ++ .ucode = 1ULL << (17 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HITM", ++ .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", ++ .ucode = 1ULL << (18 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HITE", ++ .udesc = "Supplier: counts L3 hits in E-state", ++ .ucode = 1ULL << (19 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HITS", ++ .udesc = "Supplier: counts L3 hits in S-state", ++ .ucode = 1ULL << (20 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_HIT", ++ .udesc = "Supplier: counts L3 hits in any state (M, E, S)", ++ .ucode = 7ULL << (18 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ .umodel = PFM_PMU_INTEL_GLM, ++ .uequiv = "L3_HITM:L3_HITE:L3_HITS", ++ }, ++ { .uname = "L3_MISS_LOCAL_DRAM", ++ .udesc = "Supplier: counts L3 misses to local DRAM", ++ .ucode = 1ULL << (22 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_MISS_REMOTE_DRAM", ++ .udesc = "Supplier: counts L3 misses to remote DRAM", ++ .ucode = 0x7fULL << (23 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L3_MISS", ++ .udesc = "Supplier: counts L3 misses to local or remote DRAM", ++ .ucode = 0xffULL << (22 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ .uequiv = "L3_MISS_REMOTE_DRAM:L3_MISS_LOCAL_DRAM", ++ }, ++ { .uname = "SPL_HIT", ++ .udesc = "Supplier: counts L3 supplier hit", ++ .ucode = 1ULL << (30 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NONE", ++ .udesc = "Snoop: counts number of times no snoop-related information is available", ++ .ucode = 1ULL << (31 + 
8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NOT_NEEDED", ++ .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", ++ .ucode = 1ULL << (32 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_MISS", ++ .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", ++ .ucode = 1ULL << (33 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NO_FWD", ++ .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", ++ .ucode = 1ULL << (34 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_FWD", ++ .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", ++ .ucode = 1ULL << (35 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_HITM", ++ .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", ++ .ucode = 1ULL << (36 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_NON_DRAM", ++ .udesc = "Snoop: counts number of times target was a non-DRAM system address. 
This includes MMIO transactions", ++ .ucode = 1ULL << (37 + 8), ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SNP_ANY", ++ .udesc = "Snoop: any snoop reason", ++ .ucode = 0x7ULL << (31 + 8), ++ .uflags = INTEL_X86_DFL, ++ .grpid = 2, ++ .ucntmsk = 0xffull, ++ .uequiv = "SNP_NONE:SNP_NOT_NEEDED:SNP_MISS:SNP_NO_FWD:SNP_FWD:SNP_HITM:SNP_NON_DRAM", ++ }, ++}; ++ ++static const intel_x86_umask_t glm_core_reject_l2q[]={ ++ { .uname = "ALL", ++ .udesc = "Requests rejected by the L2Q ", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_page_walks[]={ ++ { .uname = "D_SIDE_CYCLES", ++ .udesc = "Duration of D-side page-walks in cycles", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "I_SIDE_CYCLES", ++ .udesc = "Duration of I-side pagewalks in cycles", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "CYCLES", ++ .udesc = "Duration of page-walks in cycles", ++ .ucode = 0x0300, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_baclears[]={ ++ { .uname = "ALL", ++ .udesc = "BACLEARs asserted for any branch type", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "RETURN", ++ .udesc = "BACLEARs asserted for return branch", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "COND", ++ .udesc = "BACLEARs asserted for conditional branch", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_umask_t glm_cpu_clk_unhalted[]={ ++ { .uname = "CORE", ++ .udesc = "Core cycles when core is not halted (Fixed event)", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 
0x200000000ull, ++ }, ++ { .uname = "REF_TSC", ++ .udesc = "Reference cycles when core is not halted (Fixed event)", ++ .ucode = 0x0300, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0x400000000ull, ++ }, ++ { .uname = "CORE_P", ++ .udesc = "Core cycles when core is not halted", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "REF", ++ .udesc = "Reference cycles when core is not halted", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++}; ++ ++static const intel_x86_entry_t intel_glm_pe[]={ ++ { .name = "ICACHE", ++ .desc = "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture", ++ .code = 0x80, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_icache), ++ .umasks = glm_icache, ++ }, ++ { .name = "L2_REJECT_XQ", ++ .desc = "Requests rejected by the XQ", ++ .code = 0x30, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_l2_reject_xq), ++ .umasks = glm_l2_reject_xq, ++ }, ++ { .name = "HW_INTERRUPTS", ++ .desc = "Hardware interrupts received", ++ .code = 0xcb, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_hw_interrupts), ++ .umasks = glm_hw_interrupts, ++ }, ++ { .name = "BR_MISP_RETIRED", ++ .desc = "Retired mispredicted branch instructions (Precise Event)", ++ .code = 0xc5, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_br_misp_retired), ++ .umasks = glm_br_misp_retired, ++ }, ++ { .name = "DECODE_RESTRICTION", ++ .desc = "Decode restrictions due to predicting wrong instruction length", ++ .code = 0xe9, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = 
LIBPFM_ARRAY_SIZE(glm_decode_restriction), ++ .umasks = glm_decode_restriction, ++ }, ++ { .name = "MISALIGN_MEM_REF", ++ .desc = "Load uops that split a page (Precise Event)", ++ .code = 0x13, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_misalign_mem_ref), ++ .umasks = glm_misalign_mem_ref, ++ }, ++ { .name = "INST_RETIRED", ++ .desc = "Instructions retired (Precise Event)", ++ .code = 0xc0, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0x10000000full, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_inst_retired), ++ .umasks = glm_inst_retired, ++ }, ++ { .name = "INSTRUCTION_RETIRED", ++ .desc = "Number of instructions retired", ++ .code = 0xc0, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0x100000ffull, ++ .ngrp = 0, ++ }, ++ { .name = "ISSUE_SLOTS_NOT_CONSUMED", ++ .desc = "Unfilled issue slots per cycle because of a full resource in the backend", ++ .code = 0xca, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_issue_slots_not_consumed), ++ .umasks = glm_issue_slots_not_consumed, ++ }, ++ { .name = "ITLB", ++ .desc = "ITLB misses", ++ .code = 0x81, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_itlb), ++ .umasks = glm_itlb, ++ }, ++ { .name = "LONGEST_LAT_CACHE", ++ .desc = "L2 cache requests", ++ .code = 0x2e, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_longest_lat_cache), ++ .umasks = glm_longest_lat_cache, ++ }, ++ { .name = "MEM_LOAD_UOPS_RETIRED", ++ .desc = "Load uops retired that hit L1 data cache (Precise Event)", ++ .code = 0xd1, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_mem_load_uops_retired), ++ .umasks = glm_mem_load_uops_retired, ++ }, ++ { .name = "LD_BLOCKS", ++ .desc = "Loads blocked (Precise 
Event)", ++ .code = 0x03, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_ld_blocks), ++ .umasks = glm_ld_blocks, ++ }, ++ { .name = "DL1", ++ .desc = "L1 Cache evictions for dirty data", ++ .code = 0x51, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_dl1), ++ .umasks = glm_dl1, ++ }, ++ { .name = "CYCLES_DIV_BUSY", ++ .desc = "Cycles a divider is busy", ++ .code = 0xcd, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_cycles_div_busy), ++ .umasks = glm_cycles_div_busy, ++ }, ++ { .name = "MS_DECODED", ++ .desc = "MS decode starts", ++ .code = 0xe7, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_ms_decoded), ++ .umasks = glm_ms_decoded, ++ }, ++ { .name = "UOPS_RETIRED", ++ .desc = "Uops retired (Precise Event)", ++ .code = 0xc2, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_uops_retired), ++ .umasks = glm_uops_retired, ++ }, ++ { .name = "OFFCORE_RESPONSE_1", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .code = 0x1bb, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xffull, ++ .flags = INTEL_X86_NHM_OFFCORE, ++ .ngrp = 3, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_offcore_response_1), ++ .umasks = glm_offcore_response_1, ++ }, ++ { .name = "MACHINE_CLEARS", ++ .desc = "Self-Modifying Code detected", ++ .code = 0xc3, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_machine_clears), ++ .umasks = glm_machine_clears, ++ }, ++ { .name = "BR_INST_RETIRED", ++ .desc = "Retired branch instructions (Precise Event)", ++ .code = 0xc4, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ 
.numasks = LIBPFM_ARRAY_SIZE(glm_br_inst_retired), ++ .umasks = glm_br_inst_retired, ++ }, ++ { .name = "FETCH_STALL", ++ .desc = "Cycles where code-fetch is stalled and an ICache miss is outstanding. This is not the same as an ICache Miss", ++ .code = 0x86, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_fetch_stall), ++ .umasks = glm_fetch_stall, ++ }, ++ { .name = "UOPS_NOT_DELIVERED", ++ .desc = "Uops requested but not-delivered to the back-end per cycle", ++ .code = 0x9c, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_uops_not_delivered), ++ .umasks = glm_uops_not_delivered, ++ }, ++ { .name = "MISPREDICTED_BRANCH_RETIRED", ++ .desc = "Number of mispredicted branch instructions retired", ++ .code = 0xc5, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xffull, ++ .equiv = "BR_MISP_RETIRED:ALL_BRANCHES", ++ .ngrp = 0, ++ }, ++ { .name = "INSTRUCTIONS_RETIRED", ++ .desc = "Number of instructions retired", ++ .code = 0xc0, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0x100000ffull, ++ .equiv = "INSTRUCTION_RETIRED", ++ .ngrp = 0, ++ }, ++ { .name = "MEM_UOPS_RETIRED", ++ .desc = "Load uops retired (Precise Event)", ++ .code = 0xd0, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_mem_uops_retired), ++ .umasks = glm_mem_uops_retired, ++ }, ++ { .name = "UOPS_ISSUED", ++ .desc = "Uops issued to the back end per cycle", ++ .code = 0x0e, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_uops_issued), ++ .umasks = glm_uops_issued, ++ }, ++ { .name = "OFFCORE_RESPONSE_0", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .code = 0x1b7, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xffull, ++ .flags = INTEL_X86_NHM_OFFCORE, ++ .ngrp = 3, ++ .numasks = 
LIBPFM_ARRAY_SIZE(glm_offcore_response_0), ++ .umasks = glm_offcore_response_0, ++ }, ++ { .name = "UNHALTED_REFERENCE_CYCLES", ++ .desc = "Unhalted reference cycles. Ticks at constant reference frequency", ++ .code = 0x0300, ++ .modmsk = INTEL_FIXED3_ATTRS, ++ .cntmsk = 0x40000000ull, ++ .flags = INTEL_X86_FIXED, ++ .ngrp = 0, ++ }, ++ { .name = "BRANCH_INSTRUCTIONS_RETIRED", ++ .desc = "Number of branch instructions retired", ++ .code = 0xc4, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xffull, ++ .equiv = "BR_INST_RETIRED:ALL_BRANCHES", ++ .ngrp = 0, ++ }, ++ { .name = "CORE_REJECT_L2Q", ++ .desc = "Requests rejected by the L2Q ", ++ .code = 0x31, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_core_reject_l2q), ++ .umasks = glm_core_reject_l2q, ++ }, ++ { .name = "PAGE_WALKS", ++ .desc = "Duration of D-side page-walks in cycles", ++ .code = 0x05, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_page_walks), ++ .umasks = glm_page_walks, ++ }, ++ { .name = "BACLEARS", ++ .desc = "BACLEARs asserted for any branch type", ++ .code = 0xe6, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xfull, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_baclears), ++ .umasks = glm_baclears, ++ }, ++ { .name = "CPU_CLK_UNHALTED", ++ .desc = "Core cycles when core is not halted (Fixed event)", ++ .code = 0x00, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0x60000000full, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(glm_cpu_clk_unhalted), ++ .umasks = glm_cpu_clk_unhalted, ++ }, ++ { .name = "UNHALTED_CORE_CYCLES", ++ .desc = "Core clock cycles whenever the clock signal on the specific core is running (not halted)", ++ .code = 0x3c, ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0x20000000ull, ++ .ngrp = 0, ++ }, ++}; +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index 05ce1c0..4c4c376 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -125,6 +125,7 @@ static 
pfmlib_pmu_t *pfmlib_pmus[]= + &intel_snbep_unc_r3qpi1_support, + &intel_knc_support, + &intel_slm_support, ++ &intel_glm_support, + &intel_ivbep_unc_cb0_support, + &intel_ivbep_unc_cb1_support, + &intel_ivbep_unc_cb2_support, +diff --git a/lib/pfmlib_intel_glm.c b/lib/pfmlib_intel_glm.c +new file mode 100644 +index 0000000..0b8bd9d +--- /dev/null ++++ b/lib/pfmlib_intel_glm.c +@@ -0,0 +1,73 @@ ++/* ++ * pfmlib_intel_glm.c : Intel Goldmont core PMU ++ * ++ * Copyright (c) 2016 Google ++ * Contributed by Stephane Eranian ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++/* private headers */ ++#include "pfmlib_priv.h" ++#include "pfmlib_intel_x86_priv.h" ++#include "events/intel_glm_events.h" ++ ++static const int glm_models[] = { ++ 92, /* Goldmont */ ++ 95, /* Goldmont Denverton */ ++ 0 ++}; ++ ++static int ++pfm_intel_glm_init(void *this) ++{ ++ pfm_intel_x86_cfg.arch_version = 3; ++ return PFM_SUCCESS; ++} ++ ++pfmlib_pmu_t intel_glm_support={ ++ .desc = "Intel Goldmont", ++ .name = "glm", ++ .pmu = PFM_PMU_INTEL_GLM, ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_glm_pe), ++ .type = PFM_PMU_TYPE_CORE, ++ .num_cntrs = 4, ++ .num_fixed_cntrs = 3, ++ .max_encoding = 2, ++ .pe = intel_glm_pe, ++ .atdesc = intel_x86_mods, ++ .flags = PFMLIB_PMU_FL_RAW_UMASK, ++ .supported_plm = INTEL_X86_PLM, ++ ++ .cpu_family = 6, ++ .cpu_models = glm_models, ++ .pmu_detect = pfm_intel_x86_model_detect, ++ .pmu_init = pfm_intel_glm_init, ++ ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, ++ PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), ++ ++ .get_event_first = pfm_intel_x86_get_event_first, ++ .get_event_next = pfm_intel_x86_get_event_next, ++ .event_is_valid = pfm_intel_x86_event_is_valid, ++ .validate_table = pfm_intel_x86_validate_table, ++ .get_event_info = pfm_intel_x86_get_event_info, ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), ++ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, ++}; +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index 2c760ea..0d106a4 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -353,6 +353,7 @@ extern pfmlib_pmu_t intel_hswep_unc_r3qpi2_support; + extern pfmlib_pmu_t intel_hswep_unc_irp_support; + extern pfmlib_pmu_t intel_knc_support; + extern pfmlib_pmu_t intel_slm_support; ++extern pfmlib_pmu_t intel_glm_support; + extern pfmlib_pmu_t power4_support; + extern pfmlib_pmu_t ppc970_support; + extern pfmlib_pmu_t ppc970mp_support; +-- +2.9.3 + + +From 
c7e1e2ad413997c0cce36b040681e9e5bf6a8ef8 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Wed, 20 Jul 2016 22:16:18 -0700 +Subject: [PATCH] update Intel Goldmont support + +This patch fixes errors in the initial commit (b74653d10661) +for Intel Goldmont core PMU. Mostly the offcore_response support +was incorrect. + +This patch series adds support for the average latency cycle +feature of offcore_response on Intel Goldmont. As a consequence +a new umask/event flag called INTEL_X86_EXCL_GRP_BUT_0 is introduced. +It allows a umask to restrict which other umask can be combined. +It allows the current umask group and group 0 to be used, no other +group can be used. This feature is used to support the average latency +encodings in offcore_response on this PMU. + +The patch also adds the proper man page for libpfm_intel_glm.3 +The patch adds validation tests for encoding Goldmont events including +the new offcore_response features. + +Signed-off-by: Stephane Eranian +--- + README | 1 + + docs/Makefile | 1 + + docs/man3/libpfm_intel_glm.3 | 97 +++++++ + lib/events/intel_glm_events.h | 605 +++++++++++++++--------------------------- + lib/pfmlib_intel_nhm_unc.c | 2 +- + lib/pfmlib_intel_x86.c | 32 ++- + lib/pfmlib_intel_x86_priv.h | 3 +- + tests/validate_x86.c | 82 ++++++ + 8 files changed, 423 insertions(+), 400 deletions(-) + create mode 100644 docs/man3/libpfm_intel_glm.3 + +diff --git a/README b/README +index 6a1bbc1..ce60d3a 100644 +--- a/README ++++ b/README +@@ -52,6 +52,7 @@ The library supports many PMUs.
The current version can handle: + Intel SkyLake + Intel Silvermont + Intel Airmont ++ Intel Goldmont + Intel RAPL (energy consumption) + Intel Knights Corner + Intel architectural perfmon v1, v2, v3 +diff --git a/docs/Makefile b/docs/Makefile +index c7c82ef..873f31f 100644 +--- a/docs/Makefile ++++ b/docs/Makefile +@@ -52,6 +52,7 @@ ARCH_MAN=libpfm_intel_core.3 \ + libpfm_intel_rapl.3 \ + libpfm_intel_slm.3 \ + libpfm_intel_skl.3 \ ++ libpfm_intel_glm.3 \ + libpfm_intel_snbep_unc_cbo.3 \ + libpfm_intel_snbep_unc_ha.3 \ + libpfm_intel_snbep_unc_imc.3 \ +diff --git a/docs/man3/libpfm_intel_glm.3 b/docs/man3/libpfm_intel_glm.3 +new file mode 100644 +index 0000000..1a9338b +--- /dev/null ++++ b/docs/man3/libpfm_intel_glm.3 +@@ -0,0 +1,97 @@ ++.TH LIBPFM 3 "July, 2016" "" "Linux Programmer's Manual" ++.SH NAME ++libpfm_intel_glm - support for Intel Goldmont core PMU ++.SH SYNOPSIS ++.nf ++.B #include <perfmon/pfmlib.h> ++.sp ++.B PMU name: glm ++.B PMU desc: Intel Goldmont ++.sp ++.SH DESCRIPTION ++The library supports the Intel Goldmont core PMU. It should be noted that ++this PMU model only covers each core's PMU and not the socket level ++PMU. ++ ++On Goldmont, the number of generic counters is 4. There is no HyperThreading support. ++The \fBpfm_get_pmu_info()\fR function returns the maximum number of generic counters ++in \fBnum_cntrs\fR. ++ ++.SH MODIFIERS ++The following modifiers are supported on Intel Goldmont processors: ++.TP ++.B u ++Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. ++This is a boolean modifier. ++.TP ++.B k ++Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. ++This is a boolean modifier. ++.TP ++.B i ++Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR ++occurring.
This is a boolean modifier. ++.TP ++.B e ++Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event ++to at least one occurrence. This modifier must be combined with a counter mask modifier (c) with a value greater than or equal to one. ++This is a boolean modifier. ++.TP ++.B c ++Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles ++in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer ++modifier with values in the range [0:255]. ++ ++.SH OFFCORE_RESPONSE events ++Intel Goldmont provides two offcore_response events. They are called OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1. ++ ++Those events need special treatment in the performance monitoring infrastructure ++because each event uses an extra register to store some settings. Thus, in ++case multiple offcore_response events are monitored simultaneously, the kernel needs ++to manage the sharing of that extra register. ++ ++The offcore_response events are exposed as normal events by the library. The extra ++settings are exposed as regular umasks. The library takes care of encoding the ++events according to the underlying kernel interface. ++ ++On Intel Goldmont, the umasks are divided into 4 categories: request, supplier, ++snoop and average latency. The offcore_response events have two modes of operation: normal and average latency. ++In the first mode, the two offcore_response events operate independently of each other. The user must provide at ++least one umask for each of the first 3 categories: request, supplier, snoop. In the second mode, the two ++offcore_response events are combined to compute an average latency per request type. ++ ++For the normal mode, there is a special supplier (response) umask called \fBANY_RESPONSE\fR. When this umask ++is used then it overrides any supplier and snoop umasks.
In other words, users can ++specify either \fBANY_RESPONSE\fR \fBOR\fR any combination of supplier and snoop umasks. In case no supplier or snoop ++is specified, the library defaults to using \fBANY_RESPONSE\fR. ++ ++For instance, the following are valid event selections: ++.TP ++.B OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE ++.TP ++.B OFFCORE_RESPONSE_0:ANY_REQUEST ++.TP ++.B OFFCORE_RESPONSE_0:ANY_RFO:L2_HIT:L2_MISS_SNP_ANY ++ ++.P ++But the following are illegal: ++ ++.TP ++.B OFFCORE_RESPONSE_0:ANY_RFO:L2_HIT:ANY_RESPONSE ++.TP ++.B OFFCORE_RESPONSE_0:ANY_RFO:L2_HIT:L2_MISS_SNP_ANY:ANY_RESPONSE ++.P ++In average latency mode, \fBOFFCORE_RESPONSE_0\fR must be programmed to select the request types of interest, for instance, \fBDMND_DATA_RD\fR, and the \fBOUTSTANDING\fR umask must be set and no others. The library will enforce that restriction as soon as the \fBOUTSTANDING\fR umask is used. Then \fBOFFCORE_RESPONSE_1\fR must be set with the same request types and the \fBANY_RESPONSE\fR umask. It should be noted that the library encodes events independently of each other and therefore cannot verify that the requests are matching between the two events. ++Example of average latency settings: ++.TP ++.B OFFCORE_RESPONSE_0:DMND_DATA_RD:OUTSTANDING+OFFCORE_RESPONSE_1:DMND_DATA_RD:ANY_RESPONSE ++.TP ++.B OFFCORE_RESPONSE_0:ANY_REQUEST:OUTSTANDING+OFFCORE_RESPONSE_1:ANY_REQUEST:ANY_RESPONSE ++.P ++The average latency for the request(s) is obtained by dividing the count of \fBOFFCORE_RESPONSE_0\fR by the count of \fBOFFCORE_RESPONSE_1\fR. The ratio is expressed in core cycles.
++ ++.SH AUTHORS ++.nf ++Stephane Eranian ++.if ++.PP +diff --git a/lib/events/intel_glm_events.h b/lib/events/intel_glm_events.h +index fd0b27c..a7ed811 100644 +--- a/lib/events/intel_glm_events.h ++++ b/lib/events/intel_glm_events.h +@@ -358,7 +358,7 @@ static const intel_x86_umask_t glm_uops_retired[]={ + }, + }; + +-static const intel_x86_umask_t glm_offcore_response_1[]={ ++static const intel_x86_umask_t glm_offcore_response_0[]={ + { .uname = "DMND_DATA_RD", + .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", + .ucode = 1ULL << (0 + 8), +@@ -406,7 +406,6 @@ static const intel_x86_umask_t glm_offcore_response_1[]={ + .ucode = 1ULL << (8 + 8), + .grpid = 0, + .ucntmsk = 0xffull, +- .uequiv = "PF_CODE_RD", + }, + { .uname = "UC_CODE_READS", + .udesc = "Request: number of uncached code reads", +@@ -420,7 +419,7 @@ static const intel_x86_umask_t glm_offcore_response_1[]={ + .grpid = 0, + .ucntmsk = 0xffull, + }, +- { .uname = "STRM_ST", ++ { .uname = "FULL_STRM_ST", + .udesc = "Request: number of streaming store requests for full cacheline", + .ucode = 1ULL << (11 + 8), + .grpid = 0, +@@ -440,51 +439,37 @@ static const intel_x86_umask_t glm_offcore_response_1[]={ + }, + { .uname = "PARTIAL_STRM_ST", + .udesc = "Request: number of streaming store requests for partial cacheline", +- .ucode = 1ULL << (11 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "OTHER", +- .udesc = "Request: counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", +- .ucode = 1ULL << (15 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "ANY_CODE_RD", +- .udesc = "Request: combination of PF_CODE_RD | DMND_CODE_RD | PF_L3_CODE_RD", +- .ucode = 0x24400, ++ .ucode = 1ULL << (14 + 8), + 
.grpid = 0, + .ucntmsk = 0xffull, +- .uequiv = "PF_CODE_RD:DMND_CODE_RD:PF_L3_CODE_RD", + }, +- { .uname = "ANY_IFETCH", +- .udesc = "Request: combination of PF_CODE_RD | PF_L3_CODE_RD", +- .ucode = 0x24000, ++ { .uname = "STRM_ST", ++ .udesc = "Request: number of streaming store requests for partial or full cacheline", ++ .ucode = (1ULL << (14 + 8)) | (1ULL << (11+8)), ++ .uequiv = "FULL_STRM_ST:PARTIAL_STRM_ST", + .grpid = 0, + .ucntmsk = 0xffull, +- .uequiv = "PF_CODE_RD:PF_L3_CODE_RD", + }, + { .uname = "ANY_REQUEST", + .udesc = "Request: combination of all request umasks", +- .ucode = 0x8fff00, ++ .ucode = 1ULL << (15 + 8), + .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, + .grpid = 0, + .ucntmsk = 0xffull, +- .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:WB:PF_DATA_RD:PF_RFO:PF_CODE_RD:PF_L3_DATA_RD:PF_L3_RFO:PF_L3_CODE_RD:SPLIT_LOCK_UC_LOCK:STRM_ST:OTHER", + }, +- { .uname = "ANY_DATA", +- .udesc = "Request: combination of DMND_DATA | PF_DATA_RD | PF_L3_DATA_RD", +- .ucode = 0x9100, ++ { .uname = "ANY_PF_DATA_RD", ++ .udesc = "Request: number of prefetch data reads", ++ .ucode = (1ULL << (4+8)) | (1ULL << (12+8)) | (1ULL << (13+8)), + .grpid = 0, + .ucntmsk = 0xffull, +- .uequiv = "DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD", ++ .uequiv = "PF_DATA_RD:SW_PF:PF_L1_DATA_RD", + }, + { .uname = "ANY_RFO", +- .udesc = "Request: combination of DMND_RFO | PF_RFO | PF_L3_RFO", +- .ucode = 0x12200, ++ .udesc = "Request: number of RFO", ++ .ucode = (1ULL << (1+8)) | (1ULL << (5+8)), + .grpid = 0, + .ucntmsk = 0xffull, +- .uequiv = "DMND_RFO:PF_RFO:PF_L3_RFO", ++ .uequiv = "DMND_RFO:PF_RFO", + }, + { .uname = "ANY_RESPONSE", + .udesc = "Response: any response type", +@@ -493,112 +478,210 @@ static const intel_x86_umask_t glm_offcore_response_1[]={ + .grpid = 1, + .ucntmsk = 0xffull, + }, +- { .uname = "NO_SUPP", +- .udesc = "Supplier: counts number of times supplier information is not available", +- .ucode = 1ULL << (17 + 8), ++ { .uname = "L2_HIT", ++ .udesc = "Supplier: 
counts L2 hits", ++ .ucode = 1ULL << (18 + 8), + .grpid = 1, + .ucntmsk = 0xffull, + }, +- { .uname = "L3_HITM", +- .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", +- .ucode = 1ULL << (18 + 8), +- .grpid = 1, ++ { .uname = "L2_MISS_SNP_MISS_OR_NO_SNOOP_NEEDED", ++ .udesc = "Snoop: counts number true misses to this processor module for which a snoop request missed the other processor module or no snoop was needed", ++ .ucode = 1ULL << (33 + 8), ++ .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "L3_HITE", +- .udesc = "Supplier: counts L3 hits in E-state", +- .ucode = 1ULL << (19 + 8), +- .grpid = 1, ++ { .uname = "L2_MISS_HIT_OTHER_CORE_NO_FWD", ++ .udesc = "Snoop: counts number of times a snoop request hits the other processor module but no data forwarding is needed", ++ .ucode = 1ULL << (34 + 8), ++ .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "L3_HITS", +- .udesc = "Supplier: counts L3 hits in S-state", +- .ucode = 1ULL << (20 + 8), +- .grpid = 1, ++ { .uname = "L2_MISS_HITM_OTHER_CORE", ++ .udesc = "Snoop: counts number of times a snoop request hits in the other processor module or other core's L1 where a modified copy (M-state) is found", ++ .ucode = 1ULL << (36 + 8), ++ .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "L3_HIT", +- .udesc = "Supplier: counts L3 hits in any state (M, E, S)", +- .ucode = 7ULL << (18 + 8), +- .grpid = 1, ++ { .uname = "L2_MISS_SNP_NON_DRAM", ++ .udesc = "Snoop: counts number of times target was a non-DRAM system address. 
This includes MMIO transactions", ++ .ucode = 1ULL << (37 + 8), ++ .grpid = 2, + .ucntmsk = 0xffull, +- .umodel = PFM_PMU_INTEL_GLM, +- .uequiv = "L3_HITM:L3_HITE:L3_HITS", + }, +- { .uname = "L3_MISS_LOCAL_DRAM", +- .udesc = "Supplier: counts L3 misses to local DRAM", +- .ucode = 1ULL << (22 + 8), +- .grpid = 1, ++ { .uname = "L2_MISS_SNP_ANY", ++ .udesc = "Snoop: any snoop reason", ++ .ucode = 0x1bULL << (33 + 8), ++ .uflags = INTEL_X86_DFL, ++ .uequiv = "L2_MISS_SNP_MISS_OR_NO_SNOOP_NEEDED:L2_MISS_HIT_OTHER_CORE_NO_FWD:L2_MISS_HITM_OTHER_CORE:L2_MISS_SNP_NON_DRAM", ++ .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "L3_MISS_REMOTE_DRAM", +- .udesc = "Supplier: counts L3 misses to remote DRAM", +- .ucode = 0x7fULL << (23 + 8), +- .grpid = 1, ++ { .uname = "OUTSTANDING", ++ .udesc = "Outstanding request: counts weighted cycles of outstanding offcore requests of the request type specified in the bits 15:0 of offcore_response from the time the XQ receives the request and any response received. Bits 37:16 must be set to 0. This is only available for offcore_response_0", ++ .ucode = 1ULL << (38 + 8), ++ .uflags = INTEL_X86_DFL | INTEL_X86_EXCL_GRP_BUT_0, /* can only be combined with request type bits (grpid = 0) */ ++ .grpid = 3, + .ucntmsk = 0xffull, + }, +- { .uname = "L3_MISS", +- .udesc = "Supplier: counts L3 misses to local or remote DRAM", +- .ucode = 0xffULL << (22 + 8), +- .grpid = 1, ++}; ++ ++static const intel_x86_umask_t glm_offcore_response_1[]={ ++ { .uname = "DMND_DATA_RD", ++ .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. 
Does not count L2 data read prefetches or instruction fetches", ++ .ucode = 1ULL << (0 + 8), ++ .grpid = 0, + .ucntmsk = 0xffull, +- .uequiv = "L3_MISS_REMOTE_DRAM:L3_MISS_LOCAL_DRAM", + }, +- { .uname = "SPL_HIT", +- .udesc = "Supplier: counts L3 supplier hit", +- .ucode = 1ULL << (30 + 8), +- .grpid = 1, ++ { .uname = "DMND_RFO", ++ .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", ++ .ucode = 1ULL << (1 + 8), ++ .grpid = 0, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_NONE", +- .udesc = "Snoop: counts number of times no snoop-related information is available", +- .ucode = 1ULL << (31 + 8), +- .grpid = 2, ++ { .uname = "DMND_CODE_RD", ++ .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. Does not count L2 code read prefetches", ++ .ucode = 1ULL << (2 + 8), ++ .grpid = 0, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_NOT_NEEDED", +- .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", +- .ucode = 1ULL << (32 + 8), +- .grpid = 2, ++ { .uname = "WB", ++ .udesc = "Request: number of writebacks (modified to exclusive) transactions", ++ .ucode = 1ULL << (3 + 8), ++ .grpid = 0, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_MISS", +- .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", +- .ucode = 1ULL << (33 + 8), +- .grpid = 2, ++ { .uname = "PF_DATA_RD", ++ .udesc = "Request: number of data cacheline reads generated by L2 prefetcher", ++ .ucode = 1ULL << (4 + 8), ++ .grpid = 0, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_NO_FWD", +- .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", +- .ucode = 1ULL << (34 + 8), ++ { .uname = "PF_RFO", ++ .udesc = "Request: number of RFO requests generated by L2 prefetcher", ++ .ucode = 1ULL << (5 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = 
"PARTIAL_READS", ++ .udesc = "Request: number of partil reads", ++ .ucode = 1ULL << (7 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PARTIAL_WRITES", ++ .udesc = "Request: number of partial writes", ++ .ucode = 1ULL << (8 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "UC_CODE_READS", ++ .udesc = "Request: number of uncached code reads", ++ .ucode = 1ULL << (9 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "BUS_LOCKS", ++ .udesc = "Request: number of bus lock and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "FULL_STRM_ST", ++ .udesc = "Request: number of streaming store requests for full cacheline", ++ .ucode = 1ULL << (11 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "SW_PF", ++ .udesc = "Request: number of cacheline requests due to software prefetch", ++ .ucode = 1ULL << (12 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PF_L1_DATA_RD", ++ .udesc = "Request: number of data cacheline reads generated by L1 data prefetcher", ++ .ucode = 1ULL << (13 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "PARTIAL_STRM_ST", ++ .udesc = "Request: number of streaming store requests for partial cacheline", ++ .ucode = 1ULL << (14 + 8), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "STRM_ST", ++ .udesc = "Request: number of streaming store requests for partial or full cacheline", ++ .ucode = (1ULL << (14 + 8)) | (1ULL << (11+8)), ++ .uequiv = "FULL_STRM_ST:PARTIAL_STRM_ST", ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "ANY_REQUEST", ++ .udesc = "Request: combination of all request umasks", ++ .ucode = 1ULL << (15 + 8), ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "ANY_PF_DATA_RD", ++ .udesc = "Request: number of prefetch data reads", ++ .ucode = (1ULL << (4+8)) | (1ULL << (12+8)) | (1ULL << (13+8)), ++ .grpid = 0, ++ .ucntmsk = 
0xffull, ++ .uequiv = "PF_DATA_RD:SW_PF:PF_L1_DATA_RD", ++ }, ++ { .uname = "ANY_RFO", ++ .udesc = "Request: number of RFO", ++ .ucode = (1ULL << (1+8)) | (1ULL << (5+8)), ++ .grpid = 0, ++ .ucntmsk = 0xffull, ++ .uequiv = "DMND_RFO:PF_RFO", ++ }, ++ { .uname = "ANY_RESPONSE", ++ .udesc = "Response: any response type", ++ .ucode = 1ULL << (16 + 8), ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L2_HIT", ++ .udesc = "Supplier: counts L2 hits", ++ .ucode = 1ULL << (18 + 8), ++ .grpid = 1, ++ .ucntmsk = 0xffull, ++ }, ++ { .uname = "L2_MISS_SNP_MISS_OR_NO_SNOOP_NEEDED", ++ .udesc = "Snoop: counts number true misses to this processor module for which a snoop request missed the other processor module or no snoop was needed", ++ .ucode = 1ULL << (33 + 8), + .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_FWD", +- .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", +- .ucode = 1ULL << (35 + 8), ++ { .uname = "L2_MISS_HIT_OTHER_CORE_NO_FWD", ++ .udesc = "Snoop: counts number of times a snoop request hits the other processor module but no data forwarding is needed", ++ .ucode = 1ULL << (34 + 8), + .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_HITM", +- .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", ++ { .uname = "L2_MISS_HITM_OTHER_CORE", ++ .udesc = "Snoop: counts number of times a snoop request hits in the other processor module or other core's L1 where a modified copy (M-state) is found", + .ucode = 1ULL << (36 + 8), + .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_NON_DRAM", ++ { .uname = "L2_MISS_SNP_NON_DRAM", + .udesc = "Snoop: counts number of times target was a non-DRAM system address. 
This includes MMIO transactions", + .ucode = 1ULL << (37 + 8), + .grpid = 2, + .ucntmsk = 0xffull, + }, +- { .uname = "SNP_ANY", ++ { .uname = "L2_MISS_SNP_ANY", + .udesc = "Snoop: any snoop reason", +- .ucode = 0x7ULL << (31 + 8), ++ .ucode = 0xfULL << (33 + 8), + .uflags = INTEL_X86_DFL, + .grpid = 2, + .ucntmsk = 0xffull, +- .uequiv = "SNP_NONE:SNP_NOT_NEEDED:SNP_MISS:SNP_NO_FWD:SNP_FWD:SNP_HITM:SNP_NON_DRAM", ++ .uequiv = "L2_MISS_SNP_MISS_OR_NO_SNOOP_NEEDED:L2_MISS_HIT_OTHER_CORE_NO_FWD:L2_MISS_HITM_OTHER_CORE:L2_MISS_SNP_NON_DRAM", + }, + }; + +@@ -809,272 +892,6 @@ static const intel_x86_umask_t glm_uops_issued[]={ + }, + }; + +-static const intel_x86_umask_t glm_offcore_response_0[]={ +- { .uname = "DMND_DATA_RD", +- .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", +- .ucode = 1ULL << (0 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "DMND_RFO", +- .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", +- .ucode = 1ULL << (1 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "DMND_CODE_RD", +- .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. Does not count L2 code read prefetches", +- .ucode = 1ULL << (2 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "DMND_IFETCH", +- .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. 
Does not count L2 code read prefetches", +- .ucode = 1ULL << (2 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "DMND_CODE_RD", +- }, +- { .uname = "WB", +- .udesc = "Request: number of writebacks (modified to exclusive) transactions", +- .ucode = 1ULL << (3 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "PF_DATA_RD", +- .udesc = "Request: number of data cacheline reads generated by L2 prefetchers", +- .ucode = 1ULL << (4 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "PF_RFO", +- .udesc = "Request: number of RFO requests generated by L2 prefetchers", +- .ucode = 1ULL << (5 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "PF_CODE_RD", +- .udesc = "Request: number of code reads generated by L2 prefetchers", +- .ucode = 1ULL << (6 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "PF_IFETCH", +- .udesc = "Request: number of code reads generated by L2 prefetchers", +- .ucode = 1ULL << (6 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "PF_CODE_RD", +- }, +- { .uname = "PF_L3_DATA_RD", +- .udesc = "Request: number of L2 prefetcher requests to L3 for loads", +- .ucode = 1ULL << (7 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "PF_L3_RFO", +- .udesc = "Request: number of RFO requests generated by L2 prefetcher", +- .ucode = 1ULL << (8 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "PF_L3_CODE_RD", +- .udesc = "Request: number of L2 prefetcher requests to L3 for instruction fetches", +- .ucode = 1ULL << (9 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "PF_L3_IFETCH", +- .udesc = "Request: number of L2 prefetcher requests to L3 for instruction fetches", +- .ucode = 1ULL << (9 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "PF_L3_CODE_RD", +- }, +- { .uname = "SPLIT_LOCK_UC_LOCK", +- .udesc = "Request: number of bus lock and split lock requests", +- .ucode = 1ULL << (10 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname 
= "BUS_LOCKS", +- .udesc = "Request: number of bus lock and split lock requests", +- .ucode = 1ULL << (10 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "SPLIT_LOCK_UC_LOCK", +- }, +- { .uname = "BUS_LOCK", +- .udesc = "Request: number of bus lock and split lock requests", +- .ucode = 1ULL << (10 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "SPLIT_LOCK_UC_LOCK", +- }, +- { .uname = "STRM_ST", +- .udesc = "Request: number of streaming store requests", +- .ucode = 1ULL << (11 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "OTHER", +- .udesc = "Request: counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", +- .ucode = 1ULL << (15 + 8), +- .grpid = 0, +- .ucntmsk = 0xffull, +- }, +- { .uname = "ANY_CODE_RD", +- .udesc = "Request: combination of PF_CODE_RD | DMND_CODE_RD | PF_L3_CODE_RD", +- .ucode = 0x24400, +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "PF_CODE_RD:DMND_CODE_RD:PF_L3_CODE_RD", +- }, +- { .uname = "ANY_IFETCH", +- .udesc = "Request: combination of PF_CODE_RD | PF_L3_CODE_RD", +- .ucode = 0x24000, +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "PF_CODE_RD:PF_L3_CODE_RD", +- }, +- { .uname = "ANY_REQUEST", +- .udesc = "Request: combination of all request umasks", +- .ucode = 0x8fff00, +- .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:WB:PF_DATA_RD:PF_RFO:PF_CODE_RD:PF_L3_DATA_RD:PF_L3_RFO:PF_L3_CODE_RD:SPLIT_LOCK_UC_LOCK:STRM_ST:OTHER", +- }, +- { .uname = "ANY_DATA", +- .udesc = "Request: combination of DMND_DATA | PF_DATA_RD | PF_L3_DATA_RD", +- .ucode = 0x9100, +- .grpid = 0, +- .ucntmsk = 0xffull, +- .uequiv = "DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD", +- }, +- { .uname = "ANY_RFO", +- .udesc = "Request: combination of DMND_RFO | PF_RFO | PF_L3_RFO", +- .ucode = 0x12200, +- .grpid = 0, +- .ucntmsk = 0xffull, 
+- .uequiv = "DMND_RFO:PF_RFO:PF_L3_RFO", +- }, +- { .uname = "ANY_RESPONSE", +- .udesc = "Response: any response type", +- .ucode = 1ULL << (16 + 8), +- .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "NO_SUPP", +- .udesc = "Supplier: counts number of times supplier information is not available", +- .ucode = 1ULL << (17 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "L3_HITM", +- .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", +- .ucode = 1ULL << (18 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "L3_HITE", +- .udesc = "Supplier: counts L3 hits in E-state", +- .ucode = 1ULL << (19 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "L3_HITS", +- .udesc = "Supplier: counts L3 hits in S-state", +- .ucode = 1ULL << (20 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "L3_HIT", +- .udesc = "Supplier: counts L3 hits in any state (M, E, S)", +- .ucode = 7ULL << (18 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- .umodel = PFM_PMU_INTEL_GLM, +- .uequiv = "L3_HITM:L3_HITE:L3_HITS", +- }, +- { .uname = "L3_MISS_LOCAL_DRAM", +- .udesc = "Supplier: counts L3 misses to local DRAM", +- .ucode = 1ULL << (22 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "L3_MISS_REMOTE_DRAM", +- .udesc = "Supplier: counts L3 misses to remote DRAM", +- .ucode = 0x7fULL << (23 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "L3_MISS", +- .udesc = "Supplier: counts L3 misses to local or remote DRAM", +- .ucode = 0xffULL << (22 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- .uequiv = "L3_MISS_REMOTE_DRAM:L3_MISS_LOCAL_DRAM", +- }, +- { .uname = "SPL_HIT", +- .udesc = "Supplier: counts L3 supplier hit", +- .ucode = 1ULL << (30 + 8), +- .grpid = 1, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_NONE", +- .udesc = "Snoop: counts number of times no snoop-related information is available", +- .ucode = 1ULL << (31 + 
8), +- .grpid = 2, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_NOT_NEEDED", +- .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", +- .ucode = 1ULL << (32 + 8), +- .grpid = 2, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_MISS", +- .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", +- .ucode = 1ULL << (33 + 8), +- .grpid = 2, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_NO_FWD", +- .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", +- .ucode = 1ULL << (34 + 8), +- .grpid = 2, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_FWD", +- .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", +- .ucode = 1ULL << (35 + 8), +- .grpid = 2, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_HITM", +- .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", +- .ucode = 1ULL << (36 + 8), +- .grpid = 2, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_NON_DRAM", +- .udesc = "Snoop: counts number of times target was a non-DRAM system address. This includes MMIO transactions", +- .ucode = 1ULL << (37 + 8), +- .grpid = 2, +- .ucntmsk = 0xffull, +- }, +- { .uname = "SNP_ANY", +- .udesc = "Snoop: any snoop reason", +- .ucode = 0x7ULL << (31 + 8), +- .uflags = INTEL_X86_DFL, +- .grpid = 2, +- .ucntmsk = 0xffull, +- .uequiv = "SNP_NONE:SNP_NOT_NEEDED:SNP_MISS:SNP_NO_FWD:SNP_FWD:SNP_HITM:SNP_NON_DRAM", +- }, +-}; +- + static const intel_x86_umask_t glm_core_reject_l2q[]={ + { .uname = "ALL", + .udesc = "Requests rejected by the L2Q ", +@@ -1168,7 +985,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "ICACHE", + .desc = "References per ICache line that are available in the ICache (hit). 
This event counts differently than Intel processors based on Silvermont microarchitecture", + .code = 0x80, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_icache), +@@ -1177,7 +994,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "L2_REJECT_XQ", + .desc = "Requests rejected by the XQ", + .code = 0x30, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_l2_reject_xq), +@@ -1186,7 +1003,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "HW_INTERRUPTS", + .desc = "Hardware interrupts received", + .code = 0xcb, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_hw_interrupts), +@@ -1195,7 +1012,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "BR_MISP_RETIRED", + .desc = "Retired mispredicted branch instructions (Precise Event)", + .code = 0xc5, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1205,7 +1022,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "DECODE_RESTRICTION", + .desc = "Decode restrictions due to predicting wrong instruction length", + .code = 0xe9, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_decode_restriction), +@@ -1214,7 +1031,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "MISALIGN_MEM_REF", + .desc = "Load uops that split a page (Precise Event)", + .code = 0x13, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1224,7 +1041,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "INST_RETIRED", + .desc = "Instructions retired (Precise Event)", + .code = 0xc0, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = 
INTEL_V2_ATTRS, + .cntmsk = 0x10000000full, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1234,14 +1051,14 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "INSTRUCTION_RETIRED", + .desc = "Number of instructions retired", + .code = 0xc0, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x100000ffull, + .ngrp = 0, + }, + { .name = "ISSUE_SLOTS_NOT_CONSUMED", + .desc = "Unfilled issue slots per cycle because of a full resource in the backend", + .code = 0xca, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_issue_slots_not_consumed), +@@ -1250,7 +1067,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "ITLB", + .desc = "ITLB misses", + .code = 0x81, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_itlb), +@@ -1259,7 +1076,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "LONGEST_LAT_CACHE", + .desc = "L2 cache requests", + .code = 0x2e, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_longest_lat_cache), +@@ -1268,7 +1085,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "MEM_LOAD_UOPS_RETIRED", + .desc = "Load uops retired that hit L1 data cache (Precise Event)", + .code = 0xd1, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1278,7 +1095,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "LD_BLOCKS", + .desc = "Loads blocked (Precise Event)", + .code = 0x03, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1288,7 +1105,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "DL1", + .desc = "L1 Cache evictions for dirty data", + .code = 0x51, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = 
INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_dl1), +@@ -1297,7 +1114,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "CYCLES_DIV_BUSY", + .desc = "Cycles a divider is busy", + .code = 0xcd, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_cycles_div_busy), +@@ -1306,7 +1123,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "MS_DECODED", + .desc = "MS decode starts", + .code = 0xe7, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_ms_decoded), +@@ -1315,7 +1132,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "UOPS_RETIRED", + .desc = "Uops retired (Precise Event)", + .code = 0xc2, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1324,8 +1141,8 @@ static const intel_x86_entry_t intel_glm_pe[]={ + }, + { .name = "OFFCORE_RESPONSE_1", + .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", +- .code = 0x1bb, +- .modmsk = INTEL_V4_ATTRS, ++ .code = 0x2b7, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xffull, + .flags = INTEL_X86_NHM_OFFCORE, + .ngrp = 3, +@@ -1335,7 +1152,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "MACHINE_CLEARS", + .desc = "Self-Modifying Code detected", + .code = 0xc3, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_machine_clears), +@@ -1344,7 +1161,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "BR_INST_RETIRED", + .desc = "Retired branch instructions (Precise Event)", + .code = 0xc4, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1354,7 +1171,7 @@ static const 
intel_x86_entry_t intel_glm_pe[]={ + { .name = "FETCH_STALL", + .desc = "Cycles where code-fetch is stalled and an ICache miss is outstanding. This is not the same as an ICache Miss", + .code = 0x86, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_fetch_stall), +@@ -1363,7 +1180,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "UOPS_NOT_DELIVERED", + .desc = "Uops requested but not-delivered to the back-end per cycle", + .code = 0x9c, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_uops_not_delivered), +@@ -1372,7 +1189,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "MISPREDICTED_BRANCH_RETIRED", + .desc = "Number of mispredicted branch instructions retired", + .code = 0xc5, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xffull, + .equiv = "BR_MISP_RETIRED:ALL_BRANCHES", + .ngrp = 0, +@@ -1380,7 +1197,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "INSTRUCTIONS_RETIRED", + .desc = "Number of instructions retired", + .code = 0xc0, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x100000ffull, + .equiv = "INSTRUCTION_RETIRED", + .ngrp = 0, +@@ -1388,7 +1205,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "MEM_UOPS_RETIRED", + .desc = "Load uops retired (Precise Event)", + .code = 0xd0, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .flags = INTEL_X86_PEBS, + .ngrp = 1, +@@ -1398,7 +1215,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "UOPS_ISSUED", + .desc = "Uops issued to the back end per cycle", + .code = 0x0e, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_uops_issued), +@@ -1407,17 +1224,17 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = 
"OFFCORE_RESPONSE_0", + .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", + .code = 0x1b7, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xffull, + .flags = INTEL_X86_NHM_OFFCORE, +- .ngrp = 3, ++ .ngrp = 4, + .numasks = LIBPFM_ARRAY_SIZE(glm_offcore_response_0), + .umasks = glm_offcore_response_0, + }, + { .name = "UNHALTED_REFERENCE_CYCLES", + .desc = "Unhalted reference cycles. Ticks at constant reference frequency", + .code = 0x0300, +- .modmsk = INTEL_FIXED3_ATTRS, ++ .modmsk = INTEL_FIXED2_ATTRS, + .cntmsk = 0x40000000ull, + .flags = INTEL_X86_FIXED, + .ngrp = 0, +@@ -1425,7 +1242,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "BRANCH_INSTRUCTIONS_RETIRED", + .desc = "Number of branch instructions retired", + .code = 0xc4, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xffull, + .equiv = "BR_INST_RETIRED:ALL_BRANCHES", + .ngrp = 0, +@@ -1433,7 +1250,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "CORE_REJECT_L2Q", + .desc = "Requests rejected by the L2Q ", + .code = 0x31, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_core_reject_l2q), +@@ -1442,7 +1259,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "PAGE_WALKS", + .desc = "Duration of D-side page-walks in cycles", + .code = 0x05, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_page_walks), +@@ -1451,7 +1268,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "BACLEARS", + .desc = "BACLEARs asserted for any branch type", + .code = 0xe6, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_baclears), +@@ -1460,7 +1277,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = 
"CPU_CLK_UNHALTED", + .desc = "Core cycles when core is not halted (Fixed event)", + .code = 0x00, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x60000000full, + .ngrp = 1, + .numasks = LIBPFM_ARRAY_SIZE(glm_cpu_clk_unhalted), +@@ -1469,7 +1286,7 @@ static const intel_x86_entry_t intel_glm_pe[]={ + { .name = "UNHALTED_CORE_CYCLES", + .desc = "Core clock cycles whenever the clock signal on the specific core is running (not halted)", + .code = 0x3c, +- .modmsk = INTEL_V4_ATTRS, ++ .modmsk = INTEL_V2_ATTRS, + .cntmsk = 0x20000000ull, + .ngrp = 0, + }, +diff --git a/lib/pfmlib_intel_nhm_unc.c b/lib/pfmlib_intel_nhm_unc.c +index fbf1b19..4c27b07 100644 +--- a/lib/pfmlib_intel_nhm_unc.c ++++ b/lib/pfmlib_intel_nhm_unc.c +@@ -213,7 +213,7 @@ pfm_nhm_unc_get_encoding(void *this, pfmlib_event_desc_t *e) + */ + if ((ugrpmsk != grpmsk && !intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) || ugrpmsk == 0) { + ugrpmsk ^= grpmsk; +- ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask, -1); ++ ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask, -1, -1); + if (ret != PFM_SUCCESS) + return ret; + } +diff --git a/lib/pfmlib_intel_x86.c b/lib/pfmlib_intel_x86.c +index 031de0d..b698144 100644 +--- a/lib/pfmlib_intel_x86.c ++++ b/lib/pfmlib_intel_x86.c +@@ -200,13 +200,14 @@ int + pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e, + unsigned int msk, + uint64_t *umask, +- unsigned int max_grpid) ++ unsigned int max_grpid, ++ int excl_grp_but_0) + { + const intel_x86_entry_t *pe = this_pe(this); + const intel_x86_entry_t *ent; + unsigned int i; + int j, k, added, skip; +- int idx; ++ int idx, grpid; + + k = e->nattrs; + ent = pe+e->event; +@@ -242,6 +243,12 @@ pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e, + skip = 1; + continue; + } ++ grpid = ent->umasks[idx].grpid; ++ ++ if (excl_grp_but_0 != -1 && grpid != 0 && excl_grp_but_0 != grpid) { ++ skip = 1; ++ continue; ++ } + + /* umask is default for group */ + if 
(intel_x86_uflag(this, e->event, idx, INTEL_X86_DFL)) { +@@ -373,6 +380,7 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) + unsigned int grpid; + int ldlat = 0, ldlat_um = 0; + int fe_thr= 0, fe_thr_um = 0; ++ int excl_grp_but_0 = -1; + int grpcounts[INTEL_X86_NUM_GRP]; + int ncombo[INTEL_X86_NUM_GRP]; + +@@ -425,6 +433,8 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) + if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_EXCL_GRP_GT)) + max_grpid = grpid; + ++ if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_EXCL_GRP_BUT_0)) ++ excl_grp_but_0 = grpid; + /* + * upper layer has removed duplicates + * so if we come here more than once, it is for two +@@ -580,11 +590,25 @@ pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) + */ + if ((ugrpmsk != grpmsk && !intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) || ugrpmsk == 0) { + ugrpmsk ^= grpmsk; +- ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask2, max_grpid); ++ ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask2, max_grpid, excl_grp_but_0); + if (ret != PFM_SUCCESS) + return ret; + } +- ++ /* ++ * GRP_EXCL_BUT_0 groups require at least one bit set in grpid = 0 and one in theirs ++ * applies to OFFCORE_RESPONSE umasks on some processors (e.g., Goldmont) ++ */ ++ DPRINT("excl_grp_but_0=%d\n", excl_grp_but_0); ++ if (excl_grp_but_0 != -1) { ++ /* skip group 0, because it is authorized */ ++ for (k = 1; k < INTEL_X86_NUM_GRP; k++) { ++ DPRINT("grpcounts[%d]=%d\n", k, grpcounts[k]); ++ if (grpcounts[k] && k != excl_grp_but_0) { ++ DPRINT("GRP_EXCL_BUT_0 but grpcounts[%d]=%d\n", k, grpcounts[k]); ++ return PFM_ERR_FEATCOMB; ++ } ++ } ++ } + ret = intel_x86_check_pebs(this, e); + if (ret != PFM_SUCCESS) + return ret; +diff --git a/lib/pfmlib_intel_x86_priv.h b/lib/pfmlib_intel_x86_priv.h +index 74aab3e..963b41a 100644 +--- a/lib/pfmlib_intel_x86_priv.h ++++ b/lib/pfmlib_intel_x86_priv.h +@@ -89,6 +89,7 @@ typedef struct { + #define INTEL_X86_GRP_DFL_NONE 
0x0800 /* ok if umask group defaults to no umask */ + #define INTEL_X86_FRONTEND 0x1000 /* Skylake Precise frontend */ + #define INTEL_X86_FETHR 0x2000 /* precise frontend umask requires threshold modifier (fe_thres) */ ++#define INTEL_X86_EXCL_GRP_BUT_0 0x4000 /* exclude all groups except self and grpid = 0 */ + + typedef union pfm_intel_x86_reg { + unsigned long long val; /* complete register value */ +@@ -325,7 +326,7 @@ intel_x86_attr2umask(void *this, int pidx, int attr_idx) + } + + extern int pfm_intel_x86_detect(void); +-extern int pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e, unsigned int msk, uint64_t *umask, unsigned int max_grpid); ++extern int pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e, unsigned int msk, uint64_t *umask, unsigned int max_grpid, int excl_grp_but_0); + + extern int pfm_intel_x86_event_is_valid(void *this, int pidx); + extern int pfm_intel_x86_get_encoding(void *this, pfmlib_event_desc_t *e); +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 876453f..3e6f408 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -4260,6 +4260,88 @@ static const test_event_t x86_test_events[]={ + .name = "skl::offcore_response_1:0x7fffffffff", + .ret = PFM_ERR_ATTR, + }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_1:any_request", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5302b7, ++ .codes[1] = 0x18000, ++ .fstr = "glm::OFFCORE_RESPONSE_1:ANY_REQUEST:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_1:any_rfo", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5302b7, ++ .codes[1] = 0x10022, ++ .fstr = "glm::OFFCORE_RESPONSE_1:DMND_RFO:PF_RFO:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_1:any_rfo:l2_miss_snp_miss_or_no_snoop_needed", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5302b7, ++ .codes[1] = 0x200010022ull, ++ .fstr = 
"glm::OFFCORE_RESPONSE_1:DMND_RFO:PF_RFO:ANY_RESPONSE:L2_MISS_SNP_MISS_OR_NO_SNOOP_NEEDED:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_0:strm_st", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0x14800, ++ .fstr = "glm::OFFCORE_RESPONSE_0:FULL_STRM_ST:PARTIAL_STRM_ST:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_1:dmnd_data_rd:outstanding", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_1:dmnd_data_rd:l2_hit:outstanding", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_0:strm_st:outstanding", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0x4000004800ull, ++ .fstr = "glm::OFFCORE_RESPONSE_0:FULL_STRM_ST:PARTIAL_STRM_ST:OUTSTANDING:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_0:outstanding:dmnd_data_rd:u", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5101b7, ++ .codes[1] = 0x4000000001ull, ++ .fstr = "glm::OFFCORE_RESPONSE_0:DMND_DATA_RD:OUTSTANDING:k=0:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "glm::offcore_response_0:strm_st:l2_hit:outstanding", ++ .ret = PFM_ERR_FEATCOMB, ++ }, ++ { SRC_LINE, ++ .name = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x5301ca, ++ .fstr = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:k:c=1:i", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x1d201ca, ++ .fstr = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:k=1:u=0:e=0:i=1:c=1", ++ }, ++ { SRC_LINE, ++ .name = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:u:t", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:u:intxcp", ++ .ret = PFM_ERR_ATTR, ++ }, + }; + #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) + 
+-- +2.9.3 + + +From f7d50753d0e0148d00060e191c29afdd9d39d146 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Wed, 20 Jul 2016 22:23:36 -0700 +Subject: [PATCH] fix Intel Broadwell-EP OFFCORE_RESPONSE:L3_MISS_REMOTE + +This encoding of the umask was off by one bit for +L3_MISS_REMOTE and L3_MISS_REMOTE_DRAM (alias). + +Also adds the uequiv alias for the umask. + +Also adds a validation test for the umask. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_bdw_events.h | 5 +++-- + tests/validate_x86.c | 10 +++++++++- + 2 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/lib/events/intel_bdw_events.h b/lib/events/intel_bdw_events.h +index c22755e..6be3ac9 100644 +--- a/lib/events/intel_bdw_events.h ++++ b/lib/events/intel_bdw_events.h +@@ -1875,13 +1875,14 @@ static const intel_x86_umask_t bdw_offcore_response[]={ + }, + { .uname = "L3_MISS_REMOTE", + .udesc = "Supplier: counts L3 misses to remote node", +- .ucode = 0x7ULL << (26+8), ++ .uequiv = "L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P", ++ .ucode = 0x7ULL << (27+8), + .umodel = PFM_PMU_INTEL_BDW_EP, + .grpid = 1, + }, + { .uname = "L3_MISS_REMOTE_DRAM", + .udesc = "Supplier: counts L3 misses to remote node", +- .ucode = 0x7ULL << (26+8), ++ .ucode = 0x7ULL << (27+8), + .uequiv = "L3_MISS_REMOTE", + .umodel = PFM_PMU_INTEL_BDW_EP, + .grpid = 1, +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 3e6f408..4096372 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -3110,11 +3110,19 @@ static const test_event_t x86_test_events[]={ + .name = "bdw_ep::offcore_response_0:l3_miss", + .ret = PFM_SUCCESS, + .count = 2, +- .codes[0] =0x5301b7, ++ .codes[0] = 0x5301b7, + .codes[1] = 0x3fbc008fffull, + .fstr = "bdw_ep::OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, + { SRC_LINE, ++ .name = "bdw_ep::offcore_response_1:l3_miss_remote", ++ 
.ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301bb, ++ .codes[1] = 0x3fb8008fffull, ++ .fstr = "bdw_ep::OFFCORE_RESPONSE_1:ANY_REQUEST:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, + .name = "bdw_ep::offcore_response_0:L3_MISS_REMOTE_HOP0_DRAM", + .ret = PFM_SUCCESS, + .count = 2, +-- +2.9.3 + + +From a3012f86d5f96ca814585b181f830861774f29da Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Wed, 20 Jul 2016 22:28:01 -0700 +Subject: [PATCH] add Intel Haswell-EP alias for offcore_response remote L3 + miss + +This patch adds offcore_response_*:L3_MISS_REMOTE and L3_MISS_REMOTE_DRAM +umasks to be consistent with Intel Broadwell. + +Also adds a validation test for it. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_hsw_events.h | 14 ++++++++++++++ + tests/validate_x86.c | 8 ++++++++ + 2 files changed, 22 insertions(+) + +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index 426119b..2a17e47 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -1797,6 +1797,20 @@ static const intel_x86_umask_t hsw_offcore_response[]={ + .umodel = PFM_PMU_INTEL_HSW_EP, + .grpid = 1, + }, ++ { .uname = "L3_MISS_REMOTE", ++ .udesc = "Supplier: counts L3 misses to remote node", ++ .ucode = 0x7ULL << (27+8), ++ .uequiv = "L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P", ++ .umodel = PFM_PMU_INTEL_HSW_EP, ++ .grpid = 1, ++ }, ++ { .uname = "L3_MISS_REMOTE_DRAM", ++ .udesc = "Supplier: counts L3 misses to remote node", ++ .ucode = 0x7ULL << (27+8), ++ .uequiv = "L3_MISS_REMOTE", ++ .umodel = PFM_PMU_INTEL_HSW_EP, ++ .grpid = 1, ++ }, + { .uname = "SPL_HIT", + .udesc = "Supplier: counts L3 supplier hit", + .ucode = 0x1ULL << (30+8), +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 4096372..0247c3e 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -2922,6 +2922,14 @@ static const 
test_event_t x86_test_events[]={ + .codes[1] = 0x3f80400091ull, + .fstr = "hsw_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + }, ++ { SRC_LINE, ++ .name = "hsw_ep::offcore_response_0:any_data:L3_miss_remote", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0x3fb8000091ull, ++ .fstr = "hsw_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, + { SRC_LINE, /* here SNP_ANY gets expanded when passed on the cmdline, but not when added automatically by library */ + .name = "hsw_ep::OFFCORE_RESPONSE_0:DMND_DATA_RD:PF_DATA_RD:PF_L3_DATA_RD:L3_MISS_LOCAL:SNP_ANY:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", + .ret = PFM_SUCCESS, +-- +2.9.3 + + +From 1d57dbe8dbc4864ca501b6f3666c228adbee8910 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Fri, 22 Jul 2016 12:53:32 -0700 +Subject: [PATCH] fix error in pfmlib_is_blacklisted_pmu() with some compilers + +Some compilers or compiler options do not like: + + char buffer[strlen(pfm_cfg.blacklist_pmus) + 1]; + +So revert to a more classic style declaration with heap +allocation via strdup(); + +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_common.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index 4c4c376..6297fdd 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -676,6 +676,9 @@ pfmlib_match_forced_pmu(const char *name) + static int + pfmlib_is_blacklisted_pmu(pfmlib_pmu_t *p) + { ++ char *q, *buffer; ++ int ret = 1; ++ + if (!pfm_cfg.blacklist_pmus) + return 0; + +@@ -683,15 +686,20 @@ pfmlib_is_blacklisted_pmu(pfmlib_pmu_t *p) + * scan list for matching PMU names, we accept substrings. 
+ * for instance: snbep does match snbep* + */ +- char *q, buffer[strlen(pfm_cfg.blacklist_pmus) + 1]; ++ buffer = strdup(pfm_cfg.blacklist_pmus); ++ if (!buffer) ++ return 0; + + strcpy (buffer, pfm_cfg.blacklist_pmus); + for (q = strtok (buffer, ","); q != NULL; q = strtok (NULL, ",")) { + if (strstr (p->name, q) != NULL) { +- return 1; ++ goto done; + } + } +- return 0; ++ ret = 0; ++done: ++ free(buffer); ++ return ret; + } + + static int +-- +2.9.3 + + +From a347a0a29389093e44c1049e351fb20e8702d040 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Fri, 22 Jul 2016 13:02:55 -0700 +Subject: [PATCH] remove duplicate offcore_response_*:l3_miss umask for SNB_EP + +The L3_MISS was duplicated. + +Bug introduced by: +a31c90ed0aec fix/add offcore_response:l3_miss alias for Intel SNB/IVB/HSW/BDW/SKL + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_snb_events.h | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/lib/events/intel_snb_events.h b/lib/events/intel_snb_events.h +index 0d448b7..475dd09 100644 +--- a/lib/events/intel_snb_events.h ++++ b/lib/events/intel_snb_events.h +@@ -1792,13 +1792,6 @@ static const intel_x86_umask_t snb_offcore_response[]={ + .umodel = PFM_PMU_INTEL_SNB_EP, + .grpid = 1, + }, +- { .uname = "L3_MISS", +- .udesc = "Supplier: counts L3 misses to local or remote DRAM", +- .ucode = 0x3ULL << (22+8), +- .uequiv = "LLC_MISS_LOCAL:LLC_MISS_REMOTE", +- .umodel = PFM_PMU_INTEL_SNB_EP, +- .grpid = 1, +- }, + { .uname = "LLC_HITMESF", + .udesc = "Supplier: counts L3 hits in any state (M, E, S, F)", + .ucode = 0xfULL << (18+8), +-- +2.9.3 + + +From 06b296c72838be44d8950dc03227fe0dc8ca1fb1 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Fri, 22 Jul 2016 14:35:21 -0700 +Subject: [PATCH] fix struct validation for pfm_event_attr_info_t + +There was a mismatch between the test and the actual struct. +The compiler adds a padding field of 4 bytes before idx for +both 64 and 32-bit modes. 
So take it into account explicitly +to avoid the test failure. + +Signed-off-by: Stephane Eranian +--- + include/perfmon/pfmlib.h | 5 +++-- + tests/validate.c | 3 ++- + 2 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index ba3a54f..d3a3c41 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -421,6 +421,7 @@ typedef struct { + size_t size; /* struct sizeof */ + uint64_t code; /* attribute code */ + pfm_attr_t type; /* attribute type */ ++ int pad; /* padding */ + uint64_t idx; /* attribute opaque index */ + pfm_attr_ctrl_t ctrl; /* what is providing attr */ + struct { +@@ -450,13 +451,13 @@ typedef struct { + #if __WORDSIZE == 64 + #define PFM_PMU_INFO_ABI0 56 + #define PFM_EVENT_INFO_ABI0 64 +-#define PFM_ATTR_INFO_ABI0 64 ++#define PFM_ATTR_INFO_ABI0 72 + + #define PFM_RAW_ENCODE_ABI0 32 + #else + #define PFM_PMU_INFO_ABI0 44 + #define PFM_EVENT_INFO_ABI0 48 +-#define PFM_ATTR_INFO_ABI0 48 ++#define PFM_ATTR_INFO_ABI0 56 + + #define PFM_RAW_ENCODE_ABI0 20 + #endif +diff --git a/tests/validate.c b/tests/validate.c +index 522a6ab..0da0adc 100644 +--- a/tests/validate.c ++++ b/tests/validate.c +@@ -201,6 +201,7 @@ static const struct_desc_t pfmlib_structs[]={ + FIELD(code, pfm_event_attr_info_t), + FIELD(type, pfm_event_attr_info_t), + FIELD(idx, pfm_event_attr_info_t), ++ FIELD(pad, pfm_event_attr_info_t), /* padding */ + FIELD(ctrl, pfm_event_attr_info_t), + LAST_FIELD + }, +@@ -270,7 +271,7 @@ validate_structs(void) + } + + if (sz != d->sz) { +- printf("Failed (invisible padding of %zu bytes)\n", d->sz - sz); ++ printf("Failed (invisible padding of %zu bytes, total struct size %zu bytes)\n", d->sz - sz, d->sz); + errors++; + continue; + } +-- +2.9.3 + + +From bdf03951b7f493306c2c1adf434edbdb62c0f805 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 23 Aug 2016 00:47:21 -0700 +Subject: [PATCH] Add SQ_MISC:SPLIT_LOCK to Intel Broadwell event table + +As 
SQ_MISC:SPLIT_LOCK to Intel Broadwell event table. +Based on V9 from download.01.org. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_bdw_events.h | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/lib/events/intel_bdw_events.h b/lib/events/intel_bdw_events.h +index 6be3ac9..f6ab78a 100644 +--- a/lib/events/intel_bdw_events.h ++++ b/lib/events/intel_bdw_events.h +@@ -749,6 +749,14 @@ static const intel_x86_umask_t bdw_l1d[]={ + }, + }; + ++static const intel_x86_umask_t bdw_sq_misc[]={ ++ { .uname = "SPLIT_LOCK", ++ .udesc = "Number of split locks in the super queue (SQ)", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_DFL, ++ }, ++}; ++ + static const intel_x86_umask_t bdw_l1d_pend_miss[]={ + { .uname = "PENDING", + .udesc = "Cycles with L1D load misses outstanding", +@@ -2943,6 +2951,15 @@ static const intel_x86_entry_t intel_bdw_pe[]={ + .ngrp = 1, + .umasks = bdw_uops_dispatches_cancelled, + }, ++ { .name = "SQ_MISC", ++ .desc = "SuperQueue miscellaneous", ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0xf4, ++ .numasks = LIBPFM_ARRAY_SIZE(bdw_sq_misc), ++ .ngrp = 1, ++ .umasks = bdw_sq_misc, ++ }, + { .name = "OFFCORE_RESPONSE_0", + .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", + .modmsk = INTEL_V4_ATTRS, +-- +2.9.3 + + +From 98a2c6461dd01512f06c10966429f7d932642c19 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 23 Aug 2016 01:01:56 -0700 +Subject: [PATCH] update Intel Skylake event table + +Based on V22 from download.01.org. 
+ +Added: BR_MISP_RETIRED.NEAR_CALL +Added: SQ_MISC.SPLIT_LOCK +Added: ITLB_MISSES.WALK_COMPLETED_1G +Added: DTLB_LOAD_MISSES.WALK_COMPLETED_1G +Added: DTLB_STORE_MISSES.WALK_COMPLETED_1G +Added: MEM_LOAD_MISC_RETIRED:UC +Added: CPU_CLK_UNHALTED.RING0_TRANS + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_skl_events.h | 74 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 65 insertions(+), 9 deletions(-) + +diff --git a/lib/events/intel_skl_events.h b/lib/events/intel_skl_events.h +index 3a107f3..e7b522d 100644 +--- a/lib/events/intel_skl_events.h ++++ b/lib/events/intel_skl_events.h +@@ -94,10 +94,15 @@ static const intel_x86_umask_t skl_br_misp_retired[]={ + .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, + }, + { .uname = "NEAR_TAKEN", +- .udesc = "number of near branch instructions retired that were mispredicted and taken", ++ .udesc = "Number of near branch instructions retired that were mispredicted and taken", + .ucode = 0x2000, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, ++ { .uname = "NEAR_CALL", ++ .udesc = "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", ++ .ucode = 0x200, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, + }; + + static const intel_x86_umask_t skl_cpu_clk_thread_unhalted[]={ +@@ -129,6 +134,12 @@ static const intel_x86_umask_t skl_cpu_clk_thread_unhalted[]={ + .ucode = 0x200, + .uflags= INTEL_X86_NCOMBO, + }, ++ { .uname = "RING0_TRANS", ++ .udesc = "Counts when the current privilege level transitions from ring 1, 2 or 3 to ring 0 (kernel)", ++ .ucode = 0x000 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ ++ .uequiv = "THREAD_P:e:c=1", ++ .uflags= INTEL_X86_NCOMBO, ++ }, + }; + + static const intel_x86_umask_t skl_cycle_activity[]={ +@@ -219,20 +230,25 @@ static const intel_x86_umask_t skl_dtlb_load_misses[]={ + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WALK_COMPLETED", +- .udesc 
= "Misses in all TLB levels causes a page walk of any page size that completes", ++ .udesc = "Number of misses in all TLB levels causing a page walk of any page size that completes", + .ucode = 0xe00, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WALK_COMPLETED_4K", +- .udesc = "Misses in all TLB levels causes a page walk of 4KB page size that completes", ++ .udesc = "Number of misses in all TLB levels causing a page walk of 4KB page size that completes", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WALK_COMPLETED_2M_4M", +- .udesc = "Misses in all TLB levels causes a page walk of 2MB/4MB page size that completes", ++ .udesc = "Number of misses in all TLB levels causing a page walk of 2MB/4MB page size that completes", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "WALK_COMPLETED_1G", ++ .udesc = "Number of misses in all TLB levels causing a page walk of 1GB page size that completes", ++ .ucode = 0x800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { .uname = "WALK_ACTIVE", + .udesc = "Cycles with at least one hardware walker active for a load", + .ucode = 0x1000 | (0x1 << INTEL_X86_CMASK_BIT), +@@ -263,20 +279,25 @@ static const intel_x86_umask_t skl_itlb_misses[]={ + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WALK_COMPLETED", +- .udesc = "Misses in all TLB levels causes a page walk of any page size that completes", ++ .udesc = "Number of misses in all TLB levels causing a page walk of any page size that completes", + .ucode = 0xe00, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WALK_COMPLETED_4K", +- .udesc = "Misses in all TLB levels causes a page walk of 4KB page size that completes", ++ .udesc = "Number of misses in all TLB levels causing a page walk of 4KB page size that completes", + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "WALK_COMPLETED_2M_4M", +- .udesc = "Misses in all TLB levels causes a page walk of 2MB/4MB page size that completes", ++ .udesc = "Number of misses in all TLB levels 
causing a page walk of 2MB/4MB page size that completes", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "WALK_COMPLETED_1G", ++ .udesc = "Number of misses in all TLB levels causing a page walk of 1GB page size that completes", ++ .ucode = 0x800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, + { .uname = "WALK_DURATION", + .udesc = "Cycles when PMH is busy with page walks", + .ucode = 0x1000, +@@ -539,6 +560,14 @@ static const intel_x86_umask_t skl_l1d[]={ + }, + }; + ++static const intel_x86_umask_t skl_sq_misc[]={ ++ { .uname = "SPLIT_LOCK", ++ .udesc = "Number of split locks in the super queue (SQ)", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_DFL, ++ }, ++}; ++ + static const intel_x86_umask_t skl_l1d_pend_miss[]={ + { .uname = "PENDING", + .udesc = "Cycles with L1D load misses outstanding", +@@ -602,8 +631,8 @@ static const intel_x86_umask_t skl_l2_lines_out[]={ + .ucode = 0x200, + .uflags = INTEL_X86_NCOMBO, + }, +- { .uname = "USELESS_PREF", +- .udesc = "TBD", ++ { .uname = "USELESS_HWPREF", ++ .udesc = "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + .ucode = 0x400, + .uflags = INTEL_X86_NCOMBO, + }, +@@ -1976,6 +2005,14 @@ static const intel_x86_umask_t skl_offcore_requests_buffer[]={ + }, + }; + ++static const intel_x86_umask_t skl_mem_load_misc_retired[]={ ++ { .uname = "UC", ++ .udesc = "Number of uncached load retired", ++ .ucode = 0x400, ++ .uflags= INTEL_X86_PEBS | INTEL_X86_DFL, ++ }, ++}; ++ + static const intel_x86_entry_t intel_skl_pe[]={ + { .name = "UNHALTED_CORE_CYCLES", + .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", +@@ -2602,6 +2639,25 @@ static const intel_x86_entry_t intel_skl_pe[]={ + .ngrp = 1, + .umasks = skl_hw_interrupts, + }, ++ { .name = "SQ_MISC", ++ .desc = "SuperQueue miscellaneous", ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0xf4, ++ .numasks = LIBPFM_ARRAY_SIZE(skl_sq_misc), ++ .ngrp = 
1, ++ .umasks = skl_sq_misc, ++ }, ++ { .name = "MEM_LOAD_MISC_RETIRED", ++ .desc = "Load retired miscellaneous", ++ .modmsk = INTEL_V4_ATTRS, ++ .flags = INTEL_X86_PEBS, ++ .cntmsk = 0xf, ++ .code = 0xd4, ++ .numasks = LIBPFM_ARRAY_SIZE(skl_mem_load_misc_retired), ++ .ngrp = 1, ++ .umasks = skl_mem_load_misc_retired, ++ }, + { .name = "OFFCORE_REQUESTS_BUFFER", + .desc = "Offcore requests buffer", + .modmsk = INTEL_V4_ATTRS, +-- +2.9.3 + + +From 073e4dbbdde1adab02e01c659028bddaea969541 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 23 Aug 2016 09:24:25 -0700 +Subject: [PATCH] add SQ_MISC:SPLIT_LOCK to Intel Haswell event table + +Added: SQ_MISC:SPLIT_LOCK + +Based on V23 of public event table for Haswell published on +download.01.org + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_hsw_events.h | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index 2a17e47..ab211cc 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -2224,6 +2224,14 @@ static const intel_x86_umask_t hsw_avx[]={ + }, + }; + ++static const intel_x86_umask_t hsw_sq_misc[]={ ++ { .uname = "SPLIT_LOCK", ++ .udesc = "Number of split locks in the super queue (SQ)", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_DFL, ++ }, ++}; ++ + static const intel_x86_entry_t intel_hsw_pe[]={ + { .name = "UNHALTED_CORE_CYCLES", + .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", +@@ -2856,6 +2864,15 @@ static const intel_x86_entry_t intel_hsw_pe[]={ + .ngrp = 1, + .umasks = hsw_avx, + }, ++ { .name = "SQ_MISC", ++ .desc = "SuperQueue miscellaneous", ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0xf4, ++ .numasks = LIBPFM_ARRAY_SIZE(hsw_sq_misc), ++ .ngrp = 1, ++ .umasks = hsw_sq_misc, ++ }, + { .name = "OFFCORE_REQUESTS_BUFFER", + .desc = "Offcore reqest buffer", + .modmsk = INTEL_V4_ATTRS, +-- +2.9.3 + + +From 
25117cf79620936ed58c2c7cff72b77fd678a0a7 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 23 Aug 2016 09:30:19 -0700 +Subject: [PATCH] add SQ_MISC:SPLIT_LOCK to Intel IvyBridge event table + +Added: SQ_MISC:SPLIT_LOCK + +Based on V18 of Intel Ivybridge event table published on +download.01.org. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_ivb_events.h | 59 ++++++++++++++++++++++++++++--------------- + 1 file changed, 38 insertions(+), 21 deletions(-) + +diff --git a/lib/events/intel_ivb_events.h b/lib/events/intel_ivb_events.h +index fa29dcb..dd4175a 100644 +--- a/lib/events/intel_ivb_events.h ++++ b/lib/events/intel_ivb_events.h +@@ -1970,6 +1970,14 @@ static const intel_x86_umask_t ivb_offcore_requests_buffer[]={ + }, + }; + ++static const intel_x86_umask_t ivb_sq_misc[]={ ++ { .uname = "SPLIT_LOCK", ++ .udesc = "Number of split locks in the super queue (SQ)", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_DFL, ++ }, ++}; ++ + static const intel_x86_entry_t intel_ivb_pe[]={ + { .name = "ARITH", + .desc = "Counts arithmetic multiply operations", +@@ -2651,24 +2659,33 @@ static const intel_x86_entry_t intel_ivb_pe[]={ + .ngrp = 1, + .umasks = ivb_offcore_requests_buffer, + }, +-{ .name = "OFFCORE_RESPONSE_0", +- .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", +- .modmsk = INTEL_V3_ATTRS, +- .cntmsk = 0xf, +- .code = 0x1b7, +- .flags= INTEL_X86_NHM_OFFCORE, +- .numasks = LIBPFM_ARRAY_SIZE(ivb_offcore_response), +- .ngrp = 3, +- .umasks = ivb_offcore_response, +-}, +-{ .name = "OFFCORE_RESPONSE_1", +- .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", +- .modmsk = INTEL_V3_ATTRS, +- .cntmsk = 0xf, +- .code = 0x1bb, +- .flags= INTEL_X86_NHM_OFFCORE, +- .numasks = LIBPFM_ARRAY_SIZE(ivb_offcore_response), +- .ngrp = 3, +- .umasks = ivb_offcore_response, /* identical to 
actual umasks list for this event */ +-}, +-}; ++ { .name = "SQ_MISC", ++ .desc = "SuperQueue miscellaneous", ++ .modmsk = INTEL_V4_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0xf4, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_sq_misc), ++ .ngrp = 1, ++ .umasks = ivb_sq_misc, ++ }, ++ { .name = "OFFCORE_RESPONSE_0", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .modmsk = INTEL_V3_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x1b7, ++ .flags= INTEL_X86_NHM_OFFCORE, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_offcore_response), ++ .ngrp = 3, ++ .umasks = ivb_offcore_response, ++ }, ++ { .name = "OFFCORE_RESPONSE_1", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .modmsk = INTEL_V3_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x1bb, ++ .flags= INTEL_X86_NHM_OFFCORE, ++ .numasks = LIBPFM_ARRAY_SIZE(ivb_offcore_response), ++ .ngrp = 3, ++ .umasks = ivb_offcore_response, /* identical to actual umasks list for this event */ ++ }, ++ }; +-- +2.9.3 + + +From 6e764d5d2f7a9fbbcdf1c987ab9895600826e467 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 23 Aug 2016 09:54:21 -0700 +Subject: [PATCH] add BR_INST_RETIRED:ALL_TAKEN_BRANCHES to Intel Goldmont + event table + +Added: BR_INST_RETIRED:ALL_TAKEN_BRANCHES + +Based on Goldmont V8 event table published on download.01.org. 
+ +Signed-off-by: Stephane Eranian +--- + lib/events/intel_glm_events.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/lib/events/intel_glm_events.h b/lib/events/intel_glm_events.h +index a7ed811..78dc5da 100644 +--- a/lib/events/intel_glm_events.h ++++ b/lib/events/intel_glm_events.h +@@ -727,6 +727,13 @@ static const intel_x86_umask_t glm_br_inst_retired[]={ + { .uname = "ALL_BRANCHES", + .udesc = "Retired branch instructions (Precise Event)", + .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, ++ { .uname = "ALL_TAKEN_BRANCHES", ++ .udesc = "Retired branch instructions (Precise Event)", ++ .ucode = 0x8000, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, + .grpid = 0, + .ucntmsk = 0xfull, +-- +2.9.3 + + +From 7ac65a64d557a02244fef535b26ceb01b2258159 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 23 Aug 2016 10:05:37 -0700 +Subject: [PATCH] add BR_INST_RETIRED:ALL_TAKEN_BRANCHES to Intel Silvermont + event table + +Added: BR_INST_RETIRED:ALL_TAKEN_BRANCHES + +Based on Silvermont V13 event table published on download.01.org. 
+ +Signed-off-by: Stephane Eranian +--- + lib/events/intel_slm_events.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/lib/events/intel_slm_events.h b/lib/events/intel_slm_events.h +index 3dbd90d..3d54f27 100644 +--- a/lib/events/intel_slm_events.h ++++ b/lib/events/intel_slm_events.h +@@ -127,6 +127,13 @@ static const intel_x86_umask_t slm_br_inst_retired[]={ + .ucode = 0x0, + .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, ++ { .uname = "ALL_TAKEN_BRANCHES", ++ .udesc = "Retired branch instructions (Precise Event)", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ .grpid = 0, ++ .ucntmsk = 0xfull, ++ }, + { .uname = "JCC", + .udesc = "JCC instructions retired (Precise Event)", + .ucode = 0x7e00, +-- +2.9.3 + + +From 408701ebe9cd1bb83b711ebdb5cb3d3dd58bec4b Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 30 Aug 2016 09:45:14 -0700 +Subject: [PATCH] fix encodings of L2_RQSTS:PF_MISS and PF_HIT for HSW/BDW + +The encodings of these two umasks were wrong for Haswell and Broadwell. 
+ +Signed-off-by: Stephane Eranian +--- + lib/events/intel_bdw_events.h | 16 ++++++++++++++-- + lib/events/intel_hsw_events.h | 18 ++++++++++++++++-- + 2 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/lib/events/intel_bdw_events.h b/lib/events/intel_bdw_events.h +index f6ab78a..fba5ad2 100644 +--- a/lib/events/intel_bdw_events.h ++++ b/lib/events/intel_bdw_events.h +@@ -899,7 +899,13 @@ static const intel_x86_umask_t bdw_l2_rqsts[]={ + }, + { .uname = "L2_PF_MISS", + .udesc = "Requests from the L2 hardware prefetchers that miss L2 cache", +- .ucode = 0x3000, ++ .ucode = 0x3800, ++ .uequiv = "PF_MISS", ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PF_MISS", ++ .udesc = "Requests from the L2 hardware prefetchers that miss L2 cache", ++ .ucode = 0x3800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "MISS", +@@ -909,7 +915,13 @@ static const intel_x86_umask_t bdw_l2_rqsts[]={ + }, + { .uname = "L2_PF_HIT", + .udesc = "Requests from the L2 hardware prefetchers that hit L2 cache", +- .ucode = 0x5000, ++ .ucode = 0xd800, ++ .uequiv = "PF_HIT", ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PF_HIT", ++ .udesc = "Requests from the L2 hardware prefetchers that hit L2 cache", ++ .ucode = 0xd800, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ALL_DEMAND_DATA_RD", +diff --git a/lib/events/intel_hsw_events.h b/lib/events/intel_hsw_events.h +index ab211cc..64cb06a 100644 +--- a/lib/events/intel_hsw_events.h ++++ b/lib/events/intel_hsw_events.h +@@ -863,9 +863,16 @@ static const intel_x86_umask_t hsw_l2_rqsts[]={ + }, + { .uname = "L2_PF_MISS", + .udesc = "Requests from the L2 hardware prefetchers that miss L2 cache", +- .ucode = 0x3000, ++ .ucode = 0x3800, ++ .uequiv = "PF_MISS", ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PF_MISS", ++ .udesc = "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", ++ .ucode = 0x3800, + .uflags = INTEL_X86_NCOMBO, + }, ++ + { .uname = "MISS", + .udesc = "All 
requests that miss the L2 cache", + .ucode = 0x3f00, +@@ -873,9 +880,16 @@ static const intel_x86_umask_t hsw_l2_rqsts[]={ + }, + { .uname = "L2_PF_HIT", + .udesc = "Requests from the L2 hardware prefetchers that hit L2 cache", +- .ucode = 0x5000, ++ .ucode = 0xd800, ++ .uequiv = "PF_HIT", + .uflags = INTEL_X86_NCOMBO, + }, ++ { .uname = "PF_HIT", ++ .udesc = "Requests from the L2 hardware prefetchers that hit L2 cache", ++ .ucode = 0xd800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ + { .uname = "ALL_DEMAND_DATA_RD", + .udesc = "Any data read request to L2 cache", + .ucode = 0xe100, +-- +2.9.3 + + +From 359a11a6347b4a6495d4de18a4f916859d8d471a Mon Sep 17 00:00:00 2001 +From: Philip Mucci +Date: Thu, 1 Sep 2016 12:10:14 -0700 +Subject: [PATCH] allow . as a delimiter for event strings + +This patch allows either : or . as the event string delimiter: + +knl::offcore_response_0.any_request.L2_HIT_NEAR_TILE.L2_HIT_FAR_TILE.c=1.u + +is equivalent to + +knl::offcore_response_0:any_request:L2_HIT_NEAR_TILE:L2_HIT_FAR_TILE:c=1:u + +Delimiters can be mixed and matched. + +The change is motivated by the fact that it makes it easier to use vendor +provided symbolic event names directly as many of them use the . as the +event/umask delimiter, e.g., Intel event tables. 
+ +Signed-off-by: Philip Mucci +Signed-off-by: Stephane Eranian +--- + docs/man3/libpfm.3 | 4 ++++ + lib/pfmlib_common.c | 22 +++++++++++++--------- + lib/pfmlib_priv.h | 2 +- + tests/validate_x86.c | 24 ++++++++++++++++++++++++ + 4 files changed, 42 insertions(+), 10 deletions(-) + +diff --git a/docs/man3/libpfm.3 b/docs/man3/libpfm.3 +index 08a0f49..3852a3c 100644 +--- a/docs/man3/libpfm.3 ++++ b/docs/man3/libpfm.3 +@@ -62,6 +62,10 @@ The string structure is defined as follows: + .ce + .B [pmu::][event_name][:unit_mask][:modifier|:modifier=val] + ++or ++.ce ++.B [pmu::][event_name][.unit_mask][.modifier|.modifier=val] ++ + The components are defined as follows: + .TP + .B pmu +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index 6297fdd..b4547be 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -913,9 +913,10 @@ pfmlib_parse_event_attr(char *str, pfmlib_event_desc_t *d) + s = str; + + while(s) { +- p = strchr(s, PFMLIB_ATTR_DELIM); +- if (p) +- *p++ = '\0'; ++ p = s; ++ strsep(&p, PFMLIB_ATTR_DELIM); ++ /* if (p) ++ *p++ = '\0'; */ + + q = strchr(s, '='); + if (q) +@@ -1159,9 +1160,10 @@ pfmlib_parse_equiv_event(const char *event, pfmlib_event_desc_t *d) + if (!str) + return PFM_ERR_NOMEM; + +- p = strchr(s, PFMLIB_ATTR_DELIM); +- if (p) +- *p++ = '\0'; ++ p = s; ++ strsep(&p, PFMLIB_ATTR_DELIM); ++ /* if (p) ++ *p++ = '\0'; */ + + match = pmu->match_event ? 
pmu->match_event : match_event; + +@@ -1234,9 +1236,11 @@ pfmlib_parse_event(const char *event, pfmlib_event_desc_t *d) + pname = s; + s = p + strlen(PFMLIB_PMU_DELIM); + } +- p = strchr(s, PFMLIB_ATTR_DELIM); +- if (p) +- *p++ = '\0'; ++ p = s; ++ strsep(&p, PFMLIB_ATTR_DELIM); ++ /* if (p) ++ *p++ = '\0'; */ ++ + /* + * for each pmu + */ +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index 0d106a4..5cde35c 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -29,7 +29,7 @@ + + #define PFM_PLM_ALL (PFM_PLM0|PFM_PLM1|PFM_PLM2|PFM_PLM3|PFM_PLMH) + +-#define PFMLIB_ATTR_DELIM ':' /* event attribute delimiter */ ++#define PFMLIB_ATTR_DELIM ":." /* event attribute delimiter possible */ + #define PFMLIB_PMU_DELIM "::" /* pmu to event delimiter */ + #define PFMLIB_EVENT_DELIM ',' /* event to event delimiter */ + +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 0247c3e..83b8c88 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -4358,6 +4358,30 @@ static const test_event_t x86_test_events[]={ + .name = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:u:intxcp", + .ret = PFM_ERR_ATTR, + }, ++ /* ++ * test delimiter options ++ */ ++ { SRC_LINE, ++ .name = "glm::ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL.k=1.u=0.e=0.i=0.c=1", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x15201ca, ++ .fstr = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:k=1:u=0:e=0:i=0:c=1", ++ }, ++ { SRC_LINE, ++ .name = "glm::ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL:k=1:u=1:e=0:i=0:c=1", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x15301ca, ++ .fstr = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:k=1:u=1:e=0:i=0:c=1", ++ }, ++ { SRC_LINE, ++ .name = "glm::ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL:k=1:u=1:e=0.i=0.c=1", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x15301ca, ++ .fstr = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:k=1:u=1:e=0:i=0:c=1", ++ }, + }; + #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) + +-- +2.9.3 
+ + +From ef73810593a80b3202daee3e94d090b6ecefa068 Mon Sep 17 00:00:00 2001 +From: Asim YarKhan +Date: Thu, 28 Jul 2016 14:58:11 -0400 +Subject: [PATCH] Add support for Intel Knights Landing core PMU + +This patch adds support for Intel Knights Landing core PMU. + +This patch was contributed by Intel and altered to match updates to +libpfm4. Intel's contributed patch was split into two core and uncore +patches for libpfm4. This is the patch for the KNL core events only. + +Signed-off-by: Peinan Zhang +[yarkhan@icl.utk.edu: Split into core/uncore patches] +Signed-off-by: Asim YarKhan +--- + README | 1 + + docs/Makefile | 1 + + docs/man3/libpfm_intel_knl.3 | 100 ++++ + include/perfmon/pfmlib.h | 3 + + lib/Makefile | 2 + + lib/events/intel_knl_events.h | 1150 +++++++++++++++++++++++++++++++++++++++++ + lib/pfmlib_common.c | 1 + + lib/pfmlib_intel_knl.c | 75 +++ + lib/pfmlib_priv.h | 1 + + tests/validate_x86.c | 96 ++++ + 10 files changed, 1430 insertions(+) + create mode 100644 docs/man3/libpfm_intel_knl.3 + create mode 100644 lib/events/intel_knl_events.h + create mode 100644 lib/pfmlib_intel_knl.c + +diff --git a/README b/README +index ce60d3a..287616e 100644 +--- a/README ++++ b/README +@@ -55,6 +55,7 @@ The library supports many PMUs. 
The current version can handle: + Intel Goldmont + Intel RAPL (energy consumption) + Intel Knights Corner ++ Intel Knights Landing + Intel architectural perfmon v1, v2, v3 + + - For ARM: +diff --git a/docs/Makefile b/docs/Makefile +index 873f31f..f8f8838 100644 +--- a/docs/Makefile ++++ b/docs/Makefile +@@ -53,6 +53,7 @@ ARCH_MAN=libpfm_intel_core.3 \ + libpfm_intel_slm.3 \ + libpfm_intel_skl.3 \ + libpfm_intel_glm.3 \ ++ libpfm_intel_knl.3 \ + libpfm_intel_snbep_unc_cbo.3 \ + libpfm_intel_snbep_unc_ha.3 \ + libpfm_intel_snbep_unc_imc.3 \ +diff --git a/docs/man3/libpfm_intel_knl.3 b/docs/man3/libpfm_intel_knl.3 +new file mode 100644 +index 0000000..e521e01 +--- /dev/null ++++ b/docs/man3/libpfm_intel_knl.3 +@@ -0,0 +1,100 @@ ++.TH LIBPFM 3 "July, 2016" "" "Linux Programmer's Manual" ++.SH NAME ++libpfm_intel_knl - support for Intel Knights Landing core PMU ++.SH SYNOPSIS ++.nf ++.B #include ++.sp ++.B PMU name: knl ++.B PMU desc: Intel Knights Landing ++.sp ++.SH DESCRIPTION ++The library supports the Intel Knights Landing core PMU. It should be noted that ++this PMU model only covers each core's PMU and not the socket level PMU. ++ ++On Knights Landing, the number of generic counters is 4. There is 4-way HyperThreading support. ++The \fBpfm_get_pmu_info()\fR function returns the maximum number of generic counters ++in \fBnum_cntrs\fR. ++ ++.SH MODIFIERS ++The following modifiers are supported on Intel Knights Landing processors: ++.TP ++.B u ++Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. ++This is a boolean modifier. ++.TP ++.B k ++Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. ++This is a boolean modifier. ++.TP ++.B i ++Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR ++occurring. 
This is a boolean modifier. ++.TP ++.B e ++Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event ++to at least one occurrence. This modifier must be combined with a counter mask modifier (c) with a value greater or equal to one. ++This is a boolean modifier. ++.TP ++.B c ++Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles ++in which the number of occurrences of the event is greater or equal to the threshold. This is an integer ++modifier with values in the range [0:255]. ++.TP ++.B t ++Measure on any of the 4 hyper-threads at the same time assuming hyper-threading is enabled. This is a boolean modifier. ++This modifier is only available on fixed counters (unhalted_reference_cycles, instructions_retired, unhalted_core_cycles). ++Depending on the underlying kernel interface, the event may be programmed on a fixed counter or a generic counter, except for ++unhalted_reference_cycles, in which case, this modifier may be ignored or rejected. ++ ++.SH OFFCORE_RESPONSE events ++Intel Knights Landing provides two offcore_response events. They are called OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1. ++ ++Those events need special treatment in the performance monitoring infrastructure ++because each event uses an extra register to store some settings. Thus, in ++case multiple offcore_response events are monitored simultaneously, the kernel needs ++to manage the sharing of that extra register. ++ ++The offcore_response events are exposed as normal events by the library. The extra ++settings are exposed as regular umasks. The library takes care of encoding the ++events according to the underlying kernel interface. ++ ++On Intel Knights Landing, the umasks are divided into 4 categories: request, supplier, ++snoop and average latency. Offcore_response event has two modes of operations: normal and average latency. 
++In the first mode, the two offcore_response events operate independently of each other. The user must provide at ++least one umask for each of the first 3 categories: request, supplier, snoop. In the second mode, the two ++offcore_response events are combined to compute an average latency per request type. ++ ++For the normal mode, there is a special supplier (response) umask called \fBANY_RESPONSE\fR. When this umask ++is used then it overrides any supplier and snoop umasks. In other words, users can ++specify either \fBANY_RESPONSE\fR \fBOR\fR any combinations of supplier + snoops. In case no supplier or snoop ++is specified, the library defaults to using \fBANY_RESPONSE\fR. ++ ++For instance, the following are valid event selections: ++.TP ++.B OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE ++.TP ++.B OFFCORE_RESPONSE_0:ANY_REQUEST ++.TP ++.B OFFCORE_RESPONSE_0:ANY_RFO:DDR_NEAR ++ ++.P ++But the following is illegal: ++ ++.TP ++.B OFFCORE_RESPONSE_0:ANY_RFO:DDR_NEAR:ANY_RESPONSE ++.P ++In average latency mode, \fBOFFCORE_RESPONSE_0\fR must be programmed to select the request types of interest, for instance, \fBDMND_DATA_RD\fR, and the \fBOUTSTANDING\fR umask must be set and no others. The library will enforce that restriction as soon as the \fBOUTSTANDING\fR umask is used. Then \fBOFFCORE_RESPONSE_1\fR must be set with the same request types and the \fBANY_RESPONSE\fR umask. It should be noted that the library encodes events independently of each other and therefore cannot verify that the requests are matching between the two events. ++Example of average latency settings: ++.TP ++.B OFFCORE_RESPONSE_0:DMND_DATA_RD:OUTSTANDING+OFFCORE_RESPONSE_1:DMND_DATA_RD:ANY_RESPONSE ++.TP ++.B OFFCORE_RESPONSE_0:ANY_REQUEST:OUTSTANDING+OFFCORE_RESPONSE_1:ANY_REQUEST:ANY_RESPONSE ++.P ++The average latency for the request(s) is obtained by dividing the counts of \fBOFFCORE_RESPONSE_0\fR by the count of \fBOFFCORE_RESPONSE_1\fR. The ratio is expressed in core cycles. 
++ ++.SH AUTHORS ++.nf ++Stephane Eranian ++.if ++.PP +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index d3a3c41..b584672 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -297,7 +297,10 @@ typedef enum { + PFM_PMU_INTEL_SKL, /* Intel Skylake */ + + PFM_PMU_INTEL_BDW_EP, /* Intel Broadwell EP */ ++ + PFM_PMU_INTEL_GLM, /* Intel Goldmont */ ++ ++ PFM_PMU_INTEL_KNL, /* Intel Knights Landing */ + /* MUST ADD NEW PMU MODELS HERE */ + + PFM_PMU_MAX /* end marker */ +diff --git a/lib/Makefile b/lib/Makefile +index bd74d50..3c5033f 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -93,6 +93,7 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ + pfmlib_intel_hswep_unc_sbo.c \ + pfmlib_intel_knc.c \ + pfmlib_intel_slm.c \ ++ pfmlib_intel_knl.c \ + pfmlib_intel_glm.c \ + pfmlib_intel_netburst.c \ + pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c \ +@@ -250,6 +251,7 @@ INC_X86= pfmlib_intel_x86_priv.h \ + events/intel_snbep_unc_r2pcie_events.h \ + events/intel_snbep_unc_r3qpi_events.h \ + events/intel_knc_events.h \ ++ events/intel_knl_events.h \ + events/intel_ivbep_unc_cbo_events.h \ + events/intel_ivbep_unc_ha_events.h \ + events/intel_ivbep_unc_imc_events.h \ +diff --git a/lib/events/intel_knl_events.h b/lib/events/intel_knl_events.h +new file mode 100644 +index 0000000..d0255ba +--- /dev/null ++++ b/lib/events/intel_knl_events.h +@@ -0,0 +1,1150 @@ ++/* ++ * Copyright (c) 2016 Intel Corp. 
All rights reserved ++ * Contributed by Peinan Zhang ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * This file is part of libpfm, a performance monitoring support library for ++ * applications on Linux. ++ * ++ * PMU: knl (Intel Knights Landing) ++ */ ++ ++static const intel_x86_umask_t knl_icache[]={ ++ { .uname = "HIT", ++ .udesc = "Counts all instruction fetches that hit the instruction cache.", ++ .ucode = 0x100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "MISSES", ++ .udesc = "Counts all instruction fetches that miss the instruction cache or produce memory requests. 
An instruction fetch miss is counted only once and not once for every cycle it is outstanding.", ++ .ucode = 0x200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ACCESSES", ++ .udesc = "Counts all instruction fetches, including uncacheable fetches.", ++ .ucode = 0x300, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_uops_retired[]={ ++ { .uname = "ALL", ++ .udesc = "Counts the number of micro-ops retired.", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "MS", ++ .udesc = "Counts the number of micro-ops retired that are from the complex flows issued by the micro-sequencer (MS).", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "SCALAR_SIMD", ++ .udesc = "Counts the number of scalar SSE, AVX, AVX2, AVX-512 micro-ops retired. More specifically, it counts scalar SSE, AVX, AVX2, AVX-512 micro-ops except for loads (memory-to-register mov-type micro ops), division, sqrt.", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PACKED_SIMD", ++ .udesc = "Counts the number of vector SSE, AVX, AVX2, AVX-512 micro-ops retired. 
More specifically, it counts packed SSE, AVX, AVX2, AVX-512 micro-ops (both floating point and integer) except for loads (memory-to-register mov-type micro-ops), packed byte and word multiplies.", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_inst_retired[]={ ++ { .uname = "ANY_P", ++ .udesc = "Instructions retired using generic counter (precise event)", ++ .ucode = 0x0, ++ .uflags = INTEL_X86_PEBS | INTEL_X86_DFL, ++ }, ++ { .uname = "ANY", ++ .udesc = "Instructions retired using generic counter (precise event)", ++ .uequiv = "ANY_P", ++ .ucode = 0x0, ++ .uflags = INTEL_X86_PEBS, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_l2_requests_reject[]={ ++ { .uname = "ALL", ++ .udesc = "Counts the number of MEC requests from the L2Q that reference a cache line excluding SW prefetches filling only to L2 cache and L1 evictions (automatically exlcudes L2HWP, UC, WC) that were rejected - Multiple repeated rejects should be counted multiple times.", ++ .ucode = 0x000, ++ .uflags = INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_core_reject[]={ ++ { .uname = "ALL", ++ .udesc = "Counts the number of MEC requests that were not accepted into the L2Q because of any L2 queue reject condition. There is no concept of at-ret here. 
It might include requests due to instructions in the speculative path", ++ .ucode = 0x000, ++ .uflags = INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_machine_clears[]={ ++ { .uname = "SMC", ++ .udesc = "Counts the number of times that the machine clears due to program modifying data within 1K of a recently fetched code page.", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_DFL, ++ }, ++ { .uname = "MEMORY_ORDERING", ++ .udesc = "Counts the number of times the machine clears due to memory ordering hazards", ++ .ucode = 0x0200, ++ }, ++ { .uname = "FP_ASSIST", ++ .udesc = "Counts the number of floating operations retired that required microcode assists", ++ .ucode = 0x0400, ++ }, ++ { .uname = "ALL", ++ .udesc = "Counts all nukes", ++ .ucode = 0x0800, ++ }, ++ { .uname = "ANY", ++ .udesc = "Counts all nukes", ++ .uequiv = "ALL", ++ .ucode = 0x0800, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_br_inst_retired[]={ ++ { .uname = "ANY", ++ .udesc = "Counts the number of branch instructions retired (Precise Event)", ++ .ucode = 0x0, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_PEBS, ++ }, ++ { .uname = "ALL_BRANCHES", ++ .udesc = "Counts the number of branch instructions retired", ++ .uequiv = "ANY", ++ .ucode = 0x0, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "JCC", ++ .udesc = "Counts the number of branch instructions retired that were conditional jumps.", ++ .ucode = 0x7e00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "TAKEN_JCC", ++ .udesc = "Counts the number of branch instructions retired that were conditional jumps and predicted taken.", ++ .ucode = 0xfe00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "CALL", ++ .udesc = "Counts the number of near CALL branch instructions retired.", ++ .ucode = 0xf900, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "REL_CALL", ++ .udesc = "Counts the number of near relative CALL branch instructions 
retired.", ++ .ucode = 0xfd00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "IND_CALL", ++ .udesc = "Counts the number of near indirect CALL branch instructions retired. (Precise Event)", ++ .ucode = 0xfb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Counts the number of near RET branch instructions retired. (Precise Event)", ++ .ucode = 0xf700, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "NON_RETURN_IND", ++ .udesc = "Counts the number of branch instructions retired that were near indirect CALL or near indirect JMP. (Precise Event)", ++ .ucode = 0xeb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "FAR_BRANCH", ++ .udesc = "Counts the number of far branch instructions retired. (Precise Event)", ++ .uequiv = "FAR", ++ .ucode = 0xbf00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "FAR", ++ .udesc = "Counts the number of far branch instructions retired. (Precise Event)", ++ .ucode = 0xbf00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_fetch_stall[]={ ++ { .uname = "ICACHE_FILL_PENDING_CYCLES", ++ .udesc = "Counts the number of core cycles the fetch stalls because of an icache miss. 
This is a cummulative count of core cycles the fetch stalled for all icache misses", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_DFL | INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_baclears[]={ ++ { .uname = "ALL", ++ .udesc = "Counts the number of times the front end resteers for any branch as a result of another branch handling mechanism in the front end.", ++ .ucode = 0x100, ++ .uflags = INTEL_X86_DFL | INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY", ++ .udesc = "Counts the number of times the front end resteers for any branch as a result of another branch handling mechanism in the front end.", ++ .uequiv = "ALL", ++ .ucode = 0x100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Counts the number of times the front end resteers for RET branches as a result of another branch handling mechanism in the front end.", ++ .ucode = 0x800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "COND", ++ .udesc = "Counts the number of times the front end resteers for conditional branches as a result of another branch handling mechanism in the front end.", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_cpu_clk_unhalted[]={ ++ { .uname = "THREAD_P", ++ .udesc = "thread cycles when core is not halted", ++ .ucode = 0x0, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "BUS", ++ .udesc = "Bus cycles when core is not halted. This event can give a measurement of the elapsed time. This events has a constant ratio with CPU_CLK_UNHALTED:REF event, which is the maximum bus to processor frequency ratio", ++ .uequiv = "REF_P", ++ .ucode = 0x100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "REF_P", ++ .udesc = "Number of reference cycles that the cpu is not in a halted state. The core enters the halted state when it is running the HLT instruction. In mobile systems, the core frequency may change from time to time. 
This event is not affected by core frequency changes but counts as if the core is running a the same maximum frequency all the time", ++ .ucode = 0x200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_mem_uops_retired[]={ ++ { .uname = "L1_MISS_LOADS", ++ .udesc = "Counts the number of load micro-ops retired that miss in L1 D cache.", ++ .ucode = 0x100, ++ }, ++ { .uname = "LD_DCU_MISS", ++ .udesc = "Counts the number of load micro-ops retired that miss in L1 D cache.", ++ .uequiv = "L1_MISS_LOADS", ++ .ucode = 0x100, ++ }, ++ { .uname = "L2_HIT_LOADS", ++ .udesc = "Counts the number of load micro-ops retired that hit in the L2.", ++ .ucode = 0x200, ++ .uflags = INTEL_X86_PEBS, ++ }, ++ { .uname = "L2_MISS_LOADS", ++ .udesc = "Counts the number of load micro-ops retired that miss in the L2.", ++ .ucode = 0x400, ++ .uflags = INTEL_X86_PEBS, ++ }, ++ { .uname = "LD_L2_MISS", ++ .udesc = "Counts the number of load micro-ops retired that miss in the L2.", ++ .uequiv = "L2_MISS_LOADS", ++ .ucode = 0x400, ++ .uflags = INTEL_X86_PEBS, ++ }, ++ { .uname = "DTLB_MISS_LOADS", ++ .udesc = "Counts the number of load micro-ops retired that cause a DTLB miss.", ++ .ucode = 0x800, ++ .uflags = INTEL_X86_PEBS, ++ }, ++ { .uname = "UTLB_MISS_LOADS", ++ .udesc = "Counts the number of load micro-ops retired that caused micro TLB miss.", ++ .ucode = 0x1000, ++ }, ++ { .uname = "LD_UTLB_MISS", ++ .udesc = "Counts the number of load micro-ops retired that caused micro TLB miss.", ++ .uequiv = "UTLB_MISS_LOADS", ++ .ucode = 0x1000, ++ }, ++ { .uname = "HITM", ++ .udesc = "Counts the loads retired that get the data from the other core in the same tile in M state.", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_PEBS, ++ }, ++ { .uname = "ALL_LOADS", ++ .udesc = "Counts all the load micro-ops retired.", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_DFL, ++ }, ++ { .uname = "ANY_LD", ++ .udesc = "Counts all the load micro-ops retired.", ++ .uequiv = "ALL_LOADS", 
++ .ucode = 0x4000, ++ }, ++ { .uname = "ALL_STORES", ++ .udesc = "Counts all the store micro-ops retired.", ++ .ucode = 0x8000, ++ }, ++ { .uname = "ANY_ST", ++ .udesc = "Counts all the store micro-ops retired.", ++ .uequiv = "ALL_STORES", ++ .ucode = 0x8000, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_page_walks[]={ ++ { .uname = "D_SIDE_CYCLES", ++ .udesc = "Counts the total D-side page walks that are completed or started. The page walks started in the speculative path will also be counted.", ++ .ucode = 0x100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "D_SIDE_WALKS", ++ .udesc = "Counts the total number of core cycles for all the D-side page walks. The cycles for page walks started in speculative path will also be included.", ++ .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), ++ .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "I_SIDE_CYCLES", ++ .udesc = "Counts the total I-side page walks that are completed.", ++ .ucode = 0x200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "I_SIDE_WALKS", ++ .udesc = "Counts the total number of core cycles for all the I-side page walks. The cycles for page walks started in speculative path will also be included.", ++ .ucode = 0x200 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), ++ .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "CYCLES", ++ .udesc = "Counts the total page walks completed (I-side and D-side)", ++ .ucode = 0x300, ++ .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "WALKS", ++ .udesc = "Counts the total number of core cycles for all the page walks. 
The cycles for page walks started in speculative path will also be included.", ++ .ucode = 0x300 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), ++ .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_l2_rqsts[]={ ++ { .uname = "MISS", ++ .udesc = "Counts the number of L2 cache misses", ++ .ucode = 0x4100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "REFERENCE", ++ .udesc = "Counts the total number of L2 cache references.", ++ .ucode = 0x4f00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_recycleq[]={ ++ { .uname = "LD_BLOCK_ST_FORWARD", ++ .udesc = "Counts the number of occurences a retired load gets blocked because its address partially overlaps with a store (Precise Event).", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "LD_BLOCK_STD_NOTREADY", ++ .udesc = "Counts the number of occurences a retired load gets blocked because its address overlaps with a store whose data is not ready.", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ST_SPLITS", ++ .udesc = "Counts the number of occurences a retired store that is a cache line split. Each split should be counted only once.", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "LD_SPLITS", ++ .udesc = "Counts the number of occurences a retired load that is a cache line split. Each split should be counted only once (Precise Event).", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "LOCK", ++ .udesc = "Counts all the retired locked loads. 
It does not include stores because we would double count if we count stores.", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "STA_FULL", ++ .udesc = "Counts the store micro-ops retired that were pushed in the rehad queue because the store address buffer is full.", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY_LD", ++ .udesc = "Counts any retired load that was pushed into the recycle queue for any reason.", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "ANY_ST", ++ .udesc = "Counts any retired store that was pushed into the recycle queue for any reason.", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_offcore_response_0[]={ ++ { .uname = "DMND_DATA_RD", ++ .udesc = "Counts demand cacheable data and L1 prefetch data reads", ++ .ucode = 1ULL << (0 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "DMND_RFO", ++ .udesc = "Counts Demand cacheable data writes", ++ .ucode = 1ULL << (1 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "DMND_CODE_RD", ++ .udesc = "Counts demand code reads and prefetch code reads", ++ .ucode = 1ULL << (2 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_L2_RFO", ++ .udesc = "Counts L2 data RFO prefetches (includes PREFETCHW instruction)", ++ .ucode = 1ULL << (5 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_L2_CODE_RD", ++ .udesc = "Request: number of code reads generated by L2 prefetchers", ++ .ucode = 1ULL << (6 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_READS", ++ .udesc = "Counts Partial reads (UC or WC and is valid only for Outstanding response type).", ++ .ucode = 1ULL << (7 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_WRITES", ++ .udesc = "Counts Partial writes (UC or WT or WP and should be programmed on PMC1)", ++ .ucode = 1ULL << (8 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "UC_CODE_READS", ++ .udesc = "Counts UC code reads (valid only for Outstanding response type)", ++ .ucode = 1ULL << (9 + 8), 
++ .grpid = 0, ++ }, ++ { .uname = "BUS_LOCKS", ++ .udesc = "Counts Bus locks and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "FULL_STREAMING_STORES", ++ .udesc = "Counts Full streaming stores (WC and should be programmed on PMC1)", ++ .ucode = 1ULL << (11 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_SOFTWARE", ++ .udesc = "Counts Software prefetches", ++ .ucode = 1ULL << (12 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_L1_DATA_RD", ++ .udesc = "Counts L1 data HW prefetches", ++ .ucode = 1ULL << (13 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_STREAMING_STORES", ++ .udesc = "Counts Partial streaming stores (WC and should be programmed on PMC1)", ++ .ucode = 1ULL << (14 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "STREAMING_STORES", ++ .udesc = "Counts all streaming stores (WC and should be programmed on PMC1)", ++ .ucode = (1ULL << 14 | 1ULL << 11) << 8, ++ .uequiv = "PARTIAL_STREAMING_STORES:FULL_STREAMING_STORES", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_REQUEST", ++ .udesc = "Counts any request", ++ .ucode = 1ULL << (15 + 8), ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_DATA_RD", ++ .udesc = "Counts Demand cacheable data and L1 prefetch data read requests", ++ .ucode = (1ULL << 0 | 1ULL << 7 | 1ULL << 12 | 1ULL << 13) << 8, ++ .uequiv = "DMND_DATA_RD:PARTIAL_READS:PF_SOFTWARE:PF_L1_DATA_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_RFO", ++ .udesc = "Counts Demand cacheable data write requests", ++ .ucode = (1ULL << 1 | 1ULL << 5) << 8, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_CODE_RD", ++ .udesc = "Counts Demand code reads and prefetch code read requests", ++ .ucode = (1ULL << 2 | 1ULL << 6) << 8, ++ .uequiv = "DMND_CODE_RD:PF_L2_CODE_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_READ", ++ .udesc = "Counts any Read request", ++ .ucode = (1ULL << 0 | 1ULL << 1 | 1ULL << 2 | 1ULL << 5 | 1ULL << 6 | 1ULL << 7 | 1ULL << 9 | 1ULL << 12 | 1ULL << 13 ) << 8, ++ .uequiv = 
"DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:PF_L2_RFO:PF_L2_CODE_RD:PARTIAL_READS:UC_CODE_READS:PF_SOFTWARE:PF_L1_DATA_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_PF_L2", ++ .udesc = "Counts any Prefetch requests", ++ .ucode = (1ULL << 5 | 1ULL << 6) << 8, ++ .uequiv = "PF_L2_RFO:PF_L2_CODE_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_RESPONSE", ++ .udesc = "Accounts for any response", ++ .ucode = (1ULL << 16) << 8, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, ++ .grpid = 1, ++ }, ++ { .uname = "DDR_NEAR", ++ .udesc = "Accounts for data responses from DRAM Local.", ++ .ucode = (1ULL << 31 | 1ULL << 23 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "DDR_FAR", ++ .udesc = "Accounts for data responses from DRAM Far.", ++ .ucode = (1ULL << 31 | 1ULL << 24 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "MCDRAM_NEAR", ++ .udesc = "Accounts for data responses from MCDRAM Local.", ++ .ucode = (1ULL << 31 | 1ULL << 21 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "MCDRAM_FAR", ++ .udesc = "Accounts for data responses from MCDRAM Far or Other tile L2 hit far.", ++ .ucode = (1ULL << 32 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_NEAR_TILE_E_F", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", ++ .ucode = (1ULL << 35 | 1ULL << 19 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_NEAR_TILE_M", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", ++ .ucode = (1ULL << 36 | 1ULL << 19 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_FAR_TILE_E_F", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. 
Valid only for SNC4 cluster mode.", ++ .ucode = (1ULL << 35 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_FAR_TILE_M", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", ++ .ucode = (1ULL << 36 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "NON_DRAM", ++ .udesc = "accounts for responses from any NON_DRAM system address. This includes MMIO transactions", ++ .ucode = (1ULL << 37 | 1ULL << 17 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "MCDRAM", ++ .udesc = "accounts for responses from MCDRAM (local and far)", ++ .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 22 | 1ULL << 21 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "DDR", ++ .udesc = "accounts for responses from DDR (local and far)", ++ .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 24 | 1ULL << 23 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_NEAR_TILE", ++ .udesc = " accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state", ++ .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 20 | 1ULL << 19 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_FAR_TILE", ++ .udesc = "accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.", ++ .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "OUTSTANDING", ++ .udesc = "outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", ++ .ucode = (1ULL << 38) << 8, ++ .uflags = INTEL_X86_GRP_DFL_NONE | INTEL_X86_EXCL_GRP_BUT_0, /* can only be combined with request type bits (grpid = 0) */ ++ .grpid = 2, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_offcore_response_1[]={ ++ { .uname = "DMND_DATA_RD", ++ .udesc = "Counts demand cacheable data and L1 prefetch data reads", ++ .ucode = 1ULL << (0 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "DMND_RFO", ++ .udesc = "Counts Demand cacheable data writes", ++ .ucode = 1ULL << (1 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "DMND_CODE_RD", ++ .udesc = "Counts demand code reads and prefetch code reads", ++ .ucode = 1ULL << (2 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_L2_RFO", ++ .udesc = "Counts L2 data RFO prefetches (includes PREFETCHW instruction)", ++ .ucode = 1ULL << (5 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_L2_CODE_RD", ++ .udesc = "Request: number of code reads generated by L2 prefetchers", ++ .ucode = 1ULL << (6 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_READS", ++ .udesc = "Counts Partial reads (UC or WC and is valid only for Outstanding response type).", ++ .ucode = 1ULL << (7 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_WRITES", ++ .udesc = "Counts Partial writes (UC or WT or WP and should be programmed on PMC1)", ++ .ucode = 1ULL << (8 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "UC_CODE_READS", ++ .udesc = "Counts UC code reads (valid only for Outstanding response type)", ++ .ucode = 1ULL << (9 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "BUS_LOCKS", ++ .udesc = "Counts Bus locks and split lock requests", ++ .ucode = 1ULL << (10 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "FULL_STREAMING_STORES", ++ .udesc = "Counts Full streaming stores (WC and should be programmed on PMC1)", ++ .ucode = 1ULL << (11 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PF_SOFTWARE", ++ .udesc = "Counts Software prefetches", ++ .ucode = 1ULL << (12 + 8), ++ .grpid = 0, ++ }, ++ { .uname = 
"PF_L1_DATA_RD", ++ .udesc = "Counts L1 data HW prefetches", ++ .ucode = 1ULL << (13 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "PARTIAL_STREAMING_STORES", ++ .udesc = "Counts Partial streaming stores (WC and should be programmed on PMC1)", ++ .ucode = 1ULL << (14 + 8), ++ .grpid = 0, ++ }, ++ { .uname = "STREAMING_STORES", ++ .udesc = "Counts all streaming stores (WC and should be programmed on PMC1)", ++ .ucode = (1ULL << 14 | 1ULL << 11) << 8, ++ .uequiv = "PARTIAL_STREAMING_STORES:FULL_STREAMING_STORES", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_REQUEST", ++ .udesc = "Counts any request", ++ .ucode = 1ULL << (15 + 8), ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_DATA_RD", ++ .udesc = "Counts Demand cacheable data and L1 prefetch data read requests", ++ .ucode = (1ULL << 0 | 1ULL << 7 | 1ULL << 12 | 1ULL << 13) << 8, ++ .uequiv = "DMND_DATA_RD:PARTIAL_READS:PF_SOFTWARE:PF_L1_DATA_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_RFO", ++ .udesc = "Counts Demand cacheable data write requests", ++ .ucode = (1ULL << 1 | 1ULL << 5) << 8, ++ .grpid = 0, ++ }, ++ { .uname = "ANY_CODE_RD", ++ .udesc = "Counts Demand code reads and prefetch code read requests", ++ .ucode = (1ULL << 2 | 1ULL << 6) << 8, ++ .uequiv = "DMND_CODE_RD:PF_L2_CODE_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_READ", ++ .udesc = "Counts any Read request", ++ .ucode = (1ULL << 0 | 1ULL << 1 | 1ULL << 2 | 1ULL << 5 | 1ULL << 6 | 1ULL << 7 | 1ULL << 9 | 1ULL << 12 | 1ULL << 13 ) << 8, ++ .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:PF_L2_RFO:PF_L2_CODE_RD:PARTIAL_READS:UC_CODE_READS:PF_SOFTWARE:PF_L1_DATA_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_PF_L2", ++ .udesc = "Counts any Prefetch requests", ++ .ucode = (1ULL << 5 | 1ULL << 6) << 8, ++ .uequiv = "PF_L2_RFO:PF_L2_CODE_RD", ++ .grpid = 0, ++ }, ++ { .uname = "ANY_RESPONSE", ++ .udesc = "Accounts for any response", ++ .ucode = (1ULL << 16) << 8, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | 
INTEL_X86_EXCL_GRP_GT, ++ .grpid = 1, ++ }, ++ { .uname = "DDR_NEAR", ++ .udesc = "Accounts for data responses from DRAM Local.", ++ .ucode = (1ULL << 31 | 1ULL << 23 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "DDR_FAR", ++ .udesc = "Accounts for data responses from DRAM Far.", ++ .ucode = (1ULL << 31 | 1ULL << 24 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "MCDRAM_NEAR", ++ .udesc = "Accounts for data responses from MCDRAM Local.", ++ .ucode = (1ULL << 31 | 1ULL << 21 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "MCDRAM_FAR", ++ .udesc = "Accounts for data responses from MCDRAM Far or Other tile L2 hit far.", ++ .ucode = (1ULL << 32 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_NEAR_TILE_E_F", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", ++ .ucode = (1ULL << 35 | 1ULL << 19 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_NEAR_TILE_M", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", ++ .ucode = (1ULL << 36 | 1ULL << 19 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_FAR_TILE_E_F", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.", ++ .ucode = (1ULL << 35 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_FAR_TILE_M", ++ .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", ++ .ucode = (1ULL << 36 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "NON_DRAM", ++ .udesc = "accounts for responses from any NON_DRAM system address. 
This includes MMIO transactions", ++ .ucode = (1ULL << 37 | 1ULL << 17 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "MCDRAM", ++ .udesc = "accounts for responses from MCDRAM (local and far)", ++ .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 22 | 1ULL << 21 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "DDR", ++ .udesc = "accounts for responses from DDR (local and far)", ++ .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 24 | 1ULL << 23 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_NEAR_TILE", ++ .udesc = " accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state", ++ .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 20 | 1ULL << 19 ) << 8, ++ .grpid = 1, ++ }, ++ { .uname = "L2_HIT_FAR_TILE", ++ .udesc = "accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.", ++ .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 22 ) << 8, ++ .grpid = 1, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_br_misp_retired[]={ ++ { .uname = "ALL_BRANCHES", ++ .udesc = "All mispredicted branches (Precise Event)", ++ .uequiv = "ANY", ++ .ucode = 0x0000, /* architectural encoding */ ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "ANY", ++ .udesc = "All mispredicted branches (Precise Event)", ++ .ucode = 0x0000, /* architectural encoding */ ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, ++ }, ++ { .uname = "JCC", ++ .udesc = "Number of mispredicted conditional branch instructions retired (Precise Event)", ++ .ucode = 0x7e00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "NON_RETURN_IND", ++ .udesc = "Number of mispredicted non-return branch instructions retired (Precise Event)", ++ .ucode = 0xeb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "RETURN", ++ .udesc = "Number of mispredicted return branch instructions retired (Precise Event)", ++ .ucode 
= 0xf700, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "IND_CALL", ++ .udesc = "Number of mispredicted indirect call branch instructions retired (Precise Event)", ++ .ucode = 0xfb00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "TAKEN_JCC", ++ .udesc = "Number of mispredicted taken conditional branch instructions retired (Precise Event)", ++ .ucode = 0xfe00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "CALL", ++ .udesc = "Counts the number of mispredicted near CALL branch instructions retired.", ++ .ucode = 0xf900, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "REL_CALL", ++ .udesc = "Counts the number of mispredicted near relative CALL branch instructions retired.", ++ .ucode = 0xfd00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++ { .uname = "FAR_BRANCH", ++ .udesc = "Counts the number of mispredicted far branch instructions retired.", ++ .ucode = 0xbf00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_no_alloc_cycles[]={ ++ { .uname = "ROB_FULL", ++ .udesc = "Counts the number of core cycles when no micro-ops are allocated and the ROB is full", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "MISPREDICTS", ++ .udesc = "Counts the number of core cycles when no micro-ops are allocated and the alloc pipe is stalled waiting for a mispredicted branch to retire.", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "RAT_STALL", ++ .udesc = "Counts the number of core cycles when no micro-ops are allocated and a RATstall (caused by reservation station full) is asserted.", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "NOT_DELIVERED", ++ .udesc = "Counts the number of core cycles when no micro-ops are allocated, the IQ is empty, and no other condition is blocking allocation.", ++ .ucode = 0x9000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ALL", 
++ .udesc = "Counts the total number of core cycles when no micro-ops are allocated for any reason.", ++ .ucode = 0x7f00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "ANY", ++ .udesc = "Counts the total number of core cycles when no micro-ops are allocated for any reason.", ++ .uequiv = "ALL", ++ .ucode = 0x7f00, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_rs_full_stall[]={ ++ { .uname = "MEC", ++ .udesc = "Counts the number of core cycles when allocation pipeline is stalled and is waiting for a free MEC reservation station entry.", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY", ++ .udesc = "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full.", ++ .ucode = 0x1f00, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_cycles_div_busy[]={ ++ { .uname = "ALL", ++ .udesc = "Counts the number of core cycles when divider is busy. 
Does not imply a stall waiting for the divider.", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_ms_decoded[]={ ++ { .uname = "ENTRY", ++ .udesc = "Counts the number of times the MSROM starts a flow of uops.", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_decode_restriction[]={ ++ { .uname = "PREDECODE_WRONG", ++ .udesc = "Number of times the prediction (from the predecode cache) for instruction length is incorrect", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_entry_t intel_knl_pe[]={ ++{ .name = "UNHALTED_CORE_CYCLES", ++ .desc = "Unhalted core cycles", ++ .modmsk = INTEL_V3_ATTRS, /* any thread only supported in fixed counter */ ++ .cntmsk = 0x200000003ull, ++ .code = 0x3c, ++}, ++{ .name = "UNHALTED_REFERENCE_CYCLES", ++ .desc = "Unhalted reference cycle", ++ .modmsk = INTEL_FIXED3_ATTRS, ++ .cntmsk = 0x400000000ull, ++ .code = 0x0300, /* pseudo encoding */ ++ .flags = INTEL_X86_FIXED, ++}, ++{ .name = "INSTRUCTION_RETIRED", ++ .desc = "Instructions retired (any thread modifier supported in fixed counter)", ++ .modmsk = INTEL_V3_ATTRS, /* any thread only supported in fixed counter */ ++ .cntmsk = 0x100000003ull, ++ .code = 0xc0, ++}, ++{ .name = "INSTRUCTIONS_RETIRED", ++ .desc = "This is an alias for INSTRUCTION_RETIRED (any thread modifier supported in fixed counter)", ++ .modmsk = INTEL_V3_ATTRS, /* any thread only supported in fixed counter */ ++ .equiv = "INSTRUCTION_RETIRED", ++ .cntmsk = 0x10003, ++ .code = 0xc0, ++}, ++{ .name = "LLC_REFERENCES", ++ .desc = "Last level of cache references", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x4f2e, ++}, ++{ .name = "LAST_LEVEL_CACHE_REFERENCES", ++ .desc = "This is an alias for LLC_REFERENCES", ++ .modmsk = INTEL_V2_ATTRS, ++ .equiv = "LLC_REFERENCES", ++ .cntmsk = 0x3, ++ .code = 0x4f2e, ++}, 
++{ .name = "LLC_MISSES", ++ .desc = "Last level of cache misses", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x412e, ++}, ++{ .name = "LAST_LEVEL_CACHE_MISSES", ++ .desc = "This is an alias for LLC_MISSES", ++ .modmsk = INTEL_V2_ATTRS, ++ .equiv = "LLC_MISSES", ++ .cntmsk = 0x3, ++ .code = 0x412e, ++}, ++{ .name = "BRANCH_INSTRUCTIONS_RETIRED", ++ .desc = "Branch instructions retired", ++ .modmsk = INTEL_V2_ATTRS, ++ .equiv = "BR_INST_RETIRED:ANY", ++ .cntmsk = 0x3, ++ .code = 0xc4, ++}, ++{ .name = "MISPREDICTED_BRANCH_RETIRED", ++ .desc = "Mispredicted branch instruction retired", ++ .equiv = "BR_MISP_RETIRED:ANY", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc5, ++ .flags = INTEL_X86_PEBS, ++}, ++/* begin model specific events */ ++{ .name = "ICACHE", ++ .desc = "Instruction fetches", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x80, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_icache), ++ .ngrp = 1, ++ .umasks = knl_icache, ++}, ++{ .name = "UOPS_RETIRED", ++ .desc = "Micro-ops retired", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc2, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_uops_retired), ++ .ngrp = 1, ++ .umasks = knl_uops_retired, ++}, ++{ .name = "INST_RETIRED", ++ .desc = "Instructions retired", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc0, ++ .flags = INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_inst_retired), ++ .ngrp = 1, ++ .umasks = knl_inst_retired, ++}, ++{ .name = "CYCLES_DIV_BUSY", ++ .desc = "Counts the number of core cycles when divider is busy.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xcd, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_cycles_div_busy), ++ .ngrp = 1, ++ .umasks = knl_cycles_div_busy, ++}, ++{ .name = "RS_FULL_STALL", ++ .desc = "Counts the number of core cycles when allocation pipeline is stalled.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xcb, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_rs_full_stall), ++ .ngrp = 1, ++ .umasks = 
knl_rs_full_stall, ++}, ++{ .name = "L2_REQUESTS", ++ .desc = "L2 cache requests", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x2e, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_l2_rqsts), ++ .ngrp = 1, ++ .umasks = knl_l2_rqsts, ++}, ++{ .name = "MACHINE_CLEARS", ++ .desc = "Counts the number of times that the machine clears.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc3, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_machine_clears), ++ .ngrp = 1, ++ .umasks = knl_machine_clears, ++}, ++{ .name = "BR_INST_RETIRED", ++ .desc = "Retired branch instructions", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc4, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_br_inst_retired), ++ .flags = INTEL_X86_PEBS, ++ .ngrp = 1, ++ .umasks = knl_br_inst_retired, ++}, ++{ .name = "BR_MISP_RETIRED", ++ .desc = "Counts the number of mispredicted branch instructions retired.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xc5, ++ .flags = INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_br_misp_retired), ++ .ngrp = 1, ++ .umasks = knl_br_misp_retired, ++}, ++{ .name = "MS_DECODED", ++ .desc = "Number of times the MSROM starts a flow of uops.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xe7, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_ms_decoded), ++ .ngrp = 1, ++ .umasks = knl_ms_decoded, ++}, ++{ .name = "FETCH_STALL", ++ .desc = "Counts the number of core cycles the fetch stalls.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x86, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_fetch_stall), ++ .ngrp = 1, ++ .umasks = knl_fetch_stall, ++}, ++{ .name = "BACLEARS", ++ .desc = "Branch address calculator", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xe6, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_baclears), ++ .ngrp = 1, ++ .umasks = knl_baclears, ++}, ++{ .name = "NO_ALLOC_CYCLES", ++ .desc = "Front-end allocation", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0xca, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_no_alloc_cycles), ++ 
.ngrp = 1, ++ .umasks = knl_no_alloc_cycles, ++}, ++{ .name = "CPU_CLK_UNHALTED", ++ .desc = "Core cycles when core is not halted", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x3c, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_cpu_clk_unhalted), ++ .ngrp = 1, ++ .umasks = knl_cpu_clk_unhalted, ++}, ++{ .name = "MEM_UOPS_RETIRED", ++ .desc = "Counts the number of load micro-ops retired.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x4, ++ .flags = INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_mem_uops_retired), ++ .ngrp = 1, ++ .umasks = knl_mem_uops_retired, ++}, ++{ .name = "PAGE_WALKS", ++ .desc = "Number of page-walks executed", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x5, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_page_walks), ++ .ngrp = 1, ++ .umasks = knl_page_walks, ++}, ++{ .name = "L2_REQUESTS_REJECT", ++ .desc = "Counts the number of MEC requests from the L2Q that reference a cache line were rejected.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x30, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_l2_requests_reject), ++ .ngrp = 1, ++ .umasks = knl_l2_requests_reject, ++}, ++{ .name = "CORE_REJECT_L2Q", ++ .desc = "Number of requests not accepted into the L2Q because of any L2 queue reject condition.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x31, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_core_reject), ++ .ngrp = 1, ++ .umasks = knl_core_reject, ++}, ++{ .name = "RECYCLEQ", ++ .desc = "Counts the number of occurences a retired load gets blocked.", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0x3, ++ .code = 0x03, ++ .flags = INTEL_X86_PEBS, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_recycleq), ++ .ngrp = 1, ++ .umasks = knl_recycleq, ++}, ++{ .name = "OFFCORE_RESPONSE_0", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x01b7, ++ .flags = INTEL_X86_NHM_OFFCORE, ++ 
.numasks = LIBPFM_ARRAY_SIZE(knl_offcore_response_0), ++ .ngrp = 3, ++ .umasks = knl_offcore_response_0, ++}, ++{ .name = "OFFCORE_RESPONSE_1", ++ .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", ++ .modmsk = INTEL_V2_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x02b7, ++ .flags = INTEL_X86_NHM_OFFCORE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_offcore_response_1), ++ .ngrp = 2, ++ .umasks = knl_offcore_response_1, ++}, ++}; +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index b4547be..f4a56df 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -202,6 +202,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= + &intel_hswep_unc_r3qpi1_support, + &intel_hswep_unc_r3qpi2_support, + &intel_hswep_unc_irp_support, ++ &intel_knl_support, + &intel_x86_arch_support, /* must always be last for x86 */ + #endif + +diff --git a/lib/pfmlib_intel_knl.c b/lib/pfmlib_intel_knl.c +new file mode 100644 +index 0000000..eb24b96 +--- /dev/null ++++ b/lib/pfmlib_intel_knl.c +@@ -0,0 +1,75 @@ ++/* ++ * pfmlib_intel_knl.c : Intel Knights Landing core PMU ++ * ++ * Copyright (c) 2016 Intel Corp. All rights reserved ++ * Contributed by Peinan Zhang ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Based on Intel Software Optimization Guide 2015 ++ */ ++ ++/* private headers */ ++#include "pfmlib_priv.h" ++#include "pfmlib_intel_x86_priv.h" ++#include "events/intel_knl_events.h" ++ ++static const int knl_models[] = { ++ 87, /* knights landing */ ++ 0 ++}; ++ ++static int ++pfm_intel_knl_init(void *this) ++{ ++ pfm_intel_x86_cfg.arch_version = 2; ++ return PFM_SUCCESS; ++} ++ ++pfmlib_pmu_t intel_knl_support={ ++ .desc = "Intel Knights Landing", ++ .name = "knl", ++ .pmu = PFM_PMU_INTEL_KNL, ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_pe), ++ .type = PFM_PMU_TYPE_CORE, ++ .num_cntrs = 2, ++ .num_fixed_cntrs = 3, ++ .max_encoding = 2, ++ .pe = intel_knl_pe, ++ .atdesc = intel_x86_mods, ++ .flags = PFMLIB_PMU_FL_RAW_UMASK ++ | INTEL_X86_PMU_FL_ECMASK, ++ .supported_plm = INTEL_X86_PLM, ++ ++ .cpu_family = 6, ++ .cpu_models = knl_models, ++ .pmu_detect = pfm_intel_x86_model_detect, ++ .pmu_init = pfm_intel_knl_init, ++ ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, ++ PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), ++ ++ .get_event_first = pfm_intel_x86_get_event_first, ++ .get_event_next = pfm_intel_x86_get_event_next, ++ .event_is_valid = pfm_intel_x86_event_is_valid, ++ .validate_table = pfm_intel_x86_validate_table, ++ .get_event_info = pfm_intel_x86_get_event_info, ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), ++ .get_event_nattrs = 
pfm_intel_x86_get_event_nattrs, ++}; +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index 5cde35c..c49975f 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -353,6 +353,7 @@ extern pfmlib_pmu_t intel_hswep_unc_r3qpi2_support; + extern pfmlib_pmu_t intel_hswep_unc_irp_support; + extern pfmlib_pmu_t intel_knc_support; + extern pfmlib_pmu_t intel_slm_support; ++extern pfmlib_pmu_t intel_knl_support; + extern pfmlib_pmu_t intel_glm_support; + extern pfmlib_pmu_t power4_support; + extern pfmlib_pmu_t ppc970_support; +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index 83b8c88..cede40b 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -4382,7 +4382,103 @@ static const test_event_t x86_test_events[]={ + .codes[0] = 0x15301ca, + .fstr = "glm::ISSUE_SLOTS_NOT_CONSUMED:RESOURCE_FULL:k=1:u=1:e=0:i=0:c=1", + }, ++ { SRC_LINE, ++ .name = "knl::no_alloc_cycles:all", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x537fca, ++ .fstr = "knl::NO_ALLOC_CYCLES:ALL:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::MEM_UOPS_RETIRED:DTLB_MISS_LOADS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x530804, ++ .fstr = "knl::MEM_UOPS_RETIRED:DTLB_MISS_LOADS:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::uops_retired:any:t", ++ .ret = PFM_ERR_ATTR, ++ }, ++ { SRC_LINE, ++ .name = "knl::unhalted_reference_cycles:u:t", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x710300, ++ .fstr = "knl::UNHALTED_REFERENCE_CYCLES:k=0:u=1:t=1", ++ }, ++ { SRC_LINE, ++ .name = "knl::instructions_retired:k:t", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] =0x7200c0, ++ .fstr = "knl::INSTRUCTION_RETIRED:k=1:u=0:e=0:i=0:c=0:t=1", ++ }, ++ { SRC_LINE, ++ .name = "knl::unhalted_core_cycles:k:t", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x72003c, ++ .fstr = "knl::UNHALTED_CORE_CYCLES:k=1:u=0:e=0:i=0:c=0:t=1", ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_1:any_request", ++ .ret = PFM_SUCCESS, ++ .count 
= 2, ++ .codes[0] = 0x5302b7, ++ .codes[1] = 0x18000, ++ .fstr = "knl::OFFCORE_RESPONSE_1:ANY_REQUEST:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_0:any_read", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0x132e7, ++ .fstr = "knl::OFFCORE_RESPONSE_0:DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:PF_L2_RFO:PF_L2_CODE_RD:PARTIAL_READS:UC_CODE_READS:PF_SOFTWARE:PF_L1_DATA_RD:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_1:any_read", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5302b7, ++ .codes[1] = 0x132e7, ++ .fstr = "knl::OFFCORE_RESPONSE_1:DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:PF_L2_RFO:PF_L2_CODE_RD:PARTIAL_READS:UC_CODE_READS:PF_SOFTWARE:PF_L1_DATA_RD:ANY_RESPONSE:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_0:any_request:ddr_near", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0x80808000ull, ++ .fstr = "knl::OFFCORE_RESPONSE_0:ANY_REQUEST:DDR_NEAR:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_0:any_request:L2_HIT_NEAR_TILE:L2_HIT_FAR_TILE", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0x1800588000ull, ++ .fstr = "knl::OFFCORE_RESPONSE_0:ANY_REQUEST:L2_HIT_NEAR_TILE:L2_HIT_FAR_TILE:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_0:dmnd_data_rd:outstanding", ++ .ret = PFM_SUCCESS, ++ .count = 2, ++ .codes[0] = 0x5301b7, ++ .codes[1] = 0x4000000001ull, ++ .fstr = "knl::OFFCORE_RESPONSE_0:DMND_DATA_RD:OUTSTANDING:k=1:u=1:e=0:i=0:c=0", ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_0:dmnd_data_rd:ddr_near:outstanding", ++ .ret = PFM_ERR_FEATCOMB, ++ }, ++ { SRC_LINE, ++ .name = "knl::offcore_response_1:dmnd_data_rd:outstanding", ++ .ret = PFM_ERR_ATTR, ++ }, + }; ++ + #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) + + static int +-- +2.9.3 + + +From 
d422ba2ed289ba5293c35e11405d0d0ca495d3e9 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 16 Aug 2016 10:08:59 -0700 +Subject: [PATCH] fix Intel Goldmont offcore_response average latency support + +The OUTSTANDING umask is in its own umask group however, it should +not be the default. Instead, the whole group is optional so mark +it as such. This avoids issues encoding events such as: +OFFCORE_RESPONSE_0:dmnd_data_rd:l2_hit + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_glm_events.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/events/intel_glm_events.h b/lib/events/intel_glm_events.h +index 78dc5da..4a11b9f 100644 +--- a/lib/events/intel_glm_events.h ++++ b/lib/events/intel_glm_events.h +@@ -519,7 +519,7 @@ static const intel_x86_umask_t glm_offcore_response_0[]={ + { .uname = "OUTSTANDING", + .udesc = "Outstanding request: counts weighted cycles of outstanding offcore requests of the request type specified in the bits 15:0 of offcore_response from the time the XQ receives the request and any response received. Bits 37:16 must be set to 0. This is only available for offcore_response_0", + .ucode = 1ULL << (38 + 8), +- .uflags = INTEL_X86_DFL | INTEL_X86_EXCL_GRP_BUT_0, /* can only be combined with request type bits (grpid = 0) */ ++ .uflags = INTEL_X86_GRP_DFL_NONE | INTEL_X86_EXCL_GRP_BUT_0, /* can only be combined with request type bits (grpid = 0) */ + .grpid = 3, + .ucntmsk = 0xffull, + }, +-- +2.9.3 + + +From a2348eea45d02dd0e2a22406adb03f858b31a764 Mon Sep 17 00:00:00 2001 +From: Peinan Zhang +Date: Mon, 17 Oct 2016 05:28:44 -0700 +Subject: [PATCH] Add Intel Knights Landing untile PMU support + +This patch adds support for Intel Knights Landing untile (uncore) PMUs. 
+ +The patch covers the following PMUs: + - CHA + - EDC + - IMC + - M2PCIE + +Based on the documentation: +Intel Xeon Phi Processor Performance Monitoring Reference Manual Vol2 rev1.0 June2016 +And event table from download.01.org/perfmon/KNL V9. + +Signed-off-by: Peinan Zhang +[yarkhan@icl.utk.edu: Split into core/uncore patches] +Signed-off-by: Asim YarKhan +Reviewed-by: Stephane Eranian +--- + README | 2 +- + include/perfmon/pfmlib.h | 66 ++ + lib/Makefile | 8 + + lib/events/intel_knl_unc_cha_events.h | 1276 ++++++++++++++++++++++++++++++ + lib/events/intel_knl_unc_edc_events.h | 88 +++ + lib/events/intel_knl_unc_imc_events.h | 68 ++ + lib/events/intel_knl_unc_m2pcie_events.h | 145 ++++ + lib/pfmlib_common.c | 63 ++ + lib/pfmlib_intel_knl_unc_cha.c | 103 +++ + lib/pfmlib_intel_knl_unc_edc.c | 111 +++ + lib/pfmlib_intel_knl_unc_imc.c | 101 +++ + lib/pfmlib_intel_knl_unc_m2pcie.c | 80 ++ + lib/pfmlib_intel_snbep_unc.c | 22 + + lib/pfmlib_intel_snbep_unc_priv.h | 3 + + lib/pfmlib_priv.h | 63 ++ + tests/validate_x86.c | 266 +++++++ + 16 files changed, 2464 insertions(+), 1 deletion(-) + create mode 100644 lib/events/intel_knl_unc_cha_events.h + create mode 100644 lib/events/intel_knl_unc_edc_events.h + create mode 100644 lib/events/intel_knl_unc_imc_events.h + create mode 100644 lib/events/intel_knl_unc_m2pcie_events.h + create mode 100644 lib/pfmlib_intel_knl_unc_cha.c + create mode 100644 lib/pfmlib_intel_knl_unc_edc.c + create mode 100644 lib/pfmlib_intel_knl_unc_imc.c + create mode 100644 lib/pfmlib_intel_knl_unc_m2pcie.c + +diff --git a/README b/README +index 287616e..6a49591 100644 +--- a/README ++++ b/README +@@ -55,7 +55,7 @@ The library supports many PMUs. 
The current version can handle: + Intel Goldmont + Intel RAPL (energy consumption) + Intel Knights Corner +- Intel Knights Landing ++ Intel Knights Landing (core, uncore) + Intel architectural perfmon v1, v2, v3 + + - For ARM: +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index b584672..0e370ba 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -301,6 +301,72 @@ typedef enum { + PFM_PMU_INTEL_GLM, /* Intel Goldmont */ + + PFM_PMU_INTEL_KNL, /* Intel Knights Landing */ ++ PFM_PMU_INTEL_KNL_UNC_IMC0, /* Intel KnightLanding IMC channel 0 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_IMC1, /* Intel KnightLanding IMC channel 1 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_IMC2, /* Intel KnightLanding IMC channel 2 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_IMC3, /* Intel KnightLanding IMC channel 3 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_IMC4, /* Intel KnightLanding IMC channel 4 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_IMC5, /* Intel KnightLanding IMC channel 5 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_IMC_UCLK0,/* Intel KnightLanding IMC UCLK unit 0 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_IMC_UCLK1,/* Intel KnightLanding IMC UCLK unit 1 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK0,/* Intel KnightLanding EDC ECLK unit 0 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK1,/* Intel KnightLanding EDC ECLK unit 1 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK2,/* Intel KnightLanding EDC ECLK unit 2 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK3,/* Intel KnightLanding EDC ECLK unit 3 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK4,/* Intel KnightLanding EDC ECLK unit 4 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK5,/* Intel KnightLanding EDC ECLK unit 5 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK6,/* Intel KnightLanding EDC ECLK unit 6 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK7,/* Intel KnightLanding EDC ECLK unit 7 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK0,/* Intel KnightLanding EDC UCLK unit 0 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK1,/* Intel KnightLanding EDC UCLK unit 1 uncore */ ++ 
PFM_PMU_INTEL_KNL_UNC_EDC_UCLK2,/* Intel KnightLanding EDC UCLK unit 2 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK3,/* Intel KnightLanding EDC UCLK unit 3 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK4,/* Intel KnightLanding EDC UCLK unit 4 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK5,/* Intel KnightLanding EDC UCLK unit 5 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK6,/* Intel KnightLanding EDC UCLK unit 6 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK7,/* Intel KnightLanding EDC UCLK unit 7 uncore */ ++ ++ PFM_PMU_INTEL_KNL_UNC_CHA0, /* Intel KnightLanding CHA unit 0 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA1, /* Intel KnightLanding CHA unit 1 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA2, /* Intel KnightLanding CHA unit 2 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA3, /* Intel KnightLanding CHA unit 3 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA4, /* Intel KnightLanding CHA unit 4 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA5, /* Intel KnightLanding CHA unit 5 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA6, /* Intel KnightLanding CHA unit 6 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA7, /* Intel KnightLanding CHA unit 7 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA8, /* Intel KnightLanding CHA unit 8 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA9, /* Intel KnightLanding CHA unit 9 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA10, /* Intel KnightLanding CHA unit 10 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA11, /* Intel KnightLanding CHA unit 11 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA12, /* Intel KnightLanding CHA unit 12 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA13, /* Intel KnightLanding CHA unit 13 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA14, /* Intel KnightLanding CHA unit 14 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA15, /* Intel KnightLanding CHA unit 15 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA16, /* Intel KnightLanding CHA unit 16 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA17, /* Intel KnightLanding CHA unit 17 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA18, /* Intel KnightLanding CHA unit 18 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA19, /* Intel 
KnightLanding CHA unit 19 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA20, /* Intel KnightLanding CHA unit 20 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA21, /* Intel KnightLanding CHA unit 21 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA22, /* Intel KnightLanding CHA unit 22 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA23, /* Intel KnightLanding CHA unit 23 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA24, /* Intel KnightLanding CHA unit 24 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA25, /* Intel KnightLanding CHA unit 25 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA26, /* Intel KnightLanding CHA unit 26 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA27, /* Intel KnightLanding CHA unit 27 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA28, /* Intel KnightLanding CHA unit 28 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA29, /* Intel KnightLanding CHA unit 29 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA30, /* Intel KnightLanding CHA unit 30 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA31, /* Intel KnightLanding CHA unit 31 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA32, /* Intel KnightLanding CHA unit 32 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA33, /* Intel KnightLanding CHA unit 33 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA34, /* Intel KnightLanding CHA unit 34 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA35, /* Intel KnightLanding CHA unit 35 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA36, /* Intel KnightLanding CHA unit 36 uncore */ ++ PFM_PMU_INTEL_KNL_UNC_CHA37, /* Intel KnightLanding CHA unit 37 uncore */ ++ ++ PFM_PMU_INTEL_KNL_UNC_UBOX, /* Intel KnightLanding Ubox uncore */ ++ PFM_PMU_INTEL_KNL_UNC_M2PCIE, /* Intel KnightLanding M2PCIe uncore */ + /* MUST ADD NEW PMU MODELS HERE */ + + PFM_PMU_MAX /* end marker */ +diff --git a/lib/Makefile b/lib/Makefile +index 3c5033f..20fc385 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -94,6 +94,10 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ + pfmlib_intel_knc.c \ + pfmlib_intel_slm.c \ + pfmlib_intel_knl.c \ ++ pfmlib_intel_knl_unc_imc.c \ ++ pfmlib_intel_knl_unc_edc.c \ ++ pfmlib_intel_knl_unc_cha.c \ 
++ pfmlib_intel_knl_unc_m2pcie.c \ + pfmlib_intel_glm.c \ + pfmlib_intel_netburst.c \ + pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c \ +@@ -271,6 +275,10 @@ INC_X86= pfmlib_intel_x86_priv.h \ + events/intel_hswep_unc_r2pcie_events.h \ + events/intel_hswep_unc_r3qpi_events.h \ + events/intel_hswep_unc_irp_events.h \ ++ events/intel_knl_unc_imc_events.h \ ++ events/intel_knl_unc_edc_events.h \ ++ events/intel_knl_unc_cha_events.h \ ++ events/intel_knl_unc_m2pcie_events.h \ + events/intel_slm_events.h + + INC_MIPS=events/mips_74k_events.h events/mips_74k_events.h +diff --git a/lib/events/intel_knl_unc_cha_events.h b/lib/events/intel_knl_unc_cha_events.h +new file mode 100644 +index 0000000..11ace65 +--- /dev/null ++++ b/lib/events/intel_knl_unc_cha_events.h +@@ -0,0 +1,1276 @@ ++/* ++ * Copyright (c) 2016 Intel Corp. All rights reserved ++ * Contributed by Peinan Zhang ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * This file is part of libpfm, a performance monitoring support library for ++ * applications on Linux. ++ * ++ * PMU: knl_unc_cha (Intel Knights Landing CHA uncore PMU) ++ */ ++ ++static const intel_x86_umask_t knl_unc_cha_llc_lookup[]={ ++ { .uname = "DATA_READ", ++ .udesc = "Data read requests", ++ .ucode = 0x0300, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "WRITE", ++ .udesc = "Write requests. Includes all write transactions (cached, uncached)", ++ .ucode = 0x0500, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "REMOTE_SNOOP", ++ .udesc = "External snoop request", ++ .ucode = 0x0900, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY", ++ .udesc = "Any request", ++ .ucode = 0x1100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_llc_victims[]={ ++ { .uname = "M_STATE", ++ .udesc = "Lines in M state", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "E_STATE", ++ .udesc = "Lines in E state", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "S_STATE", ++ .udesc = "Lines in S state", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "F_STATE", ++ .udesc = "Lines in F state", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "LOCAL", ++ .udesc = "Victimized Lines matching the NID filter.", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "REMOTE", ++ .udesc = "Victimized Lines does not matching the NID.", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++ ++static const intel_x86_umask_t knl_unc_cha_ingress_int_starved[]={ ++ { .uname = "IRQ", ++ .udesc = "Internal starved with IRQ.", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IPQ", ++ .udesc = "Internal starved with IPQ.", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ISMQ", ++ .udesc = "Internal starved with ISMQ.", ++ .ucode = 0x0800, ++ 
.uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PRQ", ++ .udesc = "Internal starved with PRQ.", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ingress_ext[]={ ++ { .uname = "IRQ", ++ .udesc = "IRQ", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IRQ_REJ", ++ .udesc = "IRQ rejected", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IPQ", ++ .udesc = "IPQ", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PRQ", ++ .udesc = "PRQ", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PRQ_REJ", ++ .udesc = "PRQ rejected", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++ ++static const intel_x86_umask_t knl_unc_cha_ingress_entry_reject_q0[]={ ++ { .uname = "AD_REQ_VN0", ++ .udesc = "AD Request", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AD_RSP_VN0", ++ .udesc = "AD Response", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_RSP_VN0", ++ .udesc = "BL Response", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_WB_VN0", ++ .udesc = "BL WB", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_NCB_VN0", ++ .udesc = "BL NCB", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_NCS_VN0", ++ .udesc = "BL NCS", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AK_NON_UPI", ++ .udesc = "AK non upi", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IV_NON_UPI", ++ .udesc = "IV non upi", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ingress_entry_reject_q1[]={ ++ { .uname = "ANY_REJECT", ++ .udesc = "Any reject from request queue0", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, ++ }, ++ { .uname = "SF_VICTIM", ++ .udesc = "SF victim", ++ .ucode = 0x0800, ++ 
.uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "SF_WAY", ++ .udesc = "SF way", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ALLOW_SNP", ++ .udesc = "allow snoop", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PA_MATCH", ++ .udesc = "PA match", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++ ++static const intel_x86_umask_t knl_unc_cha_tor_subevent[]={ ++ { .uname = "IRQ", ++ .udesc = " -IRQ.", ++ .ucode = 0x3100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "EVICT", ++ .udesc = " -SF/LLC Evictions.", ++ .ucode = 0x3200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PRQ", ++ .udesc = " -PRQ.", ++ .ucode = 0x3400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IPQ", ++ .udesc = " -IPQ.", ++ .ucode = 0x3800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "HIT", ++ .udesc = " -Hit (Not a Miss).", ++ .ucode = 0x1f00, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "MISS", ++ .udesc = " -Miss.", ++ .ucode = 0x2f00, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IRQ_HIT", ++ .udesc = " -IRQ HIT.", ++ .ucode = 0x1100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IRQ_MISS", ++ .udesc = " -IRQ MISS.", ++ .ucode = 0x2100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PRQ_HIT", ++ .udesc = " -PRQ HIT.", ++ .ucode = 0x1400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "PRQ_MISS", ++ .udesc = " -PRQ MISS.", ++ .ucode = 0x2400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IPQ_HIT", ++ .udesc = " -IPQ HIT", ++ .ucode = 0x1800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IPQ_MISS", ++ .udesc = " -IPQ MISS", ++ .ucode = 0x2800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_misc[]={ ++ { .uname = "RSPI_WAS_FSE", ++ .udesc = "Silent Snoop Eviction", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "WC_ALIASING", ++ .udesc = "Write Combining Aliasing.", ++ .ucode = 0x0200, ++ .uflags = 
INTEL_X86_NCOMBO, ++ }, ++ { .uname = "RFO_HIT_S", ++ .udesc = "Counts the number of times that an RFO hits in S state.", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "CV0_PREF_VIC", ++ .udesc = "CV0 Prefetch Victim.", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "CV0_PREF_MISS", ++ .udesc = "CV0 Prefetch Miss.", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_tgr_ext[]={ ++ { .uname = "TGR0", ++ .udesc = "for Transgress 0", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "TGR1", ++ .udesc = "for Transgress 1", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "TGR2", ++ .udesc = "for Transgress 2", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "TGR3", ++ .udesc = "for Transgress 3", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "TGR4", ++ .udesc = "for Transgress 4", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "TGR5", ++ .udesc = "for Transgress 5", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "TGR6", ++ .udesc = "for Transgress 6", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "TGR7", ++ .udesc = "for Transgress 7", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_tgr_ext1[]={ ++ { .uname = "TGR8", ++ .udesc = "for Transgress 8", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "ANY_OF_TGR0_THRU_TGR7", ++ .udesc = "for Transgress 0-7", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ring_type_agent[]={ ++ { .uname = "AD_AG0", ++ .udesc = "AD - Agent 0", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AK_AG0", ++ .udesc = "AK - Agent 0", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_AG0", 
++ .udesc = "BL - Agent 0", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IV_AG0", ++ .udesc = "IV - Agent 0", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AD_AG1", ++ .udesc = "AD - Agent 1", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AK_AG1", ++ .udesc = "AK - Agent 1", ++ .ucode = 0x2000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_AG1", ++ .udesc = "BL - Agent 1", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ring_type[]={ ++ { .uname = "AD", ++ .udesc = " - AD ring", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AK", ++ .udesc = " - AK ring", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL", ++ .udesc = " - BL ring", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IV", ++ .udesc = " - IV ring", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_dire_ext[]={ ++ { .uname = "VERT", ++ .udesc = " - vertical", ++ .ucode = 0x0000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "HORZ", ++ .udesc = " - horizontal", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ring_use_vert[]={ ++ { .uname = "UP_EVEN", ++ .udesc = "UP_EVEN", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "UP_ODD", ++ .udesc = "UP_ODD", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "DN_EVEN", ++ .udesc = "DN_EVEN", ++ .ucode = 0x0400, /* distinct from UP_ODD; mirrors RIGHT_EVEN */ ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "DN_ODD", ++ .udesc = "DN_ODD", ++ .ucode = 0x0800, /* mirrors RIGHT_ODD */ ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ring_use_hori[]={ ++ { .uname = "LEFT_EVEN", ++ .udesc = "LEFT_EVEN", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "LEFT_ODD", ++ .udesc = "LEFT_ODD", ++
.ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "RIGHT_EVEN", ++ .udesc = "RIGHT_EVEN", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "RIGHT_ODD", ++ .udesc = "RIGHT_ODD", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ring_use_updn[]={ ++ { .uname = "UP", ++ .udesc = "up", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "DN", ++ .udesc = "down", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_ring_use_lfrt[]={ ++ { .uname = "LEFT", ++ .udesc = "left", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "RIGHT", ++ .udesc = "right", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_iv_snp[]={ ++ { .uname = "IV_SNP_GO_UP", ++ .udesc = "IV_SNP_GO_UP", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IV_SNP_GO_DN", ++ .udesc = "IV_SNP_GO_DN", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_cms_ext[]={ ++ { .uname = "AD_BNC", ++ .udesc = "AD_BNC", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AK_BNC", ++ .udesc = "AK_BNC", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_BNC", ++ .udesc = "BL_BNC", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IV_BNC", ++ .udesc = "IV_BNC", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AD_CRD", ++ .udesc = "AD_CRD", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_CRD", ++ .udesc = "BL_CRD", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_cms_crd_starved[]={ ++ { .uname = "AD_BNC", ++ .udesc = "AD_BNC", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AK_BNC", ++ .udesc =
"AK_BNC", ++ .ucode = 0x0200, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_BNC", ++ .udesc = "BL_BNC", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IV_BNC", ++ .udesc = "IV_BNC", ++ .ucode = 0x0800, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AD_CRD", ++ .udesc = "AD_CRD", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_CRD", ++ .udesc = "BL_CRD", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "IVF", ++ .udesc = "IVF", ++ .ucode = 0x8000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_umask_t knl_unc_cha_cms_busy_starved[]={ ++ { .uname = "AD_BNC", ++ .udesc = "AD_BNC", ++ .ucode = 0x0100, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_BNC", ++ .udesc = "BL_BNC", ++ .ucode = 0x0400, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "AD_CRD", ++ .udesc = "AD_CRD", ++ .ucode = 0x1000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++ { .uname = "BL_CRD", ++ .udesc = "BL_CRD", ++ .ucode = 0x4000, ++ .uflags = INTEL_X86_NCOMBO, ++ }, ++}; ++ ++static const intel_x86_entry_t intel_knl_unc_cha_pe[]={ ++ { .name = "UNC_H_U_CLOCKTICKS", ++ .desc = "Uncore clockticks", ++ .modmsk = 0x0, ++ .cntmsk = 0xf, ++ .code = 0x00, ++ .flags = INTEL_X86_FIXED, ++ }, ++ { .name = "UNC_H_INGRESS_OCCUPANCY", ++ .desc = "Ingress Occupancy. Ingress Occupancy. Counts number of entries in the specified Ingress queue in each cycle", ++ .cntmsk = 0xf, ++ .code = 0x11, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_ext), ++ .umasks = knl_unc_cha_ingress_ext, ++ }, ++ { .name = "UNC_H_INGRESS_INSERTS", ++ .desc = "Ingress Allocations.
Counts number of allocations per cycle into the specified Ingress queue", ++ .cntmsk = 0xf, ++ .code = 0x13, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_ext), ++ .umasks = knl_unc_cha_ingress_ext, ++ }, ++ { .name = "UNC_H_INGRESS_INT_STARVED", ++ .desc = "Cycles Internal Starvation", ++ .cntmsk = 0xf, ++ .code = 0x14, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_int_starved), ++ .umasks = knl_unc_cha_ingress_int_starved, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_IRQ0_REJECT", ++ .desc = "Ingress Request Queue Rejects", ++ .cntmsk = 0xf, ++ .code = 0x18, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q0), ++ .umasks = knl_unc_cha_ingress_entry_reject_q0, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_IRQ01_REJECT", ++ .desc = "Ingress Request Queue Rejects", ++ .cntmsk = 0xf, ++ .code = 0x19, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q1), ++ .umasks = knl_unc_cha_ingress_entry_reject_q1, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_PRQ0_REJECT", ++ .desc = "Ingress Request Queue Rejects", ++ .cntmsk = 0xf, ++ .code = 0x20, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q0), ++ .umasks = knl_unc_cha_ingress_entry_reject_q0, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_PRQ1_REJECT", ++ .desc = "Ingress Request Queue Rejects", ++ .cntmsk = 0xf, ++ .code = 0x21, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q1), ++ .umasks = knl_unc_cha_ingress_entry_reject_q1, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_IPQ0_REJECT", ++ .desc = "Ingress Request Queue Rejects", ++ .cntmsk = 0xf, ++ .code = 0x22, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = 
LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q0), ++ .umasks = knl_unc_cha_ingress_entry_reject_q0, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_IPQ1_REJECT", ++ .desc = "Ingress Request Queue Rejects", ++ .cntmsk = 0xf, ++ .code = 0x23, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q1), ++ .umasks = knl_unc_cha_ingress_entry_reject_q1, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_ISMQ0_REJECT", ++ .desc = "ISMQ Rejects", ++ .cntmsk = 0xf, ++ .code = 0x24, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q0), ++ .umasks = knl_unc_cha_ingress_entry_reject_q0, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_REQ_Q0_RETRY", ++ .desc = "REQUESTQ includes: IRQ, PRQ, IPQ, RRQ, WBQ (everything except for ISMQ)", ++ .cntmsk = 0xf, ++ .code = 0x2a, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q0), ++ .umasks = knl_unc_cha_ingress_entry_reject_q0, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_REQ_Q1_RETRY", ++ .desc = "REQUESTQ includes: IRQ, PRQ, IPQ, RRQ, WBQ (everything except for ISMQ)", ++ .cntmsk = 0xf, ++ .code = 0x2b, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q1), ++ .umasks = knl_unc_cha_ingress_entry_reject_q1, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_ISMQ0_RETRY", ++ .desc = "ISMQ retries", ++ .cntmsk = 0xf, ++ .code = 0x2c, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q0), ++ .umasks = knl_unc_cha_ingress_entry_reject_q0, ++ }, ++ { .name = "UNC_H_INGRESS_RETRY_OTHER0_RETRY", ++ .desc = "Other Queue Retries", ++ .cntmsk = 0xf, ++ .code = 0x2e, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q0), ++ .umasks = knl_unc_cha_ingress_entry_reject_q0, ++ }, ++ { .name = 
"UNC_H_INGRESS_RETRY_OTHER1_RETRY", ++ .desc = "Other Queue Retries", ++ .cntmsk = 0xf, ++ .code = 0x2f, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ingress_entry_reject_q1), ++ .umasks = knl_unc_cha_ingress_entry_reject_q1, ++ }, ++ { .name = "UNC_H_SF_LOOKUP", ++ .desc = "Cache Lookups. Counts the number of times the LLC was accessed.", ++ .cntmsk = 0xf, ++ .code = 0x34, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_llc_lookup), ++ .umasks = knl_unc_cha_llc_lookup, ++ }, ++ { .name = "UNC_H_CACHE_LINES_VICTIMIZED", ++ .desc = "Cache Lookups. Counts the number of times the LLC was accessed.", ++ .cntmsk = 0xf, ++ .code = 0x37, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_llc_victims), ++ .umasks = knl_unc_cha_llc_victims, ++ }, ++ { .name = "UNC_H_TOR_INSERTS", ++ .desc = "Counts the number of entries successfuly inserted into the TOR that match qualifications specified by the subevent.", ++ .modmsk = KNL_UNC_CHA_TOR_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x35, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tor_subevent), ++ .umasks = knl_unc_cha_tor_subevent ++ }, ++ { .name = "UNC_H_TOR_OCCUPANCY", ++ .desc = "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent", ++ .modmsk = KNL_UNC_CHA_TOR_ATTRS, ++ .cntmsk = 0xf, ++ .code = 0x36, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tor_subevent), ++ .umasks = knl_unc_cha_tor_subevent ++ }, ++ { .name = "UNC_H_MISC", ++ .desc = "Miscellaneous events in the Cha", ++ .cntmsk = 0xf, ++ .code = 0x39, ++ .ngrp = 1, ++ .flags = INTEL_X86_NO_AUTOENCODE, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_misc), ++ .umasks = knl_unc_cha_misc, ++ }, ++ { .name = "UNC_H_AG0_AD_CRD_ACQUIRED", ++ .desc = "CMS Agent0 AD 
Credits Acquired.", ++ .cntmsk = 0xf, ++ .code = 0x80, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG0_AD_CRD_ACQUIRED_EXT", ++ .desc = "CMS Agent0 AD Credits Acquired.", ++ .cntmsk = 0xf, ++ .code = 0x81, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG0_AD_CRD_OCCUPANCY", ++ .desc = "CMS Agent0 AD Credits Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x82, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG0_AD_CRD_OCCUPANCY_EXT", ++ .desc = "CMS Agent0 AD Credits Acquired For Transgress.", ++ .cntmsk = 0xf, ++ .code = 0x83, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG1_AD_CRD_ACQUIRED", ++ .desc = "CMS Agent1 AD Credits Acquired .", ++ .cntmsk = 0xf, ++ .code = 0x84, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG1_AD_CRD_ACQUIRED_EXT", ++ .desc = "CMS Agent1 AD Credits Acquired .", ++ .cntmsk = 0xf, ++ .code = 0x85, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG1_AD_CRD_OCCUPANCY", ++ .desc = "CMS Agent1 AD Credits Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x86, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG1_AD_CRD_OCCUPANCY_EXT", ++ .desc = "CMS Agent1 AD Credits Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x87, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG0_BL_CRD_ACQUIRED", ++ .desc = "CMS Agent0 BL Credits Acquired.", ++ .cntmsk = 0xf, ++ .code = 0x88, ++ .ngrp = 1, ++ .numasks = 
LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG0_BL_CRD_ACQUIRED_EXT", ++ .desc = "CMS Agent0 BL Credits Acquired.", ++ .cntmsk = 0xf, ++ .code = 0x89, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG0_BL_CRD_OCCUPANCY", ++ .desc = "CMS Agent0 BL Credits Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x8a, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG0_BL_CRD_OCCUPANCY_EXT", ++ .desc = "CMS Agent0 BL Credits Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x8b, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG1_BL_CRD_ACQUIRED", ++ .desc = "CMS Agent1 BL Credits Acquired.", ++ .cntmsk = 0xf, ++ .code = 0x8c, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG1_BL_CRD_ACQUIRED_EXT", ++ .desc = "CMS Agent1 BL Credits Acquired.", ++ .cntmsk = 0xf, ++ .code = 0x8d, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG1_BL_CRD_OCCUPANCY", ++ .desc = "CMS Agent1 BL Credits Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x8e, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG1_BL_CRD_OCCUPANCY_EXT", ++ .desc = "CMS Agent1 BL Credits Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x8f, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG0_STALL_NO_CRD_EGRESS_HORZ_AD", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD0, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = 
"UNC_H_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_EXT", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD1, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG1_STALL_NO_CRD_EGRESS_HORZ_AD", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD2, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_EXT", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD3, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG0_STALL_NO_CRD_EGRESS_HORZ_BL", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD4, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_EXT", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD5, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_AG1_STALL_NO_CRD_EGRESS_HORZ_BL", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD6, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext), ++ .umasks = knl_unc_cha_tgr_ext, ++ }, ++ { .name = "UNC_H_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_EXT", ++ .desc = "Stall on No AD Transgress Credits.", ++ .cntmsk = 0xf, ++ .code = 0xD7, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_tgr_ext1), ++ .umasks = knl_unc_cha_tgr_ext1, ++ }, ++ { .name = "UNC_H_EGRESS_VERT_OCCUPANCY", ++ .desc = "CMS Vert Egress Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x90, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = 
"UNC_H_EGRESS_VERT_INSERTS", ++ .desc = "CMS Vert Egress Allocations.", ++ .cntmsk = 0xf, ++ .code = 0x91, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = "UNC_H_EGRESS_VERT_CYCLES_FULL", ++ .desc = "Cycles CMS Vertical Egress Queue Is Full.", ++ .cntmsk = 0xf, ++ .code = 0x92, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = "UNC_H_EGRESS_VERT_CYCLES_NE", ++ .desc = "Cycles CMS Vertical Egress Queue Is Not Empty.", ++ .cntmsk = 0xf, ++ .code = 0x93, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = "UNC_H_EGRESS_VERT_NACK", ++ .desc = "CMS Vertical Egress NACKs.", ++ .cntmsk = 0xf, ++ .code = 0x98, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = "UNC_H_EGRESS_VERT_STARVED", ++ .desc = "CMS Vertical Egress Injection Starvation.", ++ .cntmsk = 0xf, ++ .code = 0x9a, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = "UNC_H_EGRESS_VERT_ADS_USED", ++ .desc = "CMS Vertical ADS Used.", ++ .cntmsk = 0xf, ++ .code = 0x9c, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = "UNC_H_EGRESS_VERT_BYPASS", ++ .desc = "CMS Vertical Egress Bypass.", ++ .cntmsk = 0xf, ++ .code = 0x9e, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type_agent), ++ .umasks = knl_unc_cha_ring_type_agent, ++ }, ++ { .name = "UNC_H_EGRESS_HORZ_OCCUPANCY", ++ .desc = "CMS Horizontal Egress Occupancy.", ++ .cntmsk = 0xf, ++ .code = 0x94, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = 
"UNC_H_EGRESS_HORZ_INSERTS", ++ .desc = "CMS Horizontal Egress Inserts.", ++ .cntmsk = 0xf, ++ .code = 0x95, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_EGRESS_HORZ_CYCLES_FULL", ++ .desc = "Cycles CMS Horizontal Egress Queue is Full.", ++ .cntmsk = 0xf, ++ .code = 0x96, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_EGRESS_HORZ_CYCLES_NE", ++ .desc = "Cycles CMS Horizontal Egress Queue is Not Empty.", ++ .cntmsk = 0xf, ++ .code = 0x97, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_EGRESS_HORZ_NACK", ++ .desc = "CMS Horizontal Egress NACKs.", ++ .cntmsk = 0xf, ++ .code = 0x99, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_EGRESS_HORZ_STARVED", ++ .desc = "CMS Horizontal Egress Injection Starvation.", ++ .cntmsk = 0xf, ++ .code = 0x9b, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_EGRESS_HORZ_ADS_USED", ++ .desc = "CMS Horizontal ADS Used.", ++ .cntmsk = 0xf, ++ .code = 0x9d, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_EGRESS_HORZ_BYPASS", ++ .desc = "CMS Horizontal Egress Bypass.", ++ .cntmsk = 0xf, ++ .code = 0x9f, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_RING_BOUNCES_VERT", ++ .desc = "Number of incoming messages from the Vertical ring that were bounced, by ring type.", ++ .cntmsk = 0xf, ++ .code = 0xa0, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_RING_BOUNCES_HORZ", ++ .desc = 
"Number of incoming messages from the Horizontal ring that were bounced, by ring type.", ++ .cntmsk = 0xf, ++ .code = 0xa1, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_RING_SINK_STARVED_VERT", ++ .desc = "Vertical ring sink starvation count.", ++ .cntmsk = 0xf, ++ .code = 0xa2, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_RING_SINK_STARVED_HORZ", ++ .desc = "Horizontal ring sink starvation count.", ++ .cntmsk = 0xf, ++ .code = 0xa3, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_type), ++ .umasks = knl_unc_cha_ring_type, ++ }, ++ { .name = "UNC_H_RING_SRC_THRT", ++ .desc = "Counts cycles in throttle mode.", ++ .cntmsk = 0xf, ++ .code = 0xa4, ++ }, ++ { .name = "UNC_H_FAST_ASSERTED", ++ .desc = "Counts cycles source throttling is adderted", ++ .cntmsk = 0xf, ++ .code = 0xa5, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_dire_ext), ++ .umasks = knl_unc_cha_dire_ext, ++ }, ++ { .name = "UNC_H_VERT_RING_AD_IN_USE", ++ .desc = "Counts the number of cycles that the Vertical AD ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xa6, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_vert), ++ .umasks = knl_unc_cha_ring_use_vert, ++ }, ++ { .name = "UNC_H_HORZ_RING_AD_IN_USE", ++ .desc = "Counts the number of cycles that the Horizontal AD ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xa7, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_hori), ++ .umasks = knl_unc_cha_ring_use_hori, ++ }, ++ { .name = "UNC_H_VERT_RING_AK_IN_USE", ++ .desc = "Counts the number of cycles that the Vertical AK ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xa8, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_vert), ++ .umasks = knl_unc_cha_ring_use_vert, ++ }, ++ { .name = 
"UNC_H_HORZ_RING_AK_IN_USE", ++ .desc = "Counts the number of cycles that the Horizontal AK ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xa9, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_hori), ++ .umasks = knl_unc_cha_ring_use_hori, ++ }, ++ { .name = "UNC_H_VERT_RING_BL_IN_USE", ++ .desc = "Counts the number of cycles that the Vertical BL ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xaa, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_vert), ++ .umasks = knl_unc_cha_ring_use_vert, ++ }, ++ { .name = "UNC_H_HORZ_RING_BL_IN_USE", ++ .desc = "Counts the number of cycles that the Horizontal BL ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xab, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_hori), ++ .umasks = knl_unc_cha_ring_use_hori, ++ }, ++ { .name = "UNC_H_VERT_RING_IV_IN_USE", ++ .desc = "Counts the number of cycles that the Vertical IV ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xac, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_updn), ++ .umasks = knl_unc_cha_ring_use_updn, ++ }, ++ { .name = "UNC_H_HORZ_RING_IV_IN_USE", ++ .desc = "Counts the number of cycles that the Horizontal IV ring is being used at this ring stop.", ++ .cntmsk = 0xf, ++ .code = 0xad, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_ring_use_lfrt), ++ .umasks = knl_unc_cha_ring_use_lfrt, ++ }, ++ { .name = "UNC_H_EGRESS_ORDERING", ++ .desc = "Counts number of cycles IV was blocked in the TGR Egress due to SNP/GO Ordering requirements.", ++ .cntmsk = 0xf, ++ .code = 0xae, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_iv_snp), ++ .umasks = knl_unc_cha_iv_snp, ++ }, ++ { .name = "UNC_H_TG_INGRESS_OCCUPANCY", ++ .desc = "Transgress Ingress Occupancy. 
Occupancy event for the Ingress buffers in the CMS The Ingress is used to queue up requests received from the mesh.", ++ .cntmsk = 0xf, ++ .code = 0xb0, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_cms_ext), ++ .umasks = knl_unc_cha_cms_ext, ++ }, ++ { .name = "UNC_H_TG_INGRESS_INSERTS", ++ .desc = "Transgress Ingress Allocations. Number of allocations into the CMS Ingress The Ingress is used to queue up requests received from the mesh.", ++ .cntmsk = 0xf, ++ .code = 0xb1, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_cms_ext), ++ .umasks = knl_unc_cha_cms_ext, ++ }, ++ { .name = "UNC_H_TG_INGRESS_BYPASS", ++ .desc = "Transgress Ingress Bypass. Number of packets bypassing the CMS Ingress.", ++ .cntmsk = 0xf, ++ .code = 0xb2, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_cms_ext), ++ .umasks = knl_unc_cha_cms_ext, ++ }, ++ { .name = "UNC_H_TG_INGRESS_CRD_STARVED", ++ .desc = "Transgress Injection Starvation. Counts cycles under injection starvation mode. This starvation is triggered when the CMS Ingress cannot send a transaction onto the mesh for a long period of time. In this case, the Ingress is unable to forward to the Egress due to a lack of credit.", ++ .cntmsk = 0xf, ++ .code = 0xb3, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_cms_crd_starved), ++ .umasks = knl_unc_cha_cms_crd_starved, ++ }, ++ { .name = "UNC_H_TG_INGRESS_BUSY_STARVED", ++ .desc = "Transgress Injection Starvation. Counts cycles under injection starvation mode. This starvation is triggered when the CMS Ingress cannot send a transaction onto the mesh for a long period of time. 
In this case, because a message from the other queue has higher priority.", ++ .cntmsk = 0xf, ++ .code = 0xb4, ++ .ngrp = 1, ++ .numasks = LIBPFM_ARRAY_SIZE(knl_unc_cha_cms_busy_starved), ++ .umasks = knl_unc_cha_cms_busy_starved, ++ }, ++}; +diff --git a/lib/events/intel_knl_unc_edc_events.h b/lib/events/intel_knl_unc_edc_events.h +new file mode 100644 +index 0000000..3cbd154 +--- /dev/null ++++ b/lib/events/intel_knl_unc_edc_events.h +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2016 Intel Corp. All rights reserved ++ * Contributed by Peinan Zhang ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * This file is part of libpfm, a performance monitoring support library for ++ * applications on Linux. 
++ *
++ * PMU: knl_unc_edc (Intel Knights Landing EDC_UCLK, EDC_ECLK uncore PMUs)
++ */
++
++static const intel_x86_umask_t knl_unc_edc_uclk_access_count[]={ /* unit masks used by UNC_E_EDC_ACCESS */
++  { .uname = "HIT_CLEAN",
++    .udesc = "Hit E",
++    .ucode = 0x0100,
++  },
++  { .uname = "HIT_DIRTY",
++    .udesc = "Hit M",
++    .ucode = 0x0200,
++  },
++  { .uname = "MISS_CLEAN",
++    .udesc = "Miss E",
++    .ucode = 0x0400,
++  },
++  { .uname = "MISS_DIRTY",
++    .udesc = "Miss M",
++    .ucode = 0x0800,
++  },
++  { .uname = "MISS_INVALID",
++    .udesc = "Miss I",
++    .ucode = 0x1000,
++  },
++  { .uname = "MISS_GARBAGE",
++    .udesc = "Miss G",
++    .ucode = 0x2000,
++  },
++};
++
++
++static const intel_x86_entry_t intel_knl_unc_edc_uclk_pe[]={ /* event table for the EDC UCLK PMUs */
++  { .name = "UNC_E_U_CLOCKTICKS",
++    .desc = "EDC UCLK clockticks (generic counters)",
++    .code = 0x00, /*encoding for generic counters */
++    .cntmsk = 0xf,
++  },
++  { .name = "UNC_E_EDC_ACCESS",
++    .desc = "Number of EDC Access Hits or Misses.",
++    .code = 0x02,
++    .cntmsk = 0xf,
++    .ngrp = 1,
++    .numasks = LIBPFM_ARRAY_SIZE(knl_unc_edc_uclk_access_count),
++    .umasks = knl_unc_edc_uclk_access_count
++  },
++};
++
++static const intel_x86_entry_t intel_knl_unc_edc_eclk_pe[]={ /* event table for the EDC ECLK PMUs; no unit masks */
++  { .name = "UNC_E_E_CLOCKTICKS",
++    .desc = "EDC ECLK clockticks (generic counters)",
++    .code = 0x00, /*encoding for generic counters */
++    .cntmsk = 0xf,
++  },
++  { .name = "UNC_E_RPQ_INSERTS",
++    .desc = "Counts total number of EDC RPQ inserts",
++    .code = 0x0101,
++    .cntmsk = 0xf,
++  },
++  { .name = "UNC_E_WPQ_INSERTS",
++    .desc = "Counts total number of EDC WPQ inserts",
++    .code = 0x0102,
++    .cntmsk = 0xf,
++  },
++};
+diff --git a/lib/events/intel_knl_unc_imc_events.h b/lib/events/intel_knl_unc_imc_events.h
+new file mode 100644
+index 0000000..cc0aa78
+--- /dev/null
++++ b/lib/events/intel_knl_unc_imc_events.h
+@@ -0,0 +1,68 @@
++/*
++ * Copyright (c) 2016 Intel Corp. 
All rights reserved
++ * Contributed by Peinan Zhang
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
++ * of the Software, and to permit persons to whom the Software is furnished to do so,
++ * subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * This file is part of libpfm, a performance monitoring support library for
++ * applications on Linux.
++ *
++ * PMU: knl_unc_imc (Intel Knights Landing IMC uncore PMU)
++ */
++
++static const intel_x86_umask_t knl_unc_m_cas_count[]={ /* unit masks used by UNC_M_CAS_COUNT */
++  { .uname = "ALL",
++    .udesc = "Counts total number of DRAM CAS commands issued on this channel",
++    .ucode = 0x0300, /* bitwise OR of the RD (0x0100) and WR (0x0200) codes below */
++  },
++  { .uname = "RD",
++    .udesc = "Counts all DRAM reads on this channel, incl. underfills",
++    .ucode = 0x0100,
++  },
++  { .uname = "WR",
++    .udesc = "Counts number of DRAM write CAS commands on this channel",
++    .ucode = 0x0200,
++  },
++};
++
++
++static const intel_x86_entry_t intel_knl_unc_imc_pe[]={ /* IMC event table: DCLK ticks and CAS counts */
++  { .name = "UNC_M_D_CLOCKTICKS",
++    .desc = "IMC Uncore DCLK counts",
++    .code = 0x00, /*encoding for generic counters */
++    .cntmsk = 0xf,
++  },
++  { .name = "UNC_M_CAS_COUNT",
++    .desc = "DRAM RD_CAS and WR_CAS Commands.",
++    .code = 0x03,
++    .cntmsk = 0xf,
++    .ngrp = 1, /* all unit masks of this event sit in a single group */
++    .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m_cas_count),
++    .umasks = knl_unc_m_cas_count,
++  },
++};
++
++static const intel_x86_entry_t intel_knl_unc_imc_uclk_pe[]={ /* IMC UCLK event table: clockticks only */
++  { .name = "UNC_M_U_CLOCKTICKS",
++    .desc = "IMC UCLK counts",
++    .code = 0x00, /*encoding for generic counters */
++    .cntmsk = 0xf,
++  },
++};
++
++
+diff --git a/lib/events/intel_knl_unc_m2pcie_events.h b/lib/events/intel_knl_unc_m2pcie_events.h
+new file mode 100644
+index 0000000..7c17c2c
+--- /dev/null
++++ b/lib/events/intel_knl_unc_m2pcie_events.h
+@@ -0,0 +1,145 @@
++/*
++ * Copyright (c) 2016 Intel Corp. All rights reserved
++ * Contributed by Peinan Zhang
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
++ * of the Software, and to permit persons to whom the Software is furnished to do so,
++ * subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * This file is part of libpfm, a performance monitoring support library for
++ * applications on Linux.
++ *
++ * PMU: knl_unc_m2pcie (Intel Knights Landing M2PCIe uncore)
++ */
++
++
++static const intel_x86_umask_t knl_unc_m2p_ingress_cycles_ne[]={ /* unit masks used by UNC_M2P_INGRESS_CYCLES_NE */
++  { .uname = "CBO_IDI",
++    .udesc = "CBO_IDI",
++    .ucode = 0x0100,
++  },
++  { .uname = "CBO_NCB",
++    .udesc = "CBO_NCB",
++    .ucode = 0x0200,
++  },
++  { .uname = "CBO_NCS",
++    .udesc = "CBO_NCS",
++    .ucode = 0x0400,
++  },
++  { .uname = "ALL",
++    .udesc = "All",
++    .ucode = 0x0800, /* NOTE(review): a separate bit, not the OR of the three codes above - confirm */
++    .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL,
++  },
++};
++
++
++static const intel_x86_umask_t knl_unc_m2p_egress_cycles[]={ /* shared by UNC_M2P_EGRESS_CYCLES_NE and UNC_M2P_EGRESS_CYCLES_FULL */
++  { .uname = "AD_0",
++    .udesc = "AD_0",
++    .ucode = 0x0100,
++  },
++  { .uname = "AK_0",
++    .udesc = "AK_0",
++    .ucode = 0x0200,
++  },
++  { .uname = "BL_0",
++    .udesc = "BL_0",
++    .ucode = 0x0400,
++  },
++  { .uname = "AD_1",
++    .udesc = "AD_1",
++    .ucode = 0x0800,
++  },
++  { .uname = "AK_1",
++    .udesc = "AK_1",
++    .ucode = 0x1000,
++  },
++  { .uname = "BL_1",
++    .udesc = "BL_1",
++    .ucode = 0x2000,
++  },
++};
++
++static const intel_x86_umask_t knl_unc_m2p_egress_inserts[]={ /* unit masks used by UNC_M2P_EGRESS_INSERTS */
++  { .uname = "AD_0",
++    .udesc = "AD_0",
++    .ucode = 0x0100,
++  },
++  { .uname = "AK_0",
++    .udesc = "AK_0",
++    .ucode = 0x0200,
++  },
++  { .uname = "BL_0",
++    .udesc = "BL_0",
++    .ucode = 0x0400,
++  },
++  { .uname = "AK_CRD_0",
++    .udesc = "AK_CRD_0",
++    .ucode = 0x0800,
++  },
++  { .uname = "AD_1",
++    .udesc = "AD_1",
++    .ucode = 0x1000,
++  },
++  { .uname = "AK_1",
++    .udesc = "AK_1",
++    .ucode = 0x2000,
++  },
++  { .uname = "BL_1",
++    .udesc = "BL_1",
++    .ucode = 0x4000,
++  },
++  { .uname = "AK_CRD_1",
++    .udesc = "AK_CRD_1",
++    .ucode = 0x8000,
++  },
++};
++
++static const intel_x86_entry_t intel_knl_unc_m2pcie_pe[]={ /* event table for the M2PCIe uncore PMU */
++  { .name = "UNC_M2P_INGRESS_CYCLES_NE",
++    .desc = "Ingress Queue Cycles Not Empty. Counts the number of cycles when the M2PCIe Ingress is not empty",
++    .code = 0x10,
++    .cntmsk = 0xf,
++    .ngrp = 1,
++    .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_ingress_cycles_ne),
++    .umasks = knl_unc_m2p_ingress_cycles_ne
++  },
++  { .name = "UNC_M2P_EGRESS_CYCLES_NE",
++    .desc = "Egress (to CMS) Cycles Not Empty. Counts the number of cycles when the M2PCIe Egress is not empty",
++    .code = 0x23,
++    .cntmsk = 0x3, /* NOTE(review): the other three events in this table use 0xf - confirm 0x3 is intended */
++    .ngrp = 1,
++    .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_egress_cycles),
++    .umasks = knl_unc_m2p_egress_cycles
++  },
++  { .name = "UNC_M2P_EGRESS_INSERTS",
++    .desc = "Egress (to CMS) Ingress. Counts the number of messages inserted into the M2PCIe Egress queue",
++    .code = 0x24,
++    .cntmsk = 0xf,
++    .ngrp = 1,
++    .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_egress_inserts),
++    .umasks = knl_unc_m2p_egress_inserts
++  },
++  { .name = "UNC_M2P_EGRESS_CYCLES_FULL",
++    .desc = "Egress (to CMS) Cycles Full. Counts the number of cycles when the M2PCIe Egress is full",
++    .code = 0x25,
++    .cntmsk = 0xf,
++    .ngrp = 1,
++    .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_egress_cycles),
++    .umasks = knl_unc_m2p_egress_cycles
++  },
++};
+diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c
+index f4a56df..cff4d2e 100644
+--- a/lib/pfmlib_common.c
++++ b/lib/pfmlib_common.c
+@@ -203,6 +203,69 @@ static pfmlib_pmu_t *pfmlib_pmus[]=
+ 	&intel_hswep_unc_r3qpi2_support,
+ 	&intel_hswep_unc_irp_support,
+ 	&intel_knl_support,
++	&intel_knl_unc_imc0_support,
++	&intel_knl_unc_imc1_support,
++	&intel_knl_unc_imc2_support,
++	&intel_knl_unc_imc3_support,
++	&intel_knl_unc_imc4_support,
++	&intel_knl_unc_imc5_support,
++	&intel_knl_unc_imc_uclk0_support,
++	&intel_knl_unc_imc_uclk1_support,
++	&intel_knl_unc_edc_uclk0_support,
++	&intel_knl_unc_edc_uclk1_support,
++	&intel_knl_unc_edc_uclk2_support,
++	&intel_knl_unc_edc_uclk3_support,
++	&intel_knl_unc_edc_uclk4_support,
++	&intel_knl_unc_edc_uclk5_support,
++	&intel_knl_unc_edc_uclk6_support,
++	&intel_knl_unc_edc_uclk7_support,
++	&intel_knl_unc_edc_eclk0_support,
++	&intel_knl_unc_edc_eclk1_support,
++	&intel_knl_unc_edc_eclk2_support,
++	&intel_knl_unc_edc_eclk3_support,
++	&intel_knl_unc_edc_eclk4_support,
++	&intel_knl_unc_edc_eclk5_support,
++	&intel_knl_unc_edc_eclk6_support,
++	&intel_knl_unc_edc_eclk7_support,
++	&intel_knl_unc_cha0_support,
++	&intel_knl_unc_cha1_support,
++	&intel_knl_unc_cha2_support,
++	&intel_knl_unc_cha3_support,
++	&intel_knl_unc_cha4_support,
++	&intel_knl_unc_cha5_support,
++	&intel_knl_unc_cha6_support,
++	&intel_knl_unc_cha7_support,
++	&intel_knl_unc_cha8_support,
++	&intel_knl_unc_cha9_support,
++	&intel_knl_unc_cha10_support,
++	&intel_knl_unc_cha11_support,
++	&intel_knl_unc_cha12_support,
++	&intel_knl_unc_cha13_support,
++	&intel_knl_unc_cha14_support,
++	&intel_knl_unc_cha15_support,
++	&intel_knl_unc_cha16_support,
++	&intel_knl_unc_cha17_support,
++	&intel_knl_unc_cha18_support,
++ 
&intel_knl_unc_cha19_support,
++	&intel_knl_unc_cha20_support,
++	&intel_knl_unc_cha21_support,
++	&intel_knl_unc_cha22_support,
++	&intel_knl_unc_cha23_support,
++	&intel_knl_unc_cha24_support,
++	&intel_knl_unc_cha25_support,
++	&intel_knl_unc_cha26_support,
++	&intel_knl_unc_cha27_support,
++	&intel_knl_unc_cha28_support,
++	&intel_knl_unc_cha29_support,
++	&intel_knl_unc_cha30_support,
++	&intel_knl_unc_cha31_support,
++	&intel_knl_unc_cha32_support,
++	&intel_knl_unc_cha33_support,
++	&intel_knl_unc_cha34_support,
++	&intel_knl_unc_cha35_support,
++	&intel_knl_unc_cha36_support,
++	&intel_knl_unc_cha37_support,
++	&intel_knl_unc_m2pcie_support,
+ 	&intel_x86_arch_support, /* must always be last for x86 */
+ #endif
+ 
+diff --git a/lib/pfmlib_intel_knl_unc_cha.c b/lib/pfmlib_intel_knl_unc_cha.c
+new file mode 100644
+index 0000000..4f2ee4c
+--- /dev/null
++++ b/lib/pfmlib_intel_knl_unc_cha.c
+@@ -0,0 +1,103 @@
++/*
++ * pfmlib_intel_knl_unc_cha.c : Intel KnightsLanding CHA uncore PMU
++ *
++ * Copyright (c) 2016 Intel Corp. All rights reserved
++ * Contributed by Peinan Zhang
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
++ * of the Software, and to permit persons to whom the Software is furnished to do so,
++ * subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++#include <sys/types.h> /* NOTE(review): the five system header names were missing from this patch text; restored - confirm against upstream */
++#include <ctype.h>
++#include <string.h>
++#include <stdlib.h>
++#include <stdio.h>
++
++/* private headers */
++#include "pfmlib_priv.h"
++#include "pfmlib_intel_x86_priv.h"
++#include "pfmlib_intel_snbep_unc_priv.h"
++#include "events/intel_knl_unc_cha_events.h"
++
++#define DEFINE_CHA_BOX(n) /* defines intel_knl_unc_cha<n>_support, the PMU descriptor for CHA box n */ \
++pfmlib_pmu_t intel_knl_unc_cha##n##_support = { \
++	.desc = "Intel KnightLanding CHA "#n" uncore", \
++	.name = "knl_unc_cha"#n, /* #n stringizes the box number into the PMU name */ \
++	.perf_name = "uncore_cha_"#n, \
++	.pmu = PFM_PMU_INTEL_KNL_UNC_CHA##n, \
++	.pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_cha_pe), \
++	.type = PFM_PMU_TYPE_UNCORE, \
++	.num_cntrs = 4, \
++	.num_fixed_cntrs = 0, \
++	.max_encoding = 1, \
++	.pe = intel_knl_unc_cha_pe, /* every box shares the one CHA event table */ \
++	.atdesc = snbep_unc_mods, \
++	.flags = PFMLIB_PMU_FL_RAW_UMASK, \
++	.pmu_detect = pfm_intel_knl_unc_detect, \
++	.get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \
++	PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \
++	PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \
++	.get_event_first = pfm_intel_x86_get_event_first, \
++	.get_event_next = pfm_intel_x86_get_event_next, \
++	.event_is_valid = pfm_intel_x86_event_is_valid, \
++	.validate_table = pfm_intel_x86_validate_table, \
++	.get_event_info = pfm_intel_x86_get_event_info, \
++	.get_event_attr_info = pfm_intel_x86_get_event_attr_info, \
++	PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \
++	.get_event_nattrs = pfm_intel_x86_get_event_nattrs, \
++};
++
++DEFINE_CHA_BOX(0); /* boxes 0..37: one descriptor per &intel_knl_unc_cha<n>_support entry in pfmlib_pmus[] */
++DEFINE_CHA_BOX(1);
++DEFINE_CHA_BOX(2);
++DEFINE_CHA_BOX(3);
++DEFINE_CHA_BOX(4);
++DEFINE_CHA_BOX(5);
++DEFINE_CHA_BOX(6);
++DEFINE_CHA_BOX(7);
++DEFINE_CHA_BOX(8);
++DEFINE_CHA_BOX(9);
++DEFINE_CHA_BOX(10);
++DEFINE_CHA_BOX(11);
++DEFINE_CHA_BOX(12);
++DEFINE_CHA_BOX(13);
++DEFINE_CHA_BOX(14);
++DEFINE_CHA_BOX(15);
++DEFINE_CHA_BOX(16);
++DEFINE_CHA_BOX(17);
++DEFINE_CHA_BOX(18);
++DEFINE_CHA_BOX(19);
++DEFINE_CHA_BOX(20);
++DEFINE_CHA_BOX(21);
++DEFINE_CHA_BOX(22);
++DEFINE_CHA_BOX(23);
++DEFINE_CHA_BOX(24);
++DEFINE_CHA_BOX(25);
++DEFINE_CHA_BOX(26);
++DEFINE_CHA_BOX(27);
++DEFINE_CHA_BOX(28);
++DEFINE_CHA_BOX(29);
++DEFINE_CHA_BOX(30);
++DEFINE_CHA_BOX(31);
++DEFINE_CHA_BOX(32);
++DEFINE_CHA_BOX(33);
++DEFINE_CHA_BOX(34);
++DEFINE_CHA_BOX(35);
++DEFINE_CHA_BOX(36);
++DEFINE_CHA_BOX(37);
++
++
+diff --git a/lib/pfmlib_intel_knl_unc_edc.c b/lib/pfmlib_intel_knl_unc_edc.c
+new file mode 100644
+index 0000000..379496a
+--- /dev/null
++++ b/lib/pfmlib_intel_knl_unc_edc.c
+@@ -0,0 +1,111 @@
++/*
++ * pfmlib_intel_knl_unc_edc.c : Intel KnightsLanding Integrated EDRAM uncore PMU
++ *
++ * Copyright (c) 2016 Intel Corp. All rights reserved
++ * Contributed by Peinan Zhang
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
++ * of the Software, and to permit persons to whom the Software is furnished to do so,
++ * subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* private headers */ ++#include "pfmlib_priv.h" ++#include "pfmlib_intel_x86_priv.h" ++#include "pfmlib_intel_snbep_unc_priv.h" ++#include "events/intel_knl_unc_edc_events.h" ++ ++ ++#define DEFINE_EDC_UCLK_BOX(n) \ ++pfmlib_pmu_t intel_knl_unc_edc_uclk##n##_support = { \ ++ .desc = "Intel KnightLanding EDC_UCLK_"#n" uncore", \ ++ .name = "knl_unc_edc_uclk"#n, \ ++ .perf_name = "uncore_edc_uclk_"#n, \ ++ .pmu = PFM_PMU_INTEL_KNL_UNC_EDC_UCLK##n, \ ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_edc_uclk_pe), \ ++ .type = PFM_PMU_TYPE_UNCORE, \ ++ .num_cntrs = 4, \ ++ .num_fixed_cntrs = 0, \ ++ .max_encoding = 1, \ ++ .pe = intel_knl_unc_edc_uclk_pe, \ ++ .atdesc = snbep_unc_mods, \ ++ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ ++ .pmu_detect = pfm_intel_knl_unc_detect, \ ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ ++ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ ++ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ ++ .get_event_first = pfm_intel_x86_get_event_first, \ ++ .get_event_next = pfm_intel_x86_get_event_next, \ ++ .event_is_valid = pfm_intel_x86_event_is_valid, \ ++ .validate_table = pfm_intel_x86_validate_table, \ ++ .get_event_info = pfm_intel_x86_get_event_info, \ ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ ++ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ ++}; ++ ++DEFINE_EDC_UCLK_BOX(0); ++DEFINE_EDC_UCLK_BOX(1); ++DEFINE_EDC_UCLK_BOX(2); ++DEFINE_EDC_UCLK_BOX(3); ++DEFINE_EDC_UCLK_BOX(4); ++DEFINE_EDC_UCLK_BOX(5); ++DEFINE_EDC_UCLK_BOX(6); ++DEFINE_EDC_UCLK_BOX(7); ++ ++ ++#define 
DEFINE_EDC_ECLK_BOX(n) \ ++pfmlib_pmu_t intel_knl_unc_edc_eclk##n##_support = { \ ++ .desc = "Intel KnightLanding EDC_ECLK_"#n" uncore", \ ++ .name = "knl_unc_edc_eclk"#n, \ ++ .perf_name = "uncore_edc_eclk_"#n, \ ++ .pmu = PFM_PMU_INTEL_KNL_UNC_EDC_ECLK##n, \ ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_edc_eclk_pe), \ ++ .type = PFM_PMU_TYPE_UNCORE, \ ++ .num_cntrs = 4, \ ++ .num_fixed_cntrs = 0, \ ++ .max_encoding = 1, \ ++ .pe = intel_knl_unc_edc_eclk_pe, \ ++ .atdesc = snbep_unc_mods, \ ++ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ ++ .pmu_detect = pfm_intel_knl_unc_detect, \ ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ ++ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ ++ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ ++ .get_event_first = pfm_intel_x86_get_event_first, \ ++ .get_event_next = pfm_intel_x86_get_event_next, \ ++ .event_is_valid = pfm_intel_x86_event_is_valid, \ ++ .validate_table = pfm_intel_x86_validate_table, \ ++ .get_event_info = pfm_intel_x86_get_event_info, \ ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ ++ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ ++}; ++ ++DEFINE_EDC_ECLK_BOX(0); ++DEFINE_EDC_ECLK_BOX(1); ++DEFINE_EDC_ECLK_BOX(2); ++DEFINE_EDC_ECLK_BOX(3); ++DEFINE_EDC_ECLK_BOX(4); ++DEFINE_EDC_ECLK_BOX(5); ++DEFINE_EDC_ECLK_BOX(6); ++DEFINE_EDC_ECLK_BOX(7); ++ +diff --git a/lib/pfmlib_intel_knl_unc_imc.c b/lib/pfmlib_intel_knl_unc_imc.c +new file mode 100644 +index 0000000..1d613b2 +--- /dev/null ++++ b/lib/pfmlib_intel_knl_unc_imc.c +@@ -0,0 +1,101 @@ ++/* ++ * pfmlib_intel_knl_unc_imc.c : Intel KnightsLanding Integrated Memory Controller (IMC) uncore PMU ++ * ++ * Copyright (c) 2016 Intel Corp. 
All rights reserved ++ * Contributed by Peinan Zhang ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* private headers */ ++#include "pfmlib_priv.h" ++#include "pfmlib_intel_x86_priv.h" ++#include "pfmlib_intel_snbep_unc_priv.h" ++#include "events/intel_knl_unc_imc_events.h" ++ ++#define DEFINE_IMC_BOX(n) \ ++pfmlib_pmu_t intel_knl_unc_imc##n##_support = { \ ++ .desc = "Intel KnightLanding IMC "#n" uncore", \ ++ .name = "knl_unc_imc"#n, \ ++ .perf_name = "uncore_imc_"#n, \ ++ .pmu = PFM_PMU_INTEL_KNL_UNC_IMC##n, \ ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_imc_pe), \ ++ .type = PFM_PMU_TYPE_UNCORE, \ ++ .num_cntrs = 4, \ ++ .num_fixed_cntrs = 1, \ ++ .max_encoding = 1, \ ++ .pe = intel_knl_unc_imc_pe, \ ++ .atdesc = snbep_unc_mods, \ ++ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ ++ .pmu_detect = pfm_intel_knl_unc_detect, \ ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ ++ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ ++ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ ++ .get_event_first = pfm_intel_x86_get_event_first, \ ++ .get_event_next = pfm_intel_x86_get_event_next, \ ++ .event_is_valid = pfm_intel_x86_event_is_valid, \ ++ .validate_table = pfm_intel_x86_validate_table, \ ++ .get_event_info = pfm_intel_x86_get_event_info, \ ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ ++ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ ++}; ++ ++DEFINE_IMC_BOX(0); ++DEFINE_IMC_BOX(1); ++DEFINE_IMC_BOX(2); ++DEFINE_IMC_BOX(3); ++DEFINE_IMC_BOX(4); ++DEFINE_IMC_BOX(5); ++ ++#define DEFINE_IMC_UCLK_BOX(n) \ ++pfmlib_pmu_t intel_knl_unc_imc_uclk##n##_support = { \ ++ .desc = "Intel KnightLanding IMC UCLK "#n" uncore", \ ++ .name = "knl_unc_imc_uclk"#n, \ ++ .perf_name = "uncore_mc_uclk_"#n, \ ++ .pmu = PFM_PMU_INTEL_KNL_UNC_IMC_UCLK##n, \ ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_imc_uclk_pe), \ ++ .type = PFM_PMU_TYPE_UNCORE, \ ++ .num_cntrs = 4, \ ++ .num_fixed_cntrs = 1, \ 
++ .max_encoding = 1, \ ++ .pe = intel_knl_unc_imc_uclk_pe, \ ++ .atdesc = snbep_unc_mods, \ ++ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ ++ .pmu_detect = pfm_intel_knl_unc_detect, \ ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ ++ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ ++ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ ++ .get_event_first = pfm_intel_x86_get_event_first, \ ++ .get_event_next = pfm_intel_x86_get_event_next, \ ++ .event_is_valid = pfm_intel_x86_event_is_valid, \ ++ .validate_table = pfm_intel_x86_validate_table, \ ++ .get_event_info = pfm_intel_x86_get_event_info, \ ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ ++ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ ++}; ++ ++DEFINE_IMC_UCLK_BOX(0); ++DEFINE_IMC_UCLK_BOX(1); ++ +diff --git a/lib/pfmlib_intel_knl_unc_m2pcie.c b/lib/pfmlib_intel_knl_unc_m2pcie.c +new file mode 100644 +index 0000000..c4d6059 +--- /dev/null ++++ b/lib/pfmlib_intel_knl_unc_m2pcie.c +@@ -0,0 +1,80 @@ ++/* ++ * pfmlib_intel_knl_m2pcie.c : Intel Knights Landing M2PCIe uncore PMU ++ * ++ * Copyright (c) 2016 Intel Corp. All rights reserved ++ * Contributed by Peinan Zhang ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* private headers */ ++#include "pfmlib_priv.h" ++#include "pfmlib_intel_x86_priv.h" ++#include "pfmlib_intel_snbep_unc_priv.h" ++#include "events/intel_knl_unc_m2pcie_events.h" ++ ++static void ++display_m2p(void *this, pfmlib_event_desc_t *e, void *val) ++{ ++ const intel_x86_entry_t *pe = this_pe(this); ++ pfm_snbep_unc_reg_t *reg = val; ++ ++ __pfm_vbprintf("[UNC_R2PCIE=0x%"PRIx64" event=0x%x umask=0x%x en=%d " ++ "inv=%d edge=%d thres=%d] %s\n", ++ reg->val, ++ reg->com.unc_event, ++ reg->com.unc_umask, ++ reg->com.unc_en, ++ reg->com.unc_inv, ++ reg->com.unc_edge, ++ reg->com.unc_thres, ++ pe[e->event].name); ++} ++ ++pfmlib_pmu_t intel_knl_unc_m2pcie_support = { ++ .desc = "Intel Knights Landing M2PCIe uncore", ++ .name = "knl_unc_m2pcie", ++ .perf_name = "uncore_m2pcie", ++ .pmu = PFM_PMU_INTEL_KNL_UNC_M2PCIE, ++ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_m2pcie_pe), ++ .type = PFM_PMU_TYPE_UNCORE, ++ .num_cntrs = 4, ++ .num_fixed_cntrs = 0, ++ .max_encoding = 1, ++ .pe = intel_knl_unc_m2pcie_pe, ++ .atdesc = snbep_unc_mods, ++ .flags = PFMLIB_PMU_FL_RAW_UMASK, ++ .pmu_detect = pfm_intel_knl_unc_detect, ++ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, ++ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), ++ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), ++ .get_event_first = pfm_intel_x86_get_event_first, ++ .get_event_next = pfm_intel_x86_get_event_next, ++ .event_is_valid 
= pfm_intel_x86_event_is_valid, ++ .validate_table = pfm_intel_x86_validate_table, ++ .get_event_info = pfm_intel_x86_get_event_info, ++ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, ++ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), ++ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, ++ .display_reg = display_m2p, ++}; +diff --git a/lib/pfmlib_intel_snbep_unc.c b/lib/pfmlib_intel_snbep_unc.c +index c61065e..075ae33 100644 +--- a/lib/pfmlib_intel_snbep_unc.c ++++ b/lib/pfmlib_intel_snbep_unc.c +@@ -109,6 +109,28 @@ pfm_intel_hswep_unc_detect(void *this) + return PFM_SUCCESS; + } + ++int ++pfm_intel_knl_unc_detect(void *this) ++{ ++ int ret; ++ ++ ret = pfm_intel_x86_detect(); ++ if (ret != PFM_SUCCESS) ++ return ret; /* propagate x86 detection failure; keeps the family check below unconditional */ ++ if (pfm_intel_x86_cfg.family != 6) ++ return PFM_ERR_NOTSUPP; ++ ++ switch(pfm_intel_x86_cfg.model) { ++ case 87: /* Knights Landing */ ++ break; ++ default: ++ return PFM_ERR_NOTSUPP; ++ } ++ return PFM_SUCCESS; ++} ++ ++ ++ + static void + display_com(void *this, pfmlib_event_desc_t *e, void *val) + { +diff --git a/lib/pfmlib_intel_snbep_unc_priv.h b/lib/pfmlib_intel_snbep_unc_priv.h +index 13875f5..500ff84 100644 +--- a/lib/pfmlib_intel_snbep_unc_priv.h ++++ b/lib/pfmlib_intel_snbep_unc_priv.h +@@ -164,6 +164,8 @@ + #define HSWEP_UNC_SBO_ATTRS \ + (_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8|_SNBEP_UNC_ATTR_I) + ++#define KNL_UNC_CHA_TOR_ATTRS _SNBEP_UNC_ATTR_NF1 ++ + typedef union { + uint64_t val; + struct { +@@ -324,6 +326,7 @@ extern const pfmlib_attr_desc_t snbep_unc_mods[]; + extern int pfm_intel_snbep_unc_detect(void *this); + extern int pfm_intel_ivbep_unc_detect(void *this); + extern int pfm_intel_hswep_unc_detect(void *this); ++extern int pfm_intel_knl_unc_detect(void *this); + extern int pfm_intel_snbep_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); + extern int pfm_intel_snbep_unc_can_auto_encode(void *this, int pidx, int uidx); + extern int pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int
attr_idx, pfm_event_attr_info_t *info); +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index c49975f..33d7fdf 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -354,6 +354,69 @@ extern pfmlib_pmu_t intel_hswep_unc_irp_support; + extern pfmlib_pmu_t intel_knc_support; + extern pfmlib_pmu_t intel_slm_support; + extern pfmlib_pmu_t intel_knl_support; ++extern pfmlib_pmu_t intel_knl_unc_imc0_support; ++extern pfmlib_pmu_t intel_knl_unc_imc1_support; ++extern pfmlib_pmu_t intel_knl_unc_imc2_support; ++extern pfmlib_pmu_t intel_knl_unc_imc3_support; ++extern pfmlib_pmu_t intel_knl_unc_imc4_support; ++extern pfmlib_pmu_t intel_knl_unc_imc5_support; ++extern pfmlib_pmu_t intel_knl_unc_imc_uclk0_support; ++extern pfmlib_pmu_t intel_knl_unc_imc_uclk1_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk0_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk1_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk2_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk3_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk4_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk5_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk6_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_uclk7_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk0_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk1_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk2_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk3_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk4_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk5_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk6_support; ++extern pfmlib_pmu_t intel_knl_unc_edc_eclk7_support; ++extern pfmlib_pmu_t intel_knl_unc_cha0_support; ++extern pfmlib_pmu_t intel_knl_unc_cha1_support; ++extern pfmlib_pmu_t intel_knl_unc_cha2_support; ++extern pfmlib_pmu_t intel_knl_unc_cha3_support; ++extern pfmlib_pmu_t intel_knl_unc_cha4_support; ++extern pfmlib_pmu_t intel_knl_unc_cha5_support; ++extern pfmlib_pmu_t 
intel_knl_unc_cha6_support; ++extern pfmlib_pmu_t intel_knl_unc_cha7_support; ++extern pfmlib_pmu_t intel_knl_unc_cha8_support; ++extern pfmlib_pmu_t intel_knl_unc_cha9_support; ++extern pfmlib_pmu_t intel_knl_unc_cha10_support; ++extern pfmlib_pmu_t intel_knl_unc_cha11_support; ++extern pfmlib_pmu_t intel_knl_unc_cha12_support; ++extern pfmlib_pmu_t intel_knl_unc_cha13_support; ++extern pfmlib_pmu_t intel_knl_unc_cha14_support; ++extern pfmlib_pmu_t intel_knl_unc_cha15_support; ++extern pfmlib_pmu_t intel_knl_unc_cha16_support; ++extern pfmlib_pmu_t intel_knl_unc_cha17_support; ++extern pfmlib_pmu_t intel_knl_unc_cha18_support; ++extern pfmlib_pmu_t intel_knl_unc_cha19_support; ++extern pfmlib_pmu_t intel_knl_unc_cha20_support; ++extern pfmlib_pmu_t intel_knl_unc_cha21_support; ++extern pfmlib_pmu_t intel_knl_unc_cha22_support; ++extern pfmlib_pmu_t intel_knl_unc_cha23_support; ++extern pfmlib_pmu_t intel_knl_unc_cha24_support; ++extern pfmlib_pmu_t intel_knl_unc_cha25_support; ++extern pfmlib_pmu_t intel_knl_unc_cha26_support; ++extern pfmlib_pmu_t intel_knl_unc_cha27_support; ++extern pfmlib_pmu_t intel_knl_unc_cha28_support; ++extern pfmlib_pmu_t intel_knl_unc_cha29_support; ++extern pfmlib_pmu_t intel_knl_unc_cha30_support; ++extern pfmlib_pmu_t intel_knl_unc_cha31_support; ++extern pfmlib_pmu_t intel_knl_unc_cha32_support; ++extern pfmlib_pmu_t intel_knl_unc_cha33_support; ++extern pfmlib_pmu_t intel_knl_unc_cha34_support; ++extern pfmlib_pmu_t intel_knl_unc_cha35_support; ++extern pfmlib_pmu_t intel_knl_unc_cha36_support; ++extern pfmlib_pmu_t intel_knl_unc_cha37_support; ++extern pfmlib_pmu_t intel_knl_unc_m2pcie_support; + extern pfmlib_pmu_t intel_glm_support; + extern pfmlib_pmu_t power4_support; + extern pfmlib_pmu_t ppc970_support; +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index cede40b..c9770fc 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -4477,6 +4477,272 @@ static const test_event_t x86_test_events[]={ + .name 
= "knl::offcore_response_1:dmnd_data_rd:outstanding", + .ret = PFM_ERR_ATTR, + }, ++ { SRC_LINE, ++ .name = "knl_unc_imc0::UNC_M_D_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_imc0::UNC_M_D_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_imc0::UNC_M_CAS_COUNT:RD", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0103, ++ .fstr = "knl_unc_imc0::UNC_M_CAS_COUNT:RD", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_imc0::UNC_M_CAS_COUNT:WR", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0203, ++ .fstr = "knl_unc_imc0::UNC_M_CAS_COUNT:WR", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_imc0::UNC_M_CAS_COUNT:ALL", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0303, ++ .fstr = "knl_unc_imc0::UNC_M_CAS_COUNT:ALL", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_imc_uclk0::UNC_M_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_imc_uclk0::UNC_M_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_edc_uclk0::UNC_E_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_edc_uclk0::UNC_E_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:HIT_CLEAN", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0102, ++ .fstr = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:HIT_CLEAN", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:HIT_DIRTY", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0202, ++ .fstr = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:HIT_DIRTY", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:MISS_CLEAN", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0402, ++ .fstr = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:MISS_CLEAN", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:MISS_DIRTY", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0802, ++ .fstr = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:MISS_DIRTY", ++ }, ++ { SRC_LINE, ++ .name = 
"knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:MISS_INVALID", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x1002, ++ .fstr = "knl_unc_edc_uclk0::UNC_E_EDC_ACCESS:MISS_INVALID", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_edc_eclk0::UNC_E_E_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_edc_eclk0::UNC_E_E_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_edc_eclk0::UNC_E_RPQ_INSERTS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0101, ++ .fstr = "knl_unc_edc_eclk0::UNC_E_RPQ_INSERTS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_cha0::UNC_H_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha1::UNC_H_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_cha1::UNC_H_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha10::UNC_H_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_cha10::UNC_H_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha20::UNC_H_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_cha20::UNC_H_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha25::UNC_H_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_cha25::UNC_H_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha30::UNC_H_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_cha30::UNC_H_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha37::UNC_H_U_CLOCKTICKS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x00, ++ .fstr = "knl_unc_cha37::UNC_H_U_CLOCKTICKS", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:IRQ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0111, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:IRQ", ++ }, ++ { SRC_LINE, ++ .name = 
"knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:IRQ_REJ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0211, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:IRQ_REJ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:IPQ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0411, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:IPQ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:PRQ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x1011, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:PRQ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:PRQ_REJ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x2011, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_OCCUPANCY:PRQ_REJ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:IRQ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0113, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:IRQ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:IRQ_REJ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0213, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:IRQ_REJ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:IPQ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0413, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:IPQ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:PRQ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x1013, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:PRQ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:PRQ_REJ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x2013, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_INSERTS:PRQ_REJ", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_cha0::UNC_H_INGRESS_RETRY_IRQ0_REJECT:AD_RSP_VN0", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0218, ++ .fstr = "knl_unc_cha0::UNC_H_INGRESS_RETRY_IRQ0_REJECT:AD_RSP_VN0", ++ }, ++ { SRC_LINE, ++ .name = 
"knl_unc_m2pcie::UNC_M2P_INGRESS_CYCLES_NE:ALL", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0810, ++ .fstr = "knl_unc_m2pcie::UNC_M2P_INGRESS_CYCLES_NE:ALL", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_NE:AD_0", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0123, ++ .fstr = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_NE:AD_0", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_NE:AD_1", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0823, ++ .fstr = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_NE:AD_1", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_m2pcie::UNC_M2P_EGRESS_INSERTS:AD_0", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0124, ++ .fstr = "knl_unc_m2pcie::UNC_M2P_EGRESS_INSERTS:AD_0", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_m2pcie::UNC_M2P_EGRESS_INSERTS:AD_1", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x1024, ++ .fstr = "knl_unc_m2pcie::UNC_M2P_EGRESS_INSERTS:AD_1", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_FULL:AD_0", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0125, ++ .fstr = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_FULL:AD_0", ++ }, ++ { SRC_LINE, ++ .name = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_FULL:AD_1", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x0825, ++ .fstr = "knl_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_FULL:AD_1", ++ }, + }; + + #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) +-- +2.9.3 + + +From 192db474a97b5c67d917e18c04ab0848405e077d Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sun, 6 Nov 2016 23:37:41 -0800 +Subject: [PATCH] add more Skylake models + +Add Skylake X core PMU support (equiv to Skylake desktop for now) +Add Kabylake mobile and desktop. 
+ +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_intel_skl.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/lib/pfmlib_intel_skl.c b/lib/pfmlib_intel_skl.c +index 87ee70d..a190ead 100644 +--- a/lib/pfmlib_intel_skl.c ++++ b/lib/pfmlib_intel_skl.c +@@ -29,6 +29,9 @@ + static const int skl_models[] = { + 78, /* Skylake mobile */ + 94, /* Skylake desktop */ ++ 85, /* Skylake X */ ++ 142,/* KabyLake mobile */ ++ 158,/* KabyLake desktop */ + 0 + }; + +-- +2.9.3 + + +From 05edb2f56598752e14071009c3c52cb22ae6036b Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Sun, 5 Feb 2017 00:35:24 -0800 +Subject: [PATCH] Fix offcore_response for Intel BDW-EP + +The event was missing all the L3_HIT umasks because +they were all marked as Broadwell (client) only. + +Signed-off-by: Stephane Eranian +--- + lib/events/intel_bdw_events.h | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/lib/events/intel_bdw_events.h b/lib/events/intel_bdw_events.h +index fba5ad2..ba5d1f7 100644 +--- a/lib/events/intel_bdw_events.h ++++ b/lib/events/intel_bdw_events.h +@@ -1746,81 +1746,69 @@ static const intel_x86_umask_t bdw_offcore_response[]={ + { .uname = "L3_HITM", + .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", + .ucode = 1ULL << (18+8), +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "LLC_HITM", + .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", + .ucode = 1ULL << (18+8), + .uequiv = "L3_HITM", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "L3_HITE", + .udesc = "Supplier: counts L3 hits in E-state", + .ucode = 1ULL << (19+8), +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "LLC_HITE", + .udesc = "Supplier: counts L3 hits in E-state", + .ucode = 1ULL << (19+8), + .uequiv = "L3_HITE", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "L3_HITS", + .udesc = "Supplier: counts L3 hits in S-state", + .ucode = 1ULL << (20+8), +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1,
+ }, + { .uname = "LLC_HITS", + .udesc = "Supplier: counts L3 hits in S-state", + .ucode = 1ULL << (20+8), + .uequiv = "L3_HITS", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "L3_HITF", + .udesc = "Supplier: counts L3 hits in F-state", + .ucode = 1ULL << (21+8), +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "LLC_HITF", + .udesc = "Supplier: counts L3 hits in F-state", + .ucode = 1ULL << (20+8), + .uequiv = "L3_HITF", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "L3_HITMESF", + .udesc = "Supplier: counts L3 hits in any state (M, E, S, F)", + .ucode = 0xfULL << (18+8), + .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "LLC_HITMESF", + .udesc = "Supplier: counts L3 hits in any state (M, E, S, F)", + .ucode = 0xfULL << (18+8), + .uequiv = "L3_HITMESF", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "L3_HIT", + .udesc = "Alias for L3_HITMESF", + .ucode = 0xfULL << (18+8), + .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "LLC_HIT", + .udesc = "Alias for LLC_HITMESF", + .ucode = 0xfULL << (18+8), + .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", +- .umodel = PFM_PMU_INTEL_BDW, + .grpid = 1, + }, + { .uname = "L3_MISS_LOCAL", +-- +2.9.3 + + +From 28ba4f45ab37915a4e91c6f8d33318bb6a1b1947 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 21 Feb 2017 23:49:07 -0800 +Subject: [PATCH] add UOPS_DISPATCHED_PORT event for Intel Skylake + +This patch adds the UOPS_DISPATCHED_PORT event for Intel Skylake. +This is the official name of the event 0xa1. + +Make the old UOPS_DISPATCHED event an alias for backward +compatibility reasons. + +Also add a test case for the new event and alias.
+ +Signed-off-by: Stephane Eranian +--- + lib/events/intel_skl_events.h | 19 ++++++++++++++----- + tests/validate_x86.c | 12 ++++++++++++ + 2 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/lib/events/intel_skl_events.h b/lib/events/intel_skl_events.h +index e7b522d..84dfabf 100644 +--- a/lib/events/intel_skl_events.h ++++ b/lib/events/intel_skl_events.h +@@ -1154,7 +1154,7 @@ static const intel_x86_umask_t skl_uops_executed[]={ + }, + }; + +-static const intel_x86_umask_t skl_uops_dispatched[]={ ++static const intel_x86_umask_t skl_uops_dispatched_port[]={ + { .uname = "PORT_0", + .udesc = "Cycles which a Uop is executed on port 0", + .ucode = 0x100, +@@ -2510,15 +2510,24 @@ static const intel_x86_entry_t intel_skl_pe[]={ + .numasks = LIBPFM_ARRAY_SIZE(skl_lsd), + .umasks = skl_lsd, + }, +- ++ { .name = "UOPS_DISPATCHED_PORT", ++ .desc = "Uops dispatched to specific ports", ++ .code = 0xa1, ++ .cntmsk = 0xff, ++ .ngrp = 1, ++ .modmsk = INTEL_V4_ATTRS, ++ .numasks = LIBPFM_ARRAY_SIZE(skl_uops_dispatched_port), ++ .umasks = skl_uops_dispatched_port, ++ }, + { .name = "UOPS_DISPATCHED", +- .desc = "Uops dispatch to specific ports", ++ .desc = "Uops dispatched to specific ports", ++ .equiv = "UOPS_DISPATCHED_PORT", + .code = 0xa1, + .cntmsk = 0xff, + .ngrp = 1, + .modmsk = INTEL_V4_ATTRS, +- .numasks = LIBPFM_ARRAY_SIZE(skl_uops_dispatched), +- .umasks = skl_uops_dispatched, ++ .numasks = LIBPFM_ARRAY_SIZE(skl_uops_dispatched_port), ++ .umasks = skl_uops_dispatched_port, + }, + { .name = "UOPS_ISSUED", + .desc = "Uops issued", +diff --git a/tests/validate_x86.c b/tests/validate_x86.c +index c9770fc..790ba58 100644 +--- a/tests/validate_x86.c ++++ b/tests/validate_x86.c +@@ -4031,6 +4031,18 @@ static const test_event_t x86_test_events[]={ + .ret = PFM_ERR_ATTR_SET, + }, + { SRC_LINE, ++ .name = "skl::uops_dispatched_port:port_0", ++ .count = 1, ++ .codes[0] = 0x5301a1, ++ .fstr = 
"skl::UOPS_DISPATCHED_PORT:PORT_0:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, ++ .name = "skl::uops_dispatched:port_0", ++ .count = 1, ++ .codes[0] = 0x5301a1, ++ .fstr = "skl::UOPS_DISPATCHED_PORT:PORT_0:k=1:u=1:e=0:i=0:c=0:t=0:intx=0:intxcp=0", ++ }, ++ { SRC_LINE, + .name = "hsw::CYCLE_ACTIVITY:CYCLES_L2_PENDING:k=1:u=1:e=0:i=0:c=1:t=0:intx=0:intxcp=0", + .ret = PFM_SUCCESS, + .count = 1, +-- +2.9.3 + + +From 1bd352eef242f53e130c3b025bbf7881a5fb5d1e Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Wed, 22 Feb 2017 01:16:42 -0800 +Subject: [PATCH] update Intel RAPL processor support + +Added Kabylake, Skylake X + +Added PSYS RAPL event for Skylake client. + +Signed-off-by: Stephane Eranian +--- + lib/pfmlib_intel_rapl.c | 51 ++++++++++++++++++++++++++++++++++--------------- + 1 file changed, 36 insertions(+), 15 deletions(-) + +diff --git a/lib/pfmlib_intel_rapl.c b/lib/pfmlib_intel_rapl.c +index 1413b5f..8a04079 100644 +--- a/lib/pfmlib_intel_rapl.c ++++ b/lib/pfmlib_intel_rapl.c +@@ -59,6 +59,20 @@ static const intel_x86_entry_t intel_rapl_cln_pe[]={ + } + }; + ++static const intel_x86_entry_t intel_rapl_skl_cln_pe[]={ ++ RAPL_COMMON_EVENTS, ++ { .name = "RAPL_ENERGY_GPU", ++ .desc = "Number of Joules consumed by the builtin GPU. Unit is 2^-32 Joules", ++ .cntmsk = 0x8, ++ .code = 0x4, ++ }, ++ { .name = "RAPL_ENERGY_PSYS", ++ .desc = "Number of Joules consumed by the builtin PSYS. 
Unit is 2^-32 Joules", ++ .cntmsk = 0x8, ++ .code = 0x5, ++ } ++}; ++ + static const intel_x86_entry_t intel_rapl_srv_pe[]={ + RAPL_COMMON_EVENTS, + { .name = "RAPL_ENERGY_DRAM", +@@ -97,29 +111,36 @@ pfm_rapl_detect(void *this) + return PFM_ERR_NOTSUPP; + + switch(pfm_intel_x86_cfg.model) { +- case 42: /* Sandy Bridge */ +- case 58: /* Ivy Bridge */ +- case 60: /* Haswell */ +- case 69: /* Haswell */ +- case 70: /* Haswell */ +- case 61: /* Broadwell */ +- case 71: /* Broadwell */ +- case 78: /* Skylake */ +- case 94: /* Skylake H/S */ ++ case 42: /* Sandy Bridge */ ++ case 58: /* Ivy Bridge */ ++ case 60: /* Haswell */ ++ case 69: /* Haswell */ ++ case 70: /* Haswell */ ++ case 61: /* Broadwell */ ++ case 71: /* Broadwell GT3E */ ++ case 92: /* Goldmont */ + /* already setup by default */ + break; +- case 45: /* Sandy Bridg-EP */ +- case 62: /* Ivy Bridge-EP */ ++ case 45: /* Sandy Bridg-EP */ ++ case 62: /* Ivy Bridge-EP */ + intel_rapl_support.pe = intel_rapl_srv_pe; + intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_srv_pe); + break; +- case 63: /* Haswell-EP */ +- case 79: /* Broadwell-EP */ +- case 86: /* Broadwell D */ ++ case 78: /* Skylake */ ++ case 94: /* Skylake H/S */ ++ case 142: /* Kabylake */ ++ case 158: /* Kabylake */ ++ intel_rapl_support.pe = intel_rapl_skl_cln_pe; ++ intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_skl_cln_pe); ++ break; ++ case 63: /* Haswell-EP */ ++ case 79: /* Broadwell-EP */ ++ case 86: /* Broadwell D */ ++ case 85: /* Skylake X */ + intel_rapl_support.pe = intel_rapl_hswep_pe; + intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_hswep_pe); + break; +- default: ++ default : + return PFM_ERR_NOTSUPP; + } + return PFM_SUCCESS; +-- +2.9.3 + diff --git a/SPECS/libpfm.spec b/SPECS/libpfm.spec index 1e09d3c..d1e007f 100644 --- a/SPECS/libpfm.spec +++ b/SPECS/libpfm.spec @@ -10,7 +10,7 @@ Name: libpfm Version: 4.7.0 -Release: 1%{?dist} +Release: 4%{?dist} Summary: Library to encode performance 
events for use by perf tool @@ -18,6 +18,8 @@ Group: System Environment/Libraries License: MIT URL: http://perfmon2.sourceforge.net/ Source0: http://sourceforge.net/projects/perfmon2/files/libpfm4/%{name}-%{version}.tar.gz +Patch1: libpfm-updates.patch +Patch2: libpfm-rhbz1440249.patch %if %{with python} BuildRequires: python-devel @@ -61,6 +63,8 @@ Python bindings for libpfm4 and perf_event_open system call. %prep %setup -q +%patch1 -p1 +%patch2 -p1 %build %if %{with python} @@ -108,6 +112,15 @@ make \ %endif %changelog +* Wed Apr 12 2017 William Cohen - 4.7.0-4 +- Correct handling of raw offcore umasks. rhbz1440249 + +* Thu Mar 23 2017 William Cohen - 4.7.0-3 +- Avoid ABI breakage caused by some Intel KNL related patches. rhbz1412950 + +* Tue Mar 21 2017 William Cohen - 4.7.0-2 +- Updates for IBM Power and Intel KNL. rhbz1385009, rhbz1412950 + * Thu May 12 2016 William Cohen - 4.7.0-1 - Rebase to libpfm-4.7.0.