diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0040f64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/papi-5.2.0.tar.gz diff --git a/.papi.metadata b/.papi.metadata new file mode 100644 index 0000000..0399ff4 --- /dev/null +++ b/.papi.metadata @@ -0,0 +1 @@ +f044cc59884cd2902534037b86cfe1716dade40e SOURCES/papi-5.2.0.tar.gz diff --git a/README.md b/README.md deleted file mode 100644 index 0e7897f..0000000 --- a/README.md +++ /dev/null @@ -1,5 +0,0 @@ -The master branch has no content - -Look at the c7 branch if you are working with CentOS-7, or the c4/c5/c6 branch for CentOS-4, 5 or 6 - -If you find this file in a distro specific branch, it means that no content has been checked in yet diff --git a/SOURCES/papi-aarch64.patch b/SOURCES/papi-aarch64.patch new file mode 100644 index 0000000..3a35034 --- /dev/null +++ b/SOURCES/papi-aarch64.patch @@ -0,0 +1,88 @@ +commit 071943b6ffb67047c78bc7e7c370fc41c16ef538 +Author: William Cohen +Date: Fri Oct 11 13:41:41 2013 -0400 + + add preliminary aarch64 (arm64) support + + There has been some work to build fedora 19 on 64-bit arm armv8 machines + (aarch64). I took a look that the why the papi build was failing. The + attached is a set of minimal patches to get papi to build. The patch is + just a step toward getting aarch64 support for papi. Things are not all + there for papi to work in that environment. Still need libpfm to + support aarch64 and papi_events.csv describing mappings to machine + specific events. + +diff --git a/src/configure b/src/configure +index 7cbd262..3c50e20 100755 +--- a/src/configure ++++ b/src/configure +@@ -4262,7 +4262,7 @@ _ACEOF + + # First set pthread-mutexes based on arch + case $arch in +- arm*) ++ aarch64|arm*) + pthread_mutexes=yes + CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" + echo "forcing use of pthread mutexes... 
" >&6 +diff --git a/src/configure.in b/src/configure.in +index 2a73ad6..acc8bfc 100644 +--- a/src/configure.in ++++ b/src/configure.in +@@ -382,7 +382,7 @@ AC_DEFINE_UNQUOTED(CPU,$CPU,[cpu type]) + + # First set pthread-mutexes based on arch + case $arch in +- arm*) ++ aarch64|arm*) + pthread_mutexes=yes + CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" + echo "forcing use of pthread mutexes... " >&6 +diff --git a/src/linux-context.h b/src/linux-context.h +index 2135695..524490b 100644 +--- a/src/linux-context.h ++++ b/src/linux-context.h +@@ -31,6 +31,8 @@ typedef ucontext_t hwd_ucontext_t; + #define OVERFLOW_ADDRESS(ctx) ((struct sigcontext *)ctx.ucontext)->si_regs.pc + #elif defined(__arm__) + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.arm_pc ++#elif defined(__aarch64__) ++#define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc + #elif defined(__mips__) + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc + #else +diff --git a/src/linux-timer.c b/src/linux-timer.c +index 8e2d21f..9b918cd 100644 +--- a/src/linux-timer.c ++++ b/src/linux-timer.c +@@ -215,7 +215,7 @@ get_cycles( void ) + /* POWER get_cycles() */ + /************************/ + +-#elif (defined(__powerpc__) || defined(__arm__) || defined(__mips__)) ++#elif (defined(__powerpc__) || defined(__arm__) || defined(__mips__) || defined(__aarch64__)) + /* + * It's not possible to read the cycles from user space on ppc970. + * There is a 64-bit time-base register (TBU|TBL), but its +commit 479e89a51ee18ba331c86a41be9ff17cd501e6d9 +Author: William Cohen +Date: Wed May 28 17:01:41 2014 -0400 + + Add aarch64 Cortex A57 presets + + The events for the ARM Cortex A57 events look to be the same as the + events for the ARM Cortex A15 events. Add an entry so the preset are + available for the ARM Cortex A57. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 441844e..dbbc8d8 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1473,6 +1473,7 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,DCACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,IFETCH_MISS + # + CPU,arm_ac15 ++CPU,arm_ac57 + # + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED + PRESET,PAPI_TOT_IIS,NOT_DERIVED,INST_SPEC_EXEC diff --git a/SOURCES/papi-avoid_libpfm_enum.patch b/SOURCES/papi-avoid_libpfm_enum.patch new file mode 100644 index 0000000..f109e92 --- /dev/null +++ b/SOURCES/papi-avoid_libpfm_enum.patch @@ -0,0 +1,170 @@ +diff -up papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c.orig papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c +--- papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c.orig 2013-08-06 12:12:20.000000000 -0400 ++++ papi-5.2.0/src/components/perf_event/pe_libpfm4_events.c 2017-06-15 22:41:58.784904523 -0400 +@@ -236,14 +236,15 @@ static int find_next_no_aliases(int code + current_pmu++; + SUBDBG("Incrementing PMU: %#x\n",current_pmu); + ++ memset(&pinfo,0,sizeof(pfm_pmu_info_t)); ++ ret = pfm_get_pmu_info(current_pmu, &pinfo); ++ + /* Off the end, so done iterating */ +- if (current_pmu>PFM_PMU_MAX) { ++ if (ret==PFM_ERR_INVAL) { + return PFM_ERR_NOTFOUND; + } + +- memset(&pinfo,0,sizeof(pfm_pmu_info_t)); +- pfm_get_pmu_info(current_pmu, &pinfo); +- if (pmu_is_present_and_right_type(&pinfo,pmu_type)) break; ++ if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) break; + } + + current_event=pinfo.first_event; +@@ -533,12 +534,21 @@ get_event_first_active(int pmu_type) + + pmu_idx=0; + +- while(pmu_idxdefault_pmu)); + + SUBDBG("Detected pmus:\n"); +- for(i=0;idefault_pmu.num_fixed_cntrs; + + SUBDBG( "num_counters: %d\n", my_vector->cmp_info.num_cntrs ); +- ++ + /* Setup presets, only if Component 0 */ + if (cidx==0) { + retval = _papi_load_preset_table( (char *)event_table->default_pmu.name, +diff -up 
papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c.orig papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c +--- papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c.orig 2013-08-06 12:12:20.000000000 -0400 ++++ papi-5.2.0/src/components/perf_event_uncore/peu_libpfm4_events.c 2017-06-15 22:50:08.700238377 -0400 +@@ -238,14 +238,15 @@ static int find_next_no_aliases(int code + current_pmu++; + SUBDBG("Incrementing PMU: %#x\n",current_pmu); + ++ memset(&pinfo,0,sizeof(pfm_pmu_info_t)); ++ ret=pfm_get_pmu_info(current_pmu, &pinfo); ++ + /* Off the end, so done iterating */ +- if (current_pmu>PFM_PMU_MAX) { ++ if (ret==PFM_ERR_INVAL) { + return PFM_ERR_NOTFOUND; + } + +- memset(&pinfo,0,sizeof(pfm_pmu_info_t)); +- pfm_get_pmu_info(current_pmu, &pinfo); +- if (pmu_is_present_and_right_type(&pinfo,pmu_type)) break; ++ if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) break; + } + + current_event=pinfo.first_event; +@@ -531,12 +532,20 @@ get_event_first_active(int pmu_type) + + pmu_idx=0; + +- while(pmu_idxcmp_info.num_cntrs=0; + + SUBDBG("Detected pmus:\n"); +- for(i=0;icmp_info.num_cntrs += pinfo.num_cntrs+ + pinfo.num_fixed_cntrs; + } ++ i++; + } + SUBDBG("%d native events detected on %d pmus\n",ncnt,detected_pmus); + diff --git a/SOURCES/papi-bz1263666.patch b/SOURCES/papi-bz1263666.patch new file mode 100644 index 0000000..7942c3a --- /dev/null +++ b/SOURCES/papi-bz1263666.patch @@ -0,0 +1,50 @@ +commit ba5ef24a0c27c667a7de6d40eb396aeaa0f1cc41 +Author: Asim YarKhan +Date: Wed Oct 22 13:35:43 2014 -0400 + + PPC64, fix L1 data cache read, write and all access equations. + + Thanks to Carl Love for this patch and the following documentation: + + The current POWER 7 equations for all accesses over counts because it + includes non load accesses to the cache. The equation was changed to + be the sum of the reads and the writes. 
The read accesses to the two + units can be counted with the single event PM_LD_REF_L1 rather than + counting the events to the two LSU units independently. The number of + reads to the L1 must be adjusted by subtracting the misses as these + become writes. + + Power 8 has four LSU units. The same equations can be used since + PM_LD_REF_L1 counts across all four LSU units. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 73a72e7..d27a41d 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1307,10 +1307,9 @@ CPU,power7 + PRESET,PAPI_L1_DCM,DERIVED_ADD,PM_LD_MISS_L1,PM_ST_MISS_L1 + PRESET,PAPI_L1_LDM,NOT_DERIVED,PM_LD_MISS_L1 + PRESET,PAPI_L1_STM,NOT_DERIVED,PM_ST_MISS_L1 +-PRESET,PAPI_L1_DCW,DERIVED_POSTFIX,N0|N1|-|,PM_ST_FIN,PM_ST_MISS_L1 +-PRESET,PAPI_L1_DCA,NOT_DERIVED,PM_LSU_FIN +-#PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1 +-PRESET,PAPI_L1_DCR,DERIVED_ADD,PM_LD_REF_L1_LSU0,PM_LD_REF_L1_LSU1 ++PRESET,PAPI_L1_DCW,DERIVED_SUB,PM_ST_FIN,PM_ST_MISS_L1 ++PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|+|N2|+|N3|+,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 + PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS + PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS + PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS +@@ -1357,9 +1356,9 @@ CPU,power8 + PRESET,PAPI_L1_DCM,DERIVED_ADD,PM_LD_MISS_L1,PM_ST_MISS_L1 + PRESET,PAPI_L1_LDM,NOT_DERIVED,PM_LD_MISS_L1 + PRESET,PAPI_L1_STM,NOT_DERIVED,PM_ST_MISS_L1 +-PRESET,PAPI_L1_DCW,DERIVED_POSTFIX,N0|N1|-|,PM_ST_FIN,PM_ST_MISS_L1 +-#n/aPRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1 +-#n/aPRESET,PAPI_L1_DCR,DERIVED_ADD,PM_LD_REF_L1_LSU0,PM_LD_REF_L1_LSU1 ++PRESET,PAPI_L1_DCW,DERIVED_SUB,PM_ST_FIN,PM_ST_MISS_L1 ++PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|+|N2|+|N3|+,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1
+ PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS + #n/aPRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS + #n/aPRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS diff --git a/SOURCES/papi-bz1277931.patch b/SOURCES/papi-bz1277931.patch new file mode 100644 index 0000000..b42eb40 --- /dev/null +++ b/SOURCES/papi-bz1277931.patch @@ -0,0 +1,19 @@ +commit 8914dcfcfdc013efcf44a242048ae94c82639fef +Author: sangamesh +Date: Thu Apr 16 16:38:15 2015 -0400 + + Bug reported by William Cohen in papi_events.csv for the event PAPI_L1_TCM + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index e339f0e..07cbc4f 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -649,7 +649,7 @@ PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT + PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD + PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB +-PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD + # L2 cache + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES + #PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT diff --git a/SOURCES/papi-bz1313088.patch b/SOURCES/papi-bz1313088.patch new file mode 100644 index 0000000..5f5d5a3 --- /dev/null +++ b/SOURCES/papi-bz1313088.patch @@ -0,0 +1,68 @@ +From 88f6669fa358bf4670d91e379f469e3cd3016543 Mon Sep 17 00:00:00 2001 +From: William Cohen +Date: Tue, 21 Jun 2016 10:53:32 -0400 +Subject: [PATCH 1/2] Have Fortran test support code report errors more clearly + +When a Fortran test called the ftest_skip or ftest_fail the support +code would attempt to print out error strings. However, this support +code would print out gibberish because the string was not properly +initialized. There doesn't seem to be an easy way in Fortran to get +the error string; for the time being just print out the error number +and people will need to manually map it back to the string.
+ +Signed-off-by: William Cohen +--- + src/ftests/ftests_util.F | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/src/ftests/ftests_util.F b/src/ftests/ftests_util.F +index 83433f9..8de2bd1 100644 +--- a/src/ftests/ftests_util.F ++++ b/src/ftests/ftests_util.F +@@ -108,7 +108,6 @@ C And also to make the test code read cleaner + integer line + character*(*) callstr + integer retval,ilen +- character*(PAPI_MAX_STR_LEN) papi_errstr + integer last_char + external last_char + +@@ -134,10 +133,10 @@ C And also to make the test code read cleaner + else if(retval.eq.0)then + write(*,*) 'SGI requires root permissions for this test' + else +- call PAPIF_perror( ) ++C Just printing the error number because of difficulty getting error string. + ilen=last_char(callstr) +- write(*,'(T2,3a)') 'PAPI error in ', callstr(1:ilen), +- * ': '// papi_errstr(1:last_char(papi_errstr)) ++ write(*,'(T2,3a,I3)') 'PAPI error in ', callstr(1:ilen), ++ * ': ', retval + end if + call pause() + stop +@@ -153,7 +152,6 @@ C And also to make the test code read cleaner + integer quiet + common quiet + +- character*(PAPI_MAX_STR_LEN) papi_errstr + integer last_char + external last_char + +@@ -165,10 +163,10 @@ C And also to make the test code read cleaner + else if (retval.gt.0) then + write(*,*) "Error calculating: ", callstr + else +- call PAPIF_perror( ) ++C Just printing the error number because of difficulty getting error string. 
+ ilen=last_char(callstr) +- write(*,'(T2,3a)') 'Error in ', callstr(1:ilen), +- * ': ' // papi_errstr(1:last_char(papi_errstr)) ++ write(*,'(T2,3a,I3)') 'Error in ', callstr(1:ilen), ++ * ': ', retval + end if + end if + call pause() +-- +1.8.3.1 + diff --git a/SOURCES/papi-bz1326977.patch b/SOURCES/papi-bz1326977.patch new file mode 100644 index 0000000..73d23cd --- /dev/null +++ b/SOURCES/papi-bz1326977.patch @@ -0,0 +1,73 @@ +commit 547f44124e1a38b8c5c635f673d88054ce6ff0d3 +Author: Asim YarKhan +Date: Thu Oct 16 12:19:34 2014 -0400 + + byte_profile.c: PPC64 add support for PPC64 Little Endian to byte_profile.c + + Thanks to Carl Love for this patch and the following description: + The POWER 8 platform is Little Endian. It uses ELF + version 2 which does not use function descriptors. This + patch adds the needed #ifdef support to correctly compile + the test case for Big Endian or Little Endian. + + This patch is untested by the PAPI developers (hardware not easily accessible). + +diff --git a/src/ctests/byte_profile.c b/src/ctests/byte_profile.c +index 0f33c0d..f693e9f 100644 +--- a/src/ctests/byte_profile.c ++++ b/src/ctests/byte_profile.c +@@ -213,9 +213,14 @@ main( int argc, char **argv ) + /* Itanium and PowerPC64 processors return function descriptors instead + * of function addresses. You must dereference the descriptor to get the address. + */ +-#if defined(ITANIUM1) || defined(ITANIUM2) || defined(__powerpc64__) ++#if defined(ITANIUM1) || defined(ITANIUM2) \ ++ || (defined(__powerpc64__) && (_CALL_ELF != 2)) + start = ( caddr_t ) ( ( ( struct fdesc * ) start )->ip ); + end = ( caddr_t ) ( ( ( struct fdesc * ) end )->ip ); ++ /* PPC64 Big Endian is ELF version 1 which uses function descriptors. 
++ * PPC64 Little Endian is ELF version 2 which does not use ++ * function descriptors ++ */ + #endif + + /* call dummy so it doesn't get optimized away */ +commit 14f70ebc77deaad4a3fc3f808613772ef1165137 +Author: Asim YarKhan +Date: Wed Oct 15 14:48:05 2014 -0400 + + PPC64 add support for PPC64 Little Endian to sprofile.c + + Thanks to Carl Love for this patch and the following description: + The POWER 8 platform is Little Endian. It uses ELF + version 2 which does not use function descriptors. This + patch adds the needed #ifdef support to correctly compile + the test case for Big Endian or Little Endian. + +diff --git a/src/ctests/sprofile.c b/src/ctests/sprofile.c +index e7ee47a..4bd7cc7 100644 +--- a/src/ctests/sprofile.c ++++ b/src/ctests/sprofile.c +@@ -1,4 +1,4 @@ +-/* ++/* + * File: sprofile.c + * Author: Philip Mucci + * mucci@cs.utk.edu +@@ -10,10 +10,15 @@ + + /* These architectures use Function Descriptors as Function Pointers */ + +-#if (defined(linux) && defined(__ia64__)) || (defined(_AIX)) ||(defined(__powerpc64__)) ++#if (defined(linux) && defined(__ia64__)) || (defined(_AIX)) \ ++ || ((defined(__powerpc64__) && (_CALL_ELF != 2))) ++/* PPC64 Big Endian is ELF version 1 which uses function descriptors */ + #define DO_READS (unsigned long)(*(void **)do_reads) + #define DO_FLOPS (unsigned long)(*(void **)do_flops) + #else ++/* PPC64 Little Endian is ELF version 2 which does not use ++ * function descriptors ++ */ + #define DO_READS (unsigned long)(do_reads) + #define DO_FLOPS (unsigned long)(do_flops) + #endif diff --git a/SOURCES/papi-coverity.patch b/SOURCES/papi-coverity.patch new file mode 100644 index 0000000..e63aec8 --- /dev/null +++ b/SOURCES/papi-coverity.patch @@ -0,0 +1,834 @@ +commit 284f25c227d1b6c07e87f6336d3b6ff0533c85d7 +Author: William Cohen +Date: Thu Jan 30 16:20:24 2014 -0500 + + Use correct specification for signed and unsigned int + + A run of cppcheck showed some mismatches between the specifications + for sscanf and the
variables being used to store the values. This corrects + those minor issues. + +diff --git a/src/components/lustre/linux-lustre.c b/src/components/lustre/linux-lustre.c +index 4f4fb7a..46899f2 100644 +--- a/src/components/lustre/linux-lustre.c ++++ b/src/components/lustre/linux-lustre.c +@@ -334,13 +334,13 @@ read_lustre_counter( ) + if (fgets(buffer,BUFSIZ,fff)==NULL) break; + + if (strstr( buffer, "write_bytes" )) { +- sscanf(buffer,"%*s %*d %*s %*s %*d %*d %lld",&fs->write_cntr->value); +- SUBDBG("Read %lld write_bytes\n",fs->write_cntr->value); ++ sscanf(buffer,"%*s %*d %*s %*s %*d %*d %llu",&fs->write_cntr->value); ++ SUBDBG("Read %llu write_bytes\n",fs->write_cntr->value); + } + + if (strstr( buffer, "read_bytes" )) { +- sscanf(buffer,"%*s %*d %*s %*s %*d %*d %lld",&fs->read_cntr->value); +- SUBDBG("Read %lld read_bytes\n",fs->read_cntr->value); ++ sscanf(buffer,"%*s %*d %*s %*s %*d %*d %llu",&fs->read_cntr->value); ++ SUBDBG("Read %llu read_bytes\n",fs->read_cntr->value); + } + } + fclose(fff); +@@ -352,8 +352,8 @@ read_lustre_counter( ) + if (fgets(buffer,BUFSIZ,fff)==NULL) break; + + if (strstr( buffer, "read but discarded")) { +- sscanf(buffer,"%*s %*s %*s %lld",&fs->readahead_cntr->value); +- SUBDBG("Read %lld discared\n",fs->readahead_cntr->value); ++ sscanf(buffer,"%*s %*s %*s %llu",&fs->readahead_cntr->value); ++ SUBDBG("Read %llu discared\n",fs->readahead_cntr->value); + break; + } + } +diff --git a/src/components/net/linux-net.c b/src/components/net/linux-net.c +index ad15d84..ba7563c 100644 +--- a/src/components/net/linux-net.c ++++ b/src/components/net/linux-net.c +@@ -240,7 +240,7 @@ read_net_counters( long long *values ) + SUBDBG("Interface <%s> not found\n", ifname); + } else { + nf = sscanf( data, +- "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", ++ "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", + &values[if_bidx + 0], &values[if_bidx + 1], + &values[if_bidx + 2], 
&values[if_bidx + 3], + &values[if_bidx + 4], &values[if_bidx + 5], +@@ -251,7 +251,7 @@ read_net_counters( long long *values ) + &values[if_bidx + 14], &values[if_bidx + 15]); + + SUBDBG("\nRead " +- "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", ++ "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", + values[if_bidx + 0], values[if_bidx + 1], + values[if_bidx + 2], values[if_bidx + 3], + values[if_bidx + 4], values[if_bidx + 5], +commit c810cd0d90baead96838145004545cc156b7ab77 +Author: James Ralph +Date: Tue Aug 13 14:13:55 2013 -0400 + + Close resource leaks + + User dcb reported several resource leaks in trac bug #184. + -------------------- + I just ran the static analysis checker "cppcheck" over the source + code of papi-5.2.0 + + It said + + 1. [linux-memory.c:711]: (error) Resource leak: sys_cpu + + 2. [papi_preset.c:735]: (error) Resource leak: fp + + 3. [components/micpower/linux-micpower.c:166]: (error) Resource leak: fp + + I've checked them all and they all look like resource leaks to me. + + Suggest code rework. 
+ ---------------------------------- + +diff --git a/src/components/micpower/linux-micpower.c b/src/components/micpower/linux-micpower.c +index 896e75f..4da4577 100644 +--- a/src/components/micpower/linux-micpower.c ++++ b/src/components/micpower/linux-micpower.c +@@ -163,6 +163,7 @@ read_sysfs_file( long long* counts) + retval&= fscanf(fp, "%lld %lld %lld", &counts[i], &counts[i+1], &counts[i+2] ); + } + ++ fclose(fp); + return retval; + } + +diff --git a/src/linux-memory.c b/src/linux-memory.c +index 6c69fbd..bf6c420 100644 +--- a/src/linux-memory.c ++++ b/src/linux-memory.c +@@ -707,6 +707,7 @@ sparc_sysfs_cpu_attr( char *name, char **result ) + return 0; + } + } ++ closedir( sys_cpu ); + return -1; + } + +diff --git a/src/papi_preset.c b/src/papi_preset.c +index 9485793..603c8df 100644 +--- a/src/papi_preset.c ++++ b/src/papi_preset.c +@@ -732,12 +732,14 @@ _xml_papi_hwi_setup_all_presets( char *arch, hwi_dev_notes_t * notes ) + + if ( !p ) { + PAPIERROR( "Couldn't allocate memory for XML parser." ); ++ fclose(fp); + return ( PAPI_ESYS ); + } + XML_SetElementHandler( p, _xml_start, _xml_end ); + XML_SetCharacterDataHandler( p, _xml_content ); + if ( fp == NULL ) { + PAPIERROR( "Error opening Preset XML file." ); ++ fclose(fp); + return ( PAPI_ESYS ); + } + +@@ -749,11 +751,13 @@ _xml_papi_hwi_setup_all_presets( char *arch, hwi_dev_notes_t * notes ) + + if ( buffer == NULL ) { + PAPIERROR( "Couldn't allocate memory for XML buffer." ); ++ fclose(fp); + return ( PAPI_ESYS ); + } + len = fread( buffer, 1, BUFFSIZE, fp ); + if ( ferror( fp ) ) { + PAPIERROR( "XML read error." 
); ++ fclose(fp); + return ( PAPI_ESYS ); + } + done = feof( fp ); +@@ -761,10 +765,13 @@ _xml_papi_hwi_setup_all_presets( char *arch, hwi_dev_notes_t * notes ) + PAPIERROR( "Parse error at line %d:\n%s\n", + XML_GetCurrentLineNumber( p ), + XML_ErrorString( XML_GetErrorCode( p ) ) ); ++ fclose(fp); + return ( PAPI_ESYS ); + } +- if ( error ) ++ if ( error ) { ++ fclose(fp); + return ( PAPI_ESYS ); ++ } + } while ( !done ); + XML_ParserFree( p ); + fclose( fp ); +commit e5b335740fad31cd230295508f2a4e9fb77a2878 +Author: James Ralph +Date: Fri Nov 8 15:15:28 2013 -0500 + + multiplex_cost: check return value on PAPI_set_opt + + Thanks to Will Cohen for reporting based upon output of coverity. + +diff --git a/src/utils/multiplex_cost.c b/src/utils/multiplex_cost.c +index 8fbd7a4..2f8216a 100644 +--- a/src/utils/multiplex_cost.c ++++ b/src/utils/multiplex_cost.c +@@ -112,7 +112,8 @@ init_test(int SoftwareMPX, int KernelMPX, int* Events) + option.multiplex.eventset = SoftwareMPX; + option.multiplex.ns = itimer.itimer.ns; + +- PAPI_set_opt( PAPI_MULTIPLEX, &option ); ++ if ( (retval = PAPI_set_opt( PAPI_MULTIPLEX, &option )) != PAPI_OK ) ++ test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval); + + for (i = 0; i < options.min - 1; i++) { + if ( options.kernel_mpx ) { +@@ -249,7 +250,8 @@ main( int argc, char **argv ) + option.multiplex.eventset = SoftwareMPX; + option.multiplex.ns = itimer.itimer.ns; + +- PAPI_set_opt( PAPI_MULTIPLEX, &option ); ++ if ( PAPI_OK != (retval = PAPI_set_opt( PAPI_MULTIPLEX, &option ))) ++ test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval); + + if ( !options.kernel_mpx && !options.force_sw ) { + test_fail(__FILE__, __LINE__, "No tests to run.", -1); +commit 83c31e25409040aac8178a4a3f89111efd060cc0 +Author: James Ralph +Date: Fri Nov 8 16:10:18 2013 -0500 + + perf_event.c: Check return value of ioctl + + Thanks to Will Cohen for reporting based upon output of coverity. 
+ +diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c +index 4b52cce..b4b2656 100644 +--- a/src/components/perf_event/perf_event.c ++++ b/src/components/perf_event/perf_event.c +@@ -1909,7 +1909,9 @@ _pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc) + return; + } + +- ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ); ++ if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) { ++ PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed.\n"); ++ } + + if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) && + !( thread->running_eventset[cidx]->profile.flags & +commit 60fb1dd4497df7d0ea77d88f586142ceb3e22b32 +Author: James Ralph +Date: Thu Nov 21 13:18:49 2013 -0500 + + command_line utility: Initialize a variable + + Initialize data_type to PAPI_DATATYPE_INT64 + Addresses a coverity error + Error: COMPILER_WARNING: [#def19] + papi-5.2.0/src/utils/command_line.c:133:4: warning: 'data_type' may be used uninitialized in this function [-Wmaybe-uninitialized] + switch (data_type) { + ^ + +diff --git a/src/utils/command_line.c b/src/utils/command_line.c +index 36f7df5..2f4a816 100644 +--- a/src/utils/command_line.c ++++ b/src/utils/command_line.c +@@ -56,7 +56,7 @@ main( int argc, char **argv ) + char *success; + PAPI_event_info_t info; + int EventSet = PAPI_NULL; +- int i, j, data_type, event; ++ int i, j, event, data_type = PAPI_DATATYPE_INT64; + int u_format = 0; + int hex_format = 0; + +commit e43b1138296866795c5db1a6dcd123d312af1b46 +Author: James Ralph +Date: Wed Jul 23 15:40:47 2014 -0400 + + native_avail.c: Bug fixes and updates + + Thanks to Gary Mohr + -------------------------------------------------- + This patch fixes a couple of problems found in the papi_native_avail program. + + First change fixes a problem introduced when the -validate option was added. This + option causes events to get added to an event set but never removes them. This change + will remove them if the add works. 
This change also fixes a coverity detected error + where the return value from PAPI_destroy_eventset was being ignored. + + Second change improves the delimiter check when separating the event description from + the event mask description. The previous check only looked for a colon but some of the + event descriptions contain a colon so descriptions would get displayed incorrectly. The + new check finds the "masks:" substring which is what papi inserts to separate these two + descriptions. + + Third change adds code to allow the user to enter events of the form pmu:::event or + pmu::event when using the -e option in the program. + +diff --git a/src/utils/native_avail.c b/src/utils/native_avail.c +index 2073ed7..59fc1b4 100644 +--- a/src/utils/native_avail.c ++++ b/src/utils/native_avail.c +@@ -227,13 +232,19 @@ parse_unit_masks( PAPI_event_info_t * info ) + if ( ( pmask = strchr( ptr, ':' ) ) == NULL ) { + return ( 0 ); + } +- memmove( info->symbol, pmask, ( strlen( pmask ) + 1 ) * sizeof ( char ) ); +- pmask = strchr( info->long_descr, ':' ); +- if ( pmask == NULL ) ++ memmove( info->symbol, pmask, ( strlen(pmask) + 1 ) * sizeof(char) ); ++ ++ // The description field contains the event description followed by a tag 'masks:' ++ // and then the mask description (if there was a mask with this event). The following ++ // code isolates the mask description part of this information.
++ ++ pmask = strstr( info->long_descr, "masks:" ); ++ if ( pmask == NULL ) { + info->long_descr[0] = 0; +- else +- memmove( info->long_descr, pmask + sizeof ( char ), +- ( strlen( pmask ) + 1 ) * sizeof ( char ) ); ++ } else { ++ pmask += 6; // bump pointer past 'masks:' identifier in description ++ memmove( info->long_descr, pmask, (strlen(pmask) + 1) * sizeof(char) ); ++ } + return ( 1 ); + } + +@@ -295,8 +306,20 @@ main( int argc, char **argv ) + "Event name:", info.symbol); + printf( "%-29s|%s|\n", "Description:", info.long_descr ); + ++ /* handle the PAPI component-style events which have a component:::event type */ ++ char *ptr; ++ if ((ptr=strstr(flags.name, ":::"))) { ++ ptr+=3; ++ /* handle libpfm4-style events which have a pmu::event type event name */ ++ } else if ((ptr=strstr(flags.name, "::"))) { ++ ptr+=2; ++ } ++ else { ++ ptr=flags.name; ++ } ++ + /* if unit masks exist but none specified, process all */ +- if ( !strchr( flags.name, ':' ) ) { ++ if ( !strchr( ptr, ':' ) ) { + if ( PAPI_enum_event( &i, PAPI_NTV_ENUM_UMASKS ) == PAPI_OK ) { + printf( "\nUnit Masks:\n" ); + do { +commit 74041b3ebcfc69575efb4ff830b9dc2f651b458b +Author: James Ralph +Date: Fri Aug 29 14:44:08 2014 -0400 + + event_info utility: address coverity defect + + From Gary Mohr + -------------- + This patch corrects a defect reported by Coverity. The defect reported + that the call to PAPI_enum_cmp_event was setting retval which was never + getting used before it got set again by a call to PAPI_get_event_info. + + After looking at the code, I decided that we should not be trying to get + the next event inside a loop that is enumerating masks for the current + event. It makes more sense to break out of the loop to get masks and + let the outer loop that is walking the events get the next event. 
+ -------------- + +diff --git a/src/utils/event_info.c b/src/utils/event_info.c +index d1010b6..1de375f 100644 +--- a/src/utils/event_info.c ++++ b/src/utils/event_info.c +@@ -237,8 +237,7 @@ enum_native_events( FILE * f, int cidx) + retval = PAPI_get_event_info( k, &info ); + if ( retval == PAPI_OK ) { + if ( test_event( k )!=PAPI_OK ) { +- retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cidx ); +- continue; ++ break; + } + xmlize_event( f, &info, -1 ); + } +commit 07990f85c706221f41d8b27bb2aebfc6c4874dbd +Author: James Ralph +Date: Tue Sep 2 11:54:07 2014 -0400 + + ctests/ Address coverity reported defects + + Thanks to Gary Mohr for the patch + --------------------------------- + The contents of this patch file fix defects reported by Coverity in the + directory 'papi/src/ctests'. + + The defect reported in branches.c was that a comparison between + different kinds of data was being done. + + The defect reported in calibrate.c was that the variable + 'papi_event_str' could end up without a null terminator. + + The defects reported in describe.c, get_event_component.c, and + krentel_pthreads.c were that return values from function calls were + being stored in a variable but never being used. + + I also did a little clean-up in describe.c. This test had been failing + for me on Intel NHM and SNBEP but now it runs and reports that it + PASSED.
+ --------------------------------- + +diff --git a/src/ctests/branches.c b/src/ctests/branches.c +index 930329e..5292323 100644 +--- a/src/ctests/branches.c ++++ b/src/ctests/branches.c +@@ -94,7 +94,7 @@ main( int argc, char **argv ) + /* Find a reasonable number of iterations (each + * event active 20 times) during the measurement + */ +- t2 = 10000 * 20 * nevents; /* Target: 10000 usec/multiplex, 20 repeats */ ++ t2 = (long long)(10000 * 20) * nevents; /* Target: 10000 usec/multiplex, 20 repeats */ + if ( t2 > 30e6 ) + test_skip( __FILE__, __LINE__, "This test takes too much time", + retval ); +diff --git a/src/ctests/calibrate.c b/src/ctests/calibrate.c +index a3dea0a..e370ba9 100644 +--- a/src/ctests/calibrate.c ++++ b/src/ctests/calibrate.c +@@ -160,7 +160,8 @@ main( int argc, char *argv[] ) + print_help( argv ); + exit( 1 ); + } +- strncpy( papi_event_str, argv[i + 1], sizeof ( papi_event_str ) ); ++ strncpy( papi_event_str, argv[i + 1], sizeof ( papi_event_str ) - 1); ++ papi_event_str[sizeof ( papi_event_str )-1] = '\0'; + i++; + } else if ( strstr( argv[i], "-d" ) ) + double_precision = 1; +diff --git a/src/ctests/describe.c b/src/ctests/describe.c +index d29bf72..f03309e 100644 +--- a/src/ctests/describe.c ++++ b/src/ctests/describe.c +@@ -25,7 +25,6 @@ main( int argc, char **argv ) + int retval; + long long g1[2]; + int eventcode = PAPI_TOT_INS; +- char eventname[PAPI_MAX_STR_LEN]; + PAPI_event_info_t info, info1, info2; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ +@@ -52,18 +51,19 @@ main( int argc, char **argv ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + /* Case 0, no info, should fail */ +- eventname[0] = '\0'; + eventcode = 0; + /* + if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) == PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); + */ ++ if (!TESTS_QUIET) { ++ printf("This test expects a 'PAPI Error' to be returned from this PAPI call.\n"); ++ } + if ( ( retval = 
PAPI_get_event_info( eventcode, &info ) ) == PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); + + /* Case 1, fill in name field. */ + eventcode = PAPI_TOT_INS; +- eventname[0] = '\0'; + /* + if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); +@@ -85,11 +85,9 @@ main( int argc, char **argv ) + if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); + */ +- strcpy( eventname, info1.symbol ); +- if ( ( retval = +- PAPI_event_name_to_code( eventname, +- ( int * ) &eventcode ) ) != PAPI_OK ) ++ if ( ( retval = PAPI_event_name_to_code( info1.symbol, ( int * ) &eventcode ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); ++ } + + if ( eventcode != PAPI_TOT_INS ) + test_fail( __FILE__, __LINE__, +diff --git a/src/ctests/get_event_component.c b/src/ctests/get_event_component.c +index ae1bdd9..874f394 100644 +--- a/src/ctests/get_event_component.c ++++ b/src/ctests/get_event_component.c +@@ -42,8 +42,7 @@ main( int argc, char **argv ) + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 2 ); + } + +- if (cmpinfo->disabled) +- { ++ if (cmpinfo->disabled && !TESTS_QUIET) { + printf( "Name: %-23s %s\n", cmpinfo->name ,cmpinfo->description); + printf(" \\-> Disabled: %s\n",cmpinfo->disabled_reason); + continue; +@@ -55,7 +54,12 @@ main( int argc, char **argv ) + if (retval!=PAPI_OK) continue; + + do { +- retval = PAPI_get_event_info( i, &info ); ++ if (PAPI_get_event_info( i, &info ) != PAPI_OK) { ++ if (!TESTS_QUIET) { ++ printf("Getting information about event: %#x failed\n", i); ++ } ++ continue; ++ } + our_cid=PAPI_get_event_component(i); + + if (our_cid!=cid) { +diff --git a/src/ctests/krentel_pthreads.c b/src/ctests/krentel_pthreads.c +index a8b97ff..2417976 100644 +--- a/src/ctests/krentel_pthreads.c ++++ 
b/src/ctests/krentel_pthreads.c +@@ -125,11 +125,18 @@ my_thread( void *v ) + } + + PAPI_stop( EventSet, &value ); +- PAPI_remove_event( EventSet, EVENT ); +- PAPI_destroy_eventset( &EventSet ); ++ retval = PAPI_remove_event( EventSet, EVENT ); ++ if ( PAPI_OK != retval ) { ++ test_fail( __FILE__, __LINE__, "PAPI_remove_event", retval ); ++ } ++ retval = PAPI_destroy_eventset( &EventSet ); ++ if ( PAPI_OK != retval ) { ++ test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); ++ } + retval = PAPI_unregister_thread( ); +- if ( retval != PAPI_OK ) ++ if ( PAPI_OK != retval ) { + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); ++ } + return ( NULL ); + } + +commit 266c61a4d4e5beee43cce7e3ef1d64da202c0b09 +Author: James Ralph +Date: Fri Sep 19 17:46:42 2014 -0400 + + Address coverity reported issues in src/ + + Thanks to Gary Mohr + ------------------- + Changes in this patch file: + + linux-common.c: Add code to ensure that cpu info vendor_string and + model_string buffers are NUL-terminated strings. Also ensure that + the value which gets read into mdi->exe_info.fullname gets NUL + terminated. This makes it safe to use the 'strxxx' functions on the + value (which is done immediately after it is read in). + + papi_hl.c: Fix call to _hl_rate_calls() where the third argument was + not the correct data type. + + papi_internal.c: Add code to ensure that event info name, short_desc, + and long_desc buffers are NUL-terminated strings. + + papi_user_events.c: While processing define symbols, ensure that the + 'local_line', 'name', and 'value' + buffers get NUL-terminated (so we can safely use + 'strxxx' functions on them). + Ensure that the 'symbol' field in the user defined + event ends up NUL-terminated. + Rearrange code to avoid falling through from one + case to the next in a switch statement. + Coverity flagged falling out the bottom of a case + statement as a potential defect but it + was doing what it should. 
+ + sw_multiplex.c: Unnecessary test. The value of ESI can not be NULL + when this code is reached. + + x86_cpuid_info.c: The variable need_leaf4 is set but not used. The + only place it gets set returns without + checking its value. The place that checks its value + never could have set its value non-zero. + +diff --git a/src/linux-common.c b/src/linux-common.c +index 4adc232..614a83c 100644 +--- a/src/linux-common.c ++++ b/src/linux-common.c +@@ -171,6 +171,7 @@ int + _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + { + int tmp, retval = PAPI_OK; ++ unsigned int strSize; + char maxargs[PAPI_HUGE_STR_LEN], *t, *s; + float mhz = 0.0; + FILE *f; +@@ -197,14 +198,17 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + /* Vendor Name and Vendor Code */ + rewind( f ); + s = search_cpu_info( f, "vendor_id", maxargs ); ++ strSize = sizeof(hwinfo->vendor_string); + if ( s && ( t = strchr( s + 2, '\n' ) ) ) { + *t = '\0'; ++ if (strlen(s+2) >= strSize-1) s[strSize+1] = '\0'; + strcpy( hwinfo->vendor_string, s + 2 ); + } else { + rewind( f ); + s = search_cpu_info( f, "vendor", maxargs ); + if ( s && ( t = strchr( s + 2, '\n' ) ) ) { + *t = '\0'; ++ if (strlen(s+2) >= strSize-1) s[strSize+1] = '\0'; + strcpy( hwinfo->vendor_string, s + 2 ); + } else { + rewind( f ); +@@ -212,6 +216,7 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + if ( s && ( t = strchr( s + 2, '\n' ) ) ) { + *t = '\0'; + s = strtok( s + 2, " " ); ++ if (strlen(s) >= strSize-1) s[strSize-1] = '\0'; + strcpy( hwinfo->vendor_string, s ); + } else { + rewind( f ); +@@ -258,14 +263,17 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + /* Model Name */ + rewind( f ); + s = search_cpu_info( f, "model name", maxargs ); ++ strSize = sizeof(hwinfo->model_string); + if ( s && ( t = strchr( s + 2, '\n' ) ) ) { + *t = '\0'; ++ if (strlen(s+2) >= strSize-1) s[strSize+1] = '\0'; + strcpy( hwinfo->model_string, s + 2 ); + } else { + rewind( f ); + s = 
search_cpu_info( f, "family", maxargs ); + if ( s && ( t = strchr( s + 2, '\n' ) ) ) { + *t = '\0'; ++ if (strlen(s+2) >= strSize-1) s[strSize+1] = '\0'; + strcpy( hwinfo->model_string, s + 2 ); + } else { + rewind( f ); +@@ -274,6 +282,7 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + *t = '\0'; + strtok( s + 2, " " ); + s = strtok( NULL, " " ); ++ if (strlen(s) >= strSize-1) s[strSize-1] = '\0'; + strcpy( hwinfo->model_string, s ); + } else { + rewind( f ); +@@ -282,6 +291,7 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + *t = '\0'; + /* get just the first token */ + s = strtok( s + 2, " " ); ++ if (strlen(s) >= strSize-1) s[strSize-1] = '\0'; + strcpy( hwinfo->model_string, s ); + } + } +@@ -444,15 +454,18 @@ _linux_get_system_info( papi_mdi_t *mdi ) { + mdi->pid = pid; + + sprintf( maxargs, "/proc/%d/exe", ( int ) pid ); +- if ( readlink( maxargs, mdi->exe_info.fullname, PAPI_HUGE_STR_LEN ) < 0 ) { ++ if ( (retval = readlink( maxargs, mdi->exe_info.fullname, PAPI_HUGE_STR_LEN-1 )) < 0 ) { + PAPIERROR( "readlink(%s) returned < 0", maxargs ); + return PAPI_ESYS; + } ++ if (retval > PAPI_HUGE_STR_LEN-1) retval=PAPI_HUGE_STR_LEN-1; ++ mdi->exe_info.fullname[retval] = '\0'; + + /* Careful, basename can modify it's argument */ + + strcpy( maxargs, mdi->exe_info.fullname ); +- strcpy( mdi->exe_info.address_info.name, basename( maxargs ) ); ++ strncpy( mdi->exe_info.address_info.name, basename( maxargs ), PAPI_HUGE_STR_LEN-1); ++ mdi->exe_info.address_info.name[PAPI_HUGE_STR_LEN-1] = '\0'; + + SUBDBG( "Executable is %s\n", mdi->exe_info.address_info.name ); + SUBDBG( "Full Executable is %s\n", mdi->exe_info.fullname ); +diff --git a/src/papi_hl.c b/src/papi_hl.c +index 19111e7..4fcfe23 100644 +--- a/src/papi_hl.c ++++ b/src/papi_hl.c +@@ -204,13 +204,13 @@ int + PAPI_flips( float *rtime, float *ptime, long long *flpins, float *mflips ) + { + int retval; +- int events = PAPI_FP_INS; ++ int events[1] = {PAPI_FP_INS}; + long long 
values = 0; + + if ( rtime == NULL || ptime == NULL || flpins == NULL || mflips == NULL ) + return PAPI_EINVAL; + +- retval = _hl_rate_calls( rtime, ptime, &events, &values, flpins, mflips, HL_FLIP ); ++ retval = _hl_rate_calls( rtime, ptime, events, &values, flpins, mflips, HL_FLIP ); + return ( retval ); + } + +@@ -259,13 +259,13 @@ int + PAPI_flops( float *rtime, float *ptime, long long *flpops, float *mflops ) + { + int retval; +- int events = PAPI_FP_OPS; ++ int events[1] = {PAPI_FP_OPS}; + long long values = 0; + + if ( rtime == NULL || ptime == NULL || flpops == NULL || mflops == NULL ) + return PAPI_EINVAL; + +- retval = _hl_rate_calls( rtime, ptime, &events, &values, flpops, mflops, HL_FLOP ); ++ retval = _hl_rate_calls( rtime, ptime, events, &values, flpops, mflops, HL_FLOP ); + return ( retval ); + } + +diff --git a/src/papi_internal.c b/src/papi_internal.c +index d354b76..3c51717 100644 +--- a/src/papi_internal.c ++++ b/src/papi_internal.c +@@ -2162,20 +2162,22 @@ _papi_hwi_get_preset_event_info( int EventCode, PAPI_event_info_t * info ) + unsigned int j; + + if ( _papi_hwi_presets[i].symbol ) { /* if the event is in the preset table */ +- /* set whole structure to 0 */ ++ // since we are setting the whole structure to zero the strncpy calls below will ++ // be leaving NULL terminates strings as long as they copy 1 less byte than the ++ // buffer size of the field. 
+ memset( info, 0, sizeof ( PAPI_event_info_t ) ); + + info->event_code = ( unsigned int ) EventCode; + strncpy( info->symbol, _papi_hwi_presets[i].symbol, +- sizeof(info->symbol)); ++ sizeof(info->symbol)-1); + + if ( _papi_hwi_presets[i].short_descr != NULL ) + strncpy( info->short_descr, _papi_hwi_presets[i].short_descr, +- sizeof ( info->short_descr ) ); ++ sizeof ( info->short_descr )-1 ); + + if ( _papi_hwi_presets[i].long_descr != NULL ) + strncpy( info->long_descr, _papi_hwi_presets[i].long_descr, +- sizeof ( info->long_descr ) ); ++ sizeof ( info->long_descr )-1 ); + + info->event_type = _papi_hwi_presets[i].event_type; + info->count = _papi_hwi_presets[i].count; +@@ -2185,17 +2187,17 @@ _papi_hwi_get_preset_event_info( int EventCode, PAPI_event_info_t * info ) + + if ( _papi_hwi_presets[i].postfix != NULL ) + strncpy( info->postfix, _papi_hwi_presets[i].postfix, +- sizeof ( info->postfix ) ); ++ sizeof ( info->postfix )-1 ); + + for(j=0;j < info->count; j++) { + info->code[j]=_papi_hwi_presets[i].code[j]; + strncpy(info->name[j], _papi_hwi_presets[i].name[j], +- sizeof(info->name[j])); ++ sizeof(info->name[j])-1); + } + + if ( _papi_hwi_presets[i].note != NULL ) { + strncpy( info->note, _papi_hwi_presets[i].note, +- sizeof ( info->note ) ); ++ sizeof ( info->note )-1 ); + } + + return PAPI_OK; +diff --git a/src/papi_user_events.c b/src/papi_user_events.c +index 04fc4af..b1f124a 100644 +--- a/src/papi_user_events.c ++++ b/src/papi_user_events.c +@@ -246,10 +246,11 @@ get_event_line( char **place, FILE * table, char **tmp_perfmon_events_table ) + + int add_define( char *line, list_t* LIST ) { + char *t; +- char local_line[USER_EVENT_OPERATION_LEN]; ++ char local_line[USER_EVENT_OPERATION_LEN+1]; + list_t *temp; + + strncpy( local_line, line, USER_EVENT_OPERATION_LEN ); ++ local_line[USER_EVENT_OPERATION_LEN] = '\0'; + + temp = (list_t*)papi_malloc(sizeof(list_t)); + +@@ -262,12 +263,14 @@ int add_define( char *line, list_t* LIST ) { + + /* next token should 
be the name */ + t = strtok(NULL, " "); +- strncpy( temp->name, t, PAPI_MIN_STR_LEN); ++ strncpy( temp->name, t, PAPI_MIN_STR_LEN-1); ++ temp->name[PAPI_MIN_STR_LEN-1] = '\0'; + + /* next token should be the value */ + t = strtok(NULL," "); + t[strlen(t)] = '\0'; +- strncpy( temp->value, t, PAPI_MIN_STR_LEN); ++ strncpy( temp->value, t, PAPI_MIN_STR_LEN-1); ++ temp->value[PAPI_MIN_STR_LEN-1] = '\0'; + + temp->next = LIST->next; + LIST->next = temp; +@@ -395,12 +398,15 @@ check_preset_events (char *target, user_defined_event_t* ue, int* msi) + strcat(ue->operation, temp); + ue->events[ue->count++] = _papi_hwi_presets[j].code[0]; + } else { +- op = '-'; + switch ( _papi_hwi_presets[j].derived_int ) { + case DERIVED_ADD: + case DERIVED_ADD_PS: +- op = '+'; + case DERIVED_SUB: ++ if (_papi_hwi_presets[j].derived_int == DERIVED_SUB) { ++ op = '-'; ++ } else { ++ op = '+'; ++ } + for ( k = 0; k < (int) _papi_hwi_presets[j].count; k++) { + ue->events[ue->count++] = _papi_hwi_presets[j].code[k]; + if (k%2) +@@ -574,7 +580,8 @@ load_user_event_table( char *file_name) + goto nextline; + } + +- strncpy(foo->symbol, t, PAPI_MIN_STR_LEN); ++ // the entire structure was zeroed so if we only copy one less that what fits in the 'symbol' buffer, it will insure that this buffer is NULL terminated ++ strncpy(foo->symbol, t, PAPI_MIN_STR_LEN-1); + #ifdef SHOW_LOADS + INTDBG("Found a user event named %s\n", foo->symbol ); + #endif +diff --git a/src/sw_multiplex.c b/src/sw_multiplex.c +index 22db6c2..4b2109c 100644 +--- a/src/sw_multiplex.c ++++ b/src/sw_multiplex.c +@@ -1136,8 +1136,6 @@ mpx_check( int EventSet ) + if ( strcmp( _papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0 ) { + unsigned int chk_domain = + PAPI_DOM_USER + PAPI_DOM_KERNEL + PAPI_DOM_SUPERVISOR; +- if ( ESI == NULL ) +- return ( PAPI_ENOEVST ); + + if ( ( ESI->domain.domain & chk_domain ) != chk_domain ) { + PAPIERROR +diff --git a/src/x86_cpuid_info.c b/src/x86_cpuid_info.c +index dacc021..2527f3c 100644 
+--- a/src/x86_cpuid_info.c ++++ b/src/x86_cpuid_info.c +@@ -1416,8 +1416,6 @@ init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels) + int size; /* size of the descriptor table */ + int last_level = 0; /* how many levels in the cache hierarchy */ + +- int need_leaf4=0; +- + /* All of Intel's cache info is in 1 call to cpuid + * however it is a table lookup :( + */ +@@ -1459,7 +1457,6 @@ init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels) + if ( i ) { /* skip the low order byte in eax [0]; it's the count (see above) */ + if ( reg.descrip[i] == 0xff ) { + MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n"); +- need_leaf4=1; + return PAPI_ENOSUPP; + /* we might continue instead */ + /* in order to get TLB info */ +@@ -1480,9 +1477,6 @@ init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels) + early_exit: + MEMDBG( "# of Levels: %d\n", last_level ); + *num_levels=last_level; +- if (need_leaf4) { +- return PAPI_ENOSUPP; +- } + return PAPI_OK; + } + diff --git a/SOURCES/papi-errmsg.patch b/SOURCES/papi-errmsg.patch new file mode 100644 index 0000000..88841e9 --- /dev/null +++ b/SOURCES/papi-errmsg.patch @@ -0,0 +1,242 @@ +commit a37160c18eb8106a2b61ed181e2479a83381f3dc +Author: James Ralph +Date: Fri Jul 25 15:59:41 2014 -0400 + + perf_event.c: cleanup error messages + + Thanks to Gary Mohr + ------------------- + This patch contains general cleanup code. Calls to PAPIERROR pass a string which does + not need to end with a new line because this function will always add one. New lines at + the end of strings passed to this function have been removed. These changes also add + some additional debug messages. 
+ +diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c +index 049c810..3eea38d 100644 +--- a/src/components/perf_event/perf_event.c ++++ b/src/components/perf_event/perf_event.c +@@ -300,11 +300,10 @@ sys_perf_event_open( struct perf_event_attr *hw_event, pid_t pid, int cpu, + { + int ret; + +- SUBDBG("sys_perf_event_open(%p,%d,%d,%d,%lx\n",hw_event,pid,cpu,group_fd,flags); ++ SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, group_fd: %d, flags: %lx\n", hw_event, pid, cpu, group_fd, flags); + SUBDBG(" type: %d\n",hw_event->type); + SUBDBG(" size: %d\n",hw_event->size); +- SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",hw_event->config, +- hw_event->config); ++ SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",hw_event->config, hw_event->config); + SUBDBG(" sample_period: %"PRIu64"\n",hw_event->sample_period); + SUBDBG(" sample_type: %"PRIu64"\n",hw_event->sample_type); + SUBDBG(" read_format: %"PRIu64"\n",hw_event->read_format); +@@ -323,6 +322,21 @@ sys_perf_event_open( struct perf_event_attr *hw_event, pid_t pid, int cpu, + SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec); + SUBDBG(" task: %d\n",hw_event->task); + SUBDBG(" watermark: %d\n",hw_event->watermark); ++ SUBDBG(" precise_ip: %d\n",hw_event->precise_ip); ++ SUBDBG(" mmap_data: %d\n",hw_event->mmap_data); ++ SUBDBG(" sample_id_all: %d\n",hw_event->sample_id_all); ++ SUBDBG(" exclude_host: %d\n",hw_event->exclude_host); ++ SUBDBG(" exclude_guest: %d\n",hw_event->exclude_guest); ++ SUBDBG(" exclude_callchain_kernel: %d\n",hw_event->exclude_callchain_kernel); ++ SUBDBG(" exclude_callchain_user: %d\n",hw_event->exclude_callchain_user); ++ SUBDBG(" wakeup_events: %"PRIx32" (%"PRIu32")\n", hw_event->wakeup_events, hw_event->wakeup_events); ++ SUBDBG(" bp_type: %"PRIx32" (%"PRIu32")\n", hw_event->bp_type, hw_event->bp_type); ++ SUBDBG(" config1: %"PRIx64" (%"PRIu64")\n", hw_event->config1, hw_event->config1); ++ SUBDBG(" config2: %"PRIx64" (%"PRIu64")\n", 
hw_event->config2, hw_event->config2); ++ SUBDBG(" branch_sample_type: %"PRIx64" (%"PRIu64")\n", hw_event->branch_sample_type, hw_event->branch_sample_type); ++ SUBDBG(" sample_regs_user: %"PRIx64" (%"PRIu64")\n", hw_event->sample_regs_user, hw_event->sample_regs_user); ++ SUBDBG(" sample_stack_user: %"PRIx32" (%"PRIu32")\n", hw_event->sample_stack_user, hw_event->sample_stack_user); ++ + ret = + syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags ); + SUBDBG("Returned %d %d %s\n",ret, +@@ -470,14 +484,14 @@ check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx ) + /* start the event */ + retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL ); + if (retval == -1) { +- PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed.\n"); ++ PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed"); + return PAPI_ESYS; + } + + /* stop the event */ + retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL ); + if (retval == -1) { +- PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed.\n" ); ++ PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" ); + return PAPI_ESYS; + } + +@@ -514,7 +528,7 @@ check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx ) + retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL ); + if (retval == -1) { + PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d " +- "(fd %d)failed.\n", ++ "(fd %d)failed", + i,ctl->num_events,idx,ctl->events[i].event_fd); + return PAPI_ESYS; + } +@@ -794,7 +808,7 @@ close_pe_events( pe_context_t *ctx, pe_control_t *ctl ) + if (ctl->num_events!=num_closed) { + if (ctl->num_events!=(num_closed+events_not_opened)) { + PAPIERROR("Didn't close all events: " +- "Closed %d Not Opened: %d Expected %d\n", ++ "Closed %d Not Opened: %d Expected %d", + num_closed,events_not_opened,ctl->num_events); + return PAPI_EBUG; + } +@@ -824,9 +838,7 @@ _pe_set_domain( hwd_control_state_t *ctl, int domain) + int i; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + +- SUBDBG("old control domain %d, new 
domain %d\n", +- pe_ctl->domain,domain); +- ++ SUBDBG("old control domain %d, new domain %d\n", pe_ctl->domain,domain); + pe_ctl->domain = domain; + + /* Force the domain on all events */ +@@ -915,6 +927,8 @@ int + _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) + { ++ SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n", ctx, ctl, events, flags); ++ + ( void ) flags; /*unused */ + int i, ret = -1; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; +@@ -964,7 +978,7 @@ _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + + /* We should read 3 64-bit values from the counter */ + if (ret<(signed)(3*sizeof(long long))) { +- PAPIERROR("Error! short read!\n"); ++ PAPIERROR("Error! short read"); + return PAPI_ESYS; + } + +@@ -1020,8 +1034,8 @@ _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + + /* we should read one 64-bit value from each counter */ + if (ret!=sizeof(long long)) { +- PAPIERROR("Error! short read!\n"); +- PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", ++ PAPIERROR("Error! short read"); ++ PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d", + pe_ctl->events[i].event_fd, + (long)pe_ctl->tid, pe_ctl->cpu, ret); + return PAPI_ESYS; +@@ -1042,7 +1056,7 @@ _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + + else { + if (pe_ctl->events[0].group_leader_fd!=-1) { +- PAPIERROR("Was expecting group leader!\n"); ++ PAPIERROR("Was expecting group leader"); + } + + ret = read( pe_ctl->events[0].event_fd, papi_pe_buffer, +@@ -1056,7 +1070,7 @@ _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + /* we read 1 64-bit value (number of events) then */ + /* num_events more 64-bit values that hold the counts */ + if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) { +- PAPIERROR("Error! short read!\n"); ++ PAPIERROR("Error! 
short read"); + return PAPI_ESYS; + } + +@@ -1072,7 +1086,7 @@ _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + + /* Make sure the kernel agrees with how many events we have */ + if (papi_pe_buffer[0]!=pe_ctl->num_events) { +- PAPIERROR("Error! Wrong number of events!\n"); ++ PAPIERROR("Error! Wrong number of events"); + return PAPI_ESYS; + } + +@@ -1106,6 +1120,7 @@ _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + /* point PAPI to the values we read */ + *events = pe_ctl->counts; + ++ SUBDBG("EXIT: *events: %p\n", *events); + return PAPI_OK; + } + +@@ -1134,7 +1149,7 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) + + /* ioctls always return -1 on failure */ + if (ret == -1) { +- PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed.\n"); ++ PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed"); + return PAPI_ESYS; + } + +@@ -1143,7 +1158,7 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) + } + + if (!did_something) { +- PAPIERROR("Did not enable any counters.\n"); ++ PAPIERROR("Did not enable any counters"); + return PAPI_EBUG; + } + +@@ -1157,6 +1172,7 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) + int + _pe_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) + { ++ SUBDBG( "ENTER: ctx: %p, ctl: %p\n", ctx, ctl); + + int ret; + int i; +@@ -1178,6 +1194,7 @@ _pe_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) + + pe_ctx->state &= ~PERF_EVENTS_RUNNING; + ++ SUBDBG( "EXIT:\n"); + return PAPI_OK; + } + +@@ -1190,6 +1207,7 @@ _pe_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, hwd_context_t *ctx ) + { ++ SUBDBG( "ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n", ctl, native, count, ctx); + int i = 0, ret; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; +@@ -1202,7 +1220,7 @@ _pe_update_control_state( hwd_control_state_t *ctl, + /* Calling with count==0 should be OK, it's how things are deallocated */ + /* when an eventset is 
destroyed. */ + if ( count == 0 ) { +- SUBDBG( "Called with count == 0\n" ); ++ SUBDBG( "EXIT: Called with count == 0\n" ); + return PAPI_OK; + } + +@@ -1236,15 +1254,16 @@ _pe_update_control_state( hwd_control_state_t *ctl, + pe_ctl->num_events = count; + _pe_set_domain( ctl, pe_ctl->domain ); + +- /* actuall open the events */ ++ /* actually open the events */ + /* (why is this a separate function?) */ + ret = open_pe_events( pe_ctx, pe_ctl ); + if ( ret != PAPI_OK ) { +- SUBDBG("open_pe_events failed\n"); ++ SUBDBG("EXIT: open_pe_events returned: %d\n", ret); + /* Restore values ? */ + return ret; + } + ++ SUBDBG( "EXIT:\n" ); + return PAPI_OK; + } + +@@ -1914,7 +1933,7 @@ _pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc) + } + + if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) { +- PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed.\n"); ++ PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed"); + } + + if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) && diff --git a/SOURCES/papi-hsw_ep.patch b/SOURCES/papi-hsw_ep.patch new file mode 100644 index 0000000..2b37e24 --- /dev/null +++ b/SOURCES/papi-hsw_ep.patch @@ -0,0 +1,31 @@ +From e5b1007a1c2ec49548104291f5a4ec5ee4d8ebfc Mon Sep 17 00:00:00 2001 +From: William Cohen +Date: Tue, 14 Oct 2014 10:30:17 -0400 +Subject: [PATCH] Recognize hsw_ep from newer versions of libpfm + +A recent September 11, 2014 patch (98c00b) to the upstream libpfm +split out Intel family 6 model 63 into its own name of "hsw_ep". The +papi_events.csv needs to be updated to support that new name. This +should have no impact for older libpfms that still identify Intel +family 6 model 63 as "hsw", since "hsw" and "hsw_ep" map to the same presets. 
+ +Signed-off-by: William Cohen +--- + src/papi_events.csv | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index aea3b04..752ec20 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -638,6 +638,7 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL + + # Intel Haswell events (and most likely also Sandy Bridge) + CPU,hsw ++CPU,hsw_ep + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK +-- +1.9.3 + diff --git a/SOURCES/papi-inficonst.patch b/SOURCES/papi-inficonst.patch new file mode 100644 index 0000000..57bba30 --- /dev/null +++ b/SOURCES/papi-inficonst.patch @@ -0,0 +1,13 @@ +diff -up papi-5.2.0/src/components/infiniband/linux-infiniband.c.decl papi-5.2.0/src/components/infiniband/linux-infiniband.c +--- papi-5.2.0/src/components/infiniband/linux-infiniband.c.decl 2015-08-10 16:04:04.528460972 -0400 ++++ papi-5.2.0/src/components/infiniband/linux-infiniband.c 2015-08-10 16:06:25.243444756 -0400 +@@ -27,6 +27,9 @@ + #include "papi_vector.h" + #include "papi_memory.h" + ++/* This is a hack: remove consts in the declarations to avoid a declaration mismatch. */ ++#define const ++ + #include "linux-infiniband.h" + + void (*_dl_non_dynamic_init)(void) __attribute__((weak)); diff --git a/SOURCES/papi-intel.patch b/SOURCES/papi-intel.patch new file mode 100644 index 0000000..4a9bcc6 --- /dev/null +++ b/SOURCES/papi-intel.patch @@ -0,0 +1,442 @@ +commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e +Author: James Ralph +Date: Mon Aug 26 10:23:52 2013 -0400 + + papi_events.csv: First draft preset events on HSW + + Contributed by Nils Smeds + ------------------------- + Here is a suggestion for addition to Hsw counters. These are not + rigorously tested. It compiles and loads. 
+ I'm rather uncertain on many of the events so I am hoping that adding + events like this will get some useful + feedback from the community so that we can improve. + ------------------------- + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2e0da80..39ec16c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK + #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS + # ++ ++# Intel Haswell events (and most likely also Sandy Bridge) ++CPU,hsw ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P ++PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK ++# Loads and stores ++PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS ++PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES ++PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES ++# L1 cache ++PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS ++PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT ++PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS ++# L2 cache ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES ++PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD ++PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT ++PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES ++PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT ++PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS ++# L3 cache ++PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE 
++PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT ++PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS ++# SMP ++PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY ++PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD ++PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM ++PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD ++# TLB ++PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK ++PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK ++# Prefetcher ++PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS ++# Stalls ++PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB ++PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY ++PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY ++PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1 ++PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS ++PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4 ++# Branches ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN ++PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++# End of hsw list ++# + CPU,Intel Core2 + CPU,Intel Core + CPU,core +commit f20568575d3d8023f4f97d3d968a606a51a1e01f +Author: James Ralph +Date: Tue Sep 17 09:06:50 2013 -0400 + + papi_events.csv: Add PAPI_L1_ICM for Haswell + + Thanks to Maurice Marks of Unisys for the contribution + ------------- + I've continued testing on Haswell. 
By comparison with Vtune and Emon on + Haswell I found that we can use + the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful + measure. + + Attached is my current version of papi_events.csv with Haswell fixes. + ------------- + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 39ec16c..01821a8 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_ + PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS + PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT + PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + # L2 cache + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES + PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT +commit b2d643df6a20a85e24a2f797c6bea164ed099a84 +Author: Vince Weaver +Date: Tue Nov 5 16:09:11 2013 -0500 + + Add floating point events for IvyBridge + + Now that Intel has documented them and libpfm4 supports them, PAPI + can use them. We just use the same events as on sandybridge. + + Tested on an ivybridge system. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 01821a8..42c1da0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL + PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS + PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES + # ++# Counts scalars only; no SSE or AVX is counted; includes speculative ++PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE ++PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE ++# ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE ++# + # Intel SandyBridge only + CPU,snb + CPU,snb_ep +@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL + PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK + # +-# Counts scalars only; no SSE or AVX is counted; includes speculative +-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE +-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE +-# +-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE 
+-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE +-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE +-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE +-# + # Intel IvyBridge only + CPU,ivb + CPU,ivb_ep +From 035fb0849fb84aa02b262b6abe67bc306c3a8600 Mon Sep 17 00:00:00 2001 +From: Vince Weaver +Date: Fri, 6 Dec 2013 13:03:39 -0500 +Subject: [PATCH 4/4] papi_events.csv : add initial atom silvermont support + +This is based on the manual, as I don't actually have one of these +chips. + +The events available differ a lot from older atoms. They also +support offcore events and some sort of RAPL support. +--- + src/papi_events.csv | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 42c1da0..0e1163e 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -356,6 +356,7 @@ CPU,ix86arch + PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED + PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS + # ++# Intel Atom + CPU,Intel Atom + CPU,atom + # +@@ -412,6 +413,29 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,MUL:AR + PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV:AR + PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED:VECTOR + # ++# Intel Atom Silvermont ++CPU,slm ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES ++PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES ++PRESET,PAPI_L1_ICH,DERIVED_SUB,ICACHE:ACCESSES,ICACHE:MISSES ++PRESET,PAPI_L1_TCM,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES ++PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES 
++PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES ++# ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED ++# ++PRESET,PAPI_RES_STL,NOT_DERIVED,UOPS_RETIRED:STALLS ++# ++#PRESET,PAPI_FP_INS,NOT_DERIVED,UOPS_RETIRED:X87 ++PRESET,PAPI_FML_INS,NOT_DERIVED,UOPS_RETIRED:MUL ++PRESET,PAPI_FDV_INS,NOT_DERIVED,UOPS_RETIRED:DIV ++# + CPU,Intel Nehalem + CPU,Intel Westmere + CPU,nhm +-- +1.8.3.1 + +commit c50e0dfed7e0624061d81059bbf6157ae6873e11 +Author: Vince Weaver +Date: Wed Mar 26 16:41:34 2014 -0400 + + remove Haswell PAPI_L1_TCA predefined event + + It was making the tests complain a lot, and as far as I can + tell there's no way to make the event. + + It had been set to + + MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS + + but you cannot have multiple umasks on MEM_LOAD_UOPS_RETIRED + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 22a82ad..e449529 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -641,7 +641,6 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS + PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES + PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES + # L1 cache +-PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS + PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT + PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS + PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD +commit a870eef277ea782e15f91582ce87c46652932e77 +Author: James Ralph +Date: Wed Apr 9 16:18:11 2014 -0400 + + Add x87 counts to FP_INS and FP_OPS on [S|I]VB + + In Sandy/Ivy Bridge processors it is safe to assume 3 general counters + and the definition of FP_OPS/INS was only using 2. This commit changes + the definition PAPI_FP_INS/OPS to include FP_COMP_OPS_EXE:X87 + + The effect appears minimal and improves counts with naively compiled + LAPACK. 
( gfortran version 4.6 on an IvyBridge with the default build + parameters for LAPACK produced no SSE/AVX ins, it did all its work + with X87 ins) + + If issues arise, this is safe to revert. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index e449529..441844e 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -601,8 +601,8 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS + PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES + # + # Counts scalars only; no SSE or AVX is counted; includes speculative +-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE +-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE ++PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87 ++PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:X87 + # + PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE + PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE +commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458 +Author: James Ralph +Date: Fri Jun 27 14:06:17 2014 -0400 + + Update preset mappings for Intel Haswell + + Patch due to Michel Brown @ Bull, many thanks. + ---------------------------------------------- + As I did for some earlier CPUs, I have made an update to the Haswell Preset + Cache Events to provide a more accurate and a more complete set of preset + cache events. + + I have validated with a test program all the events except the I-cache events. + The Haswell CPU used for the test was an “Intel(R) Xeon(R) CPU E5-2683 v3 @ + 2.00GHz” model 63. 
+ + I defined a couple of events that are not currently accepted by the preset + mechanism: PAPI_L2_LDH (Level 2 Cache Load Hits) and PAPI_L3_LDH (Level 3 Cache + Load Hits). I have validated the native events used for these presets. + + I will leave it to you decide whether these presets should be included. + They are in the file; but are commented out. + + The preset file for Haswell is already organized with the L1, L2 and L3 events + grouped together. For the preset definitions I felt it necessary to replace, + I commented them out. For the ones I added I put in a section following the + current group beginning with a “# Added by FMB” comment. + ---------------------------------------------- + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index dbbc8d8..97fd2ca 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -621,6 +621,11 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES: + # + # Intel IvyBridge only + CPU,ivb ++# Added by FMB ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT ++PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD ++PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD + CPU,ivb_ep + # + PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO +@@ -646,19 +651,42 @@ PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS + PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + # L2 cache + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES +-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT +-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS ++#PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT ++#PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS + PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD + PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT + PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS + 
PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD +-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES +-PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT +-PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS ++#PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES ++#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT ++#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS ++# Added by FMB ++PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO ++PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS ++PRESET,PAPI_L2_STM,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS ++PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO + # L3 cache +-PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE +-PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT +-PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS ++#PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE ++#PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT ++#PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS ++# Added by FMB ++PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD ++PRESET,PAPI_L3_DCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS ++PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT ++PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS ++PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES 
++PRESET,PAPI_L3_TCR,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:DEMAND_RFO_MISS ++PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS + # SMP + PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY + PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD +commit bf55b6b72f3ad6df59050739c248bc94ad9c6722 +Author: James Ralph +Date: Thu Jul 24 11:02:36 2014 -0400 + + Update HSW presets + + Thanks to Gary Mohr + ------------------- + Previously we sent updates to the PAPI preset event definitions to improve the + preset cache events on Haswell processors. In checking the latest source, it + looks like the L1 cache events changes did not get applied quite right. Here + is a patch to the latest source that will make it the way we had intended. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 97fd2ca..aea3b04 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -646,9 +646,14 @@ PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS + PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES + PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES + # L1 cache +-PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT +-PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS ++#PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT ++#PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS + PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++# Added by FMB ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT ++PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD ++PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD + # L2 cache + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES + #PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT +From bb8143e44aa9c249c79c3fd820e55678b01b19fa Mon Sep 17 00:00:00 2001 +From: William Cohen +Date: Sun, 28 Sep 2014 11:32:43 -0400 +Subject: [PATCH] Remove 
stray Intel Haswell events from Intel Ivy Bridge + presets + +Commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458 added some events +meant for Intel Haswell to the Intel Ivy bridge presets. This patch +removes those stray events. Without this patch on Intel Ivy Bridge +machines would see messages like the following: + +PAPI Error: papi_preset: Error finding event L2_TRANS:DEMAND_DATA_RD. +PAPI Error: papi_preset: Error finding event L2_RQSTS:ALL_DEMAND_REFERENCES. +--- + src/papi_events.csv | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index aea3b04..d6566c0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -621,11 +621,6 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES: + # + # Intel IvyBridge only + CPU,ivb +-# Added by FMB +-PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT +-PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD +-PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB +-PRESET,PAPI_L1_TCM,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD + CPU,ivb_ep + # + PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_RQSTS:ALL_RFO +-- +1.9.3 + diff --git a/SOURCES/papi-intel_knl.patch b/SOURCES/papi-intel_knl.patch new file mode 100644 index 0000000..2532ceb --- /dev/null +++ b/SOURCES/papi-intel_knl.patch @@ -0,0 +1,401 @@ +commit adbae8cd948234539d3ad63363878011e5a59949 +Author: Heike McCraw +Date: Thu Dec 11 12:07:38 2014 -0500 + + Update presets for Intel Haswell and Haswell-EP + (according to the updates of the libpfm4 event table + for Intel Haswell and Haswell-EP). + These mods have not been tested due to lacking access + to an Intel Haswell system. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index d27a41d..0a17ab3 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -634,7 +634,7 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL + # Intel Haswell events (and most likely also Sandy Bridge) + CPU,hsw + CPU,hsw_ep +-PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK + # Loads and stores +@@ -692,7 +692,7 @@ PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS + PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY + PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD + PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO +-PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD + # TLB + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK + +commit 1dbc7038450d53c7e8724f9e2cb0eb773bdc97bf +Author: Heike McCraw +Date: Thu May 28 13:46:52 2015 -0400 + + Temporary workaround: exclude_guest and exclude_host bits have to be + zero in the attribute structure (via :mg=1:mh=1). + + exclude_guest wasn't introduced until Linux 3.2, and so, running newer + PAPI versions with libpfm4 that allows exclude_guest to be set on older + kernels completely breaks all events unless :mg=1:mh=1 is passed. + + PAPI code passes the attribute block created by libpfm4 to the kernel + without modifying its contents. It would be better if libpfm4 provides + different defaults for these attribute bits. + This commit, however, enforces mg=1 and mh=1 for all Xeon Phi + predefined events. A problem with always enforcing mg=1 (i.e. 
+ exclude_guest=0) is that if exclude_guest=1 (mg=0) is needed (e.g., + to use PEBS) users have to fall back to using native events with the + appropriate qualifier settings. + + This issue has been extensively discussed on the mailing list (Subject + “KNC events", discussion started on 11/26/14) where more details can be + found. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 8fe0ae1..74da53c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1761,20 +1761,20 @@ PRESET,PAPI_TLB_TL,DERIVED_POSTFIX,N0|N1|+|N2|+|,PEVT_MMU_TLB_MISS_DIRECT_DERAT, + # Intel MIC / Xeon-Phi / Knights Corner + CPU,knc + # +-PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES +-PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCHES_MISPREDICTED +-PRESET,PAPI_L1_ICM,NOT_DERIVED,CODE_CACHE_MISS +-PRESET,PAPI_TLB_IM,NOT_DERIVED,CODE_PAGE_WALK +-PRESET,PAPI_L1_ICA,NOT_DERIVED,CODE_READ +-PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED +-PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_PAGE_WALK +-PRESET,PAPI_LD_INS,NOT_DERIVED,DATA_READ +-PRESET,PAPI_SR_INS,NOT_DERIVED,DATA_WRITE +-PRESET,PAPI_L1_DCM,NOT_DERIVED,DATA_READ_MISS_OR_WRITE_MISS +-PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_READ_OR_WRITE +-PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_EXECUTED +-PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_READ_MISS +-PRESET,PAPI_VEC_INS,NOT_DERIVED,VPU_INSTRUCTIONS_EXECUTED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES:mg=1:mh=1 ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCHES_MISPREDICTED:mg=1:mh=1 ++PRESET,PAPI_L1_ICM,NOT_DERIVED,CODE_CACHE_MISS:mg=1:mh=1 ++PRESET,PAPI_TLB_IM,NOT_DERIVED,CODE_PAGE_WALK:mg=1:mh=1 ++PRESET,PAPI_L1_ICA,NOT_DERIVED,CODE_READ:mg=1:mh=1 ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:mg=1:mh=1 ++PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_PAGE_WALK:mg=1:mh=1 ++PRESET,PAPI_LD_INS,NOT_DERIVED,DATA_READ:mg=1:mh=1 ++PRESET,PAPI_SR_INS,NOT_DERIVED,DATA_WRITE:mg=1:mh=1 ++PRESET,PAPI_L1_DCM,NOT_DERIVED,DATA_READ_MISS_OR_WRITE_MISS:mg=1:mh=1 ++PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_READ_OR_WRITE:mg=1:mh=1 
++PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_EXECUTED:mg=1:mh=1 ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_READ_MISS:mg=1:mh=1 ++PRESET,PAPI_VEC_INS,NOT_DERIVED,VPU_INSTRUCTIONS_EXECUTED:mg=1:mh=1 + + CPU,BGP + # The following PAPI presets are accurate for all application nodes + +commit f42eda64e7c3cc0784b3ce8b8a71f88647a61640 +Author: Heike McCraw +Date: Thu Jun 25 15:05:53 2015 -0400 + + Added definitions to Power8 for PAPI_SP_OPS, PAPI_DP_OPS. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 74da53c..40c562b 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1378,6 +1378,8 @@ PRESET,PAPI_TOT_INS,NOT_DERIVED,PM_INST_CMPL + #n/aPRESET,PAPI_INT_INS,DERIVED_ADD,PM_FXU0_FIN,PM_FXU1_FIN + PRESET,PAPI_FP_OPS,NOT_DERIVED,PM_FLOP + PRESET,PAPI_FP_INS,NOT_DERIVED,PM_FLOP ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|4|*|N1|8|*|N2|16|*|N3|32|*|+|+|+|,PM_VSU0_2FLOP,PM_VSU0_4FLOP,PM_VSU0_8FLOP,PM_VSU0_16FLOP ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|4|*|N1|8|*|N2|16|*|N3|32|*|+|+|+|,PM_VSU0_2FLOP,PM_VSU0_4FLOP,PM_VSU0_8FLOP,PM_VSU0_16FLOP + PRESET,PAPI_TOT_CYC,NOT_DERIVED,PM_RUN_CYC + PRESET,PAPI_HW_INT,NOT_DERIVED,PM_EXT_INT + PRESET,PAPI_STL_ICY,DERIVED_POSTFIX,N0|N1|-|,PM_RUN_CYC,PM_1PLUS_PPC_DISP + +commit 36c5b5b6b9bc90142743e4b62fa6cc8f99b3e46c +Author: Vince Weaver +Date: Thu Jun 25 22:20:17 2015 -0400 + + add broadwell predefined events + + For now they are the same as Haswell, as that's what the Linux kernel + does. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 74da53c..2ffb6f2 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -631,9 +631,11 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL + #PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS + # + +-# Intel Haswell events (and most likely also Sandy Bridge) ++# Intel Haswell events ++# Using also for Broadwell events, this is what the Linux kernel does + CPU,hsw + CPU,hsw_ep ++CPU,bdw + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK +@@ -692,7 +694,6 @@ PRESET,PAPI_L3_TCW,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS + PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY + PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD + PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO +-PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD + # TLB + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK +@@ -714,6 +715,13 @@ PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL + PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++ ++CPU,hsw ++CPU,hsw_ep ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM ++CPU,bdw ++PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM ++ + # End of hsw list + # + CPU,Intel Core2 + +commit 71dcdb92c477bf5d2f419c03f94783098a991214 +Merge: 36c5b5b f42eda6 +Author: Vince Weaver +Date: Thu Jun 25 22:21:06 2015 -0400 + + Merge branch 'master' of https://icl.cs.utk.edu/git/papi + +commit 0829a4f51b3de92de72f6c6185b99ece15e20254 +Author: Vince Weaver +Date: Fri Jun 26 11:41:42 2015 -0400 + + Add future broadwell-ep 
support. + + libpfm4 doesn't support it yet, but add it for when it appears. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index ca556c9..c38a892 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -636,6 +636,7 @@ PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTL + CPU,hsw + CPU,hsw_ep + CPU,bdw ++CPU,bdw_ep + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK +@@ -720,6 +721,7 @@ CPU,hsw + CPU,hsw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + CPU,bdw ++CPU,bdw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM + + # End of hsw list + +commit a10e8331ced0173ead9982c3f78c2e5238b04d66 +Author: Vince Weaver +Date: Wed Oct 21 08:58:20 2015 -0400 + + papi_events: add Intel Skylake presets + + This just shares all of the broadwell events with skylake. + Some quick tests show that this probably works. + Someone with skylake hardware should validate this at some point. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index c38a892..2865560 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -637,6 +637,7 @@ CPU,hsw + CPU,hsw_ep + CPU,bdw + CPU,bdw_ep ++CPU,skl + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P + PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK + +commit db9c70f517aae119145ef7ec5cded597b70b0437 +Author: Heike McCraw +Date: Fri Jun 17 18:11:07 2016 -0400 + + Added FP (SP, DP) presets for Skylake. Corrected L1_LDM|STM, L2_DCW|TCW, PRF_DM, STL_ICY presets for Skylake. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2865560..114149d 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -651,8 +651,6 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_ + PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + # Added by FMB + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT +-PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD +-PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB + PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD + # L2 cache + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES +@@ -667,7 +665,6 @@ PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + #PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS + # Added by FMB + PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS +-PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO + PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD + #PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT + PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS +@@ -675,7 +672,6 @@ PRESET,PAPI_L2_STM,NOT_DERIVED,L2_RQSTS:DEMAND_RFO_MISS + PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD + PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES + PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD +-PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO + # L3 cache + #PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE + #PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT +@@ -700,12 +696,9 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD + # TLB + PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK + PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK +-# Prefetcher +-PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS + # Stalls + PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB + PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY 
+-PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY + PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1 + PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS + PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4 +@@ -720,13 +713,41 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES + + CPU,hsw + CPU,hsw_ep ++CPU,bdw ++CPU,bdw_ep ++PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_TRANS:DEMAND_DATA_RD ++PRESET,PAPI_L1_STM,NOT_DERIVED,L2_TRANS:L1D_WB ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_TRANS:RFO ++PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_TRANS:RFO ++PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS ++PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY ++ ++CPU,hsw ++CPU,hsw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM + CPU,bdw + CPU,bdw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM + +-# End of hsw list ++CPU,skl ++# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++ ++PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD ++PRESET,PAPI_L1_STM,NOT_DERIVED,L2_RQSTS:ALL_RFO ++PRESET,PAPI_L2_DCW,DERIVED_ADD,L2_RQSTS:DEMAND_RFO_HIT,L2_RQSTS:RFO_HIT 
++PRESET,PAPI_L2_TCW,DERIVED_ADD,L2_RQSTS:DEMAND_RFO_HIT,L2_RQSTS:RFO_HIT ++PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:PF_MISS ++PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE ++ ++# End of hsw,bdw,skl list + # ++ ++ + CPU,Intel Core2 + CPU,Intel Core + CPU,core + +commit 1c64bfc0d4aa17aa36b8ab542c841203518e6df7 +Author: Heike Jagode +Date: Thu Jun 23 15:26:26 2016 -0400 + + Added FP (SP, DP) presets for Broadwell. NOT TESTED yet due to lack of access to bdw hardware + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 114149d..5c04442 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -725,10 +725,18 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY + CPU,hsw + CPU,hsw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HITM ++ + CPU,bdw + CPU,bdw_ep + PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM + ++# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE ++# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE ++ + CPU,skl + # PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE + PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE + +commit e9144b9bda355874a1cefd45285578f6c825cc31 +Author: Heike Jagode +Date: Thu 
Aug 18 16:34:54 2016 -0400 + + Added preset definitions for KNL. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2d2eca0..e3e80a4 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -754,7 +754,42 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE + + # End of hsw,bdw,skl list + # +- ++# ++# Intel MIC / Xeon-Phi / Knights Landing ++# ++CPU,knl ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++PRESET,PAPI_L1_ICM,NOT_DERIVED,ICACHE:MISSES ++PRESET,PAPI_L1_ICA,NOT_DERIVED,ICACHE:ACCESSES ++PRESET,PAPI_L1_ICH,NOT_DERIVED,ICACHE:HIT ++# ++PRESET,PAPI_L1_DCA,DERIVED_ADD,MEM_UOPS_RETIRED:ANY_LD,MEM_UOPS_RETIRED:ANY_ST ++PRESET,PAPI_L1_DCM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_DCU_MISS ++PRESET,PAPI_L1_TCM,DERIVED_ADD,MEM_UOPS_RETIRED:LD_DCU_MISS,ICACHE:MISSES ++PRESET,PAPI_L1_LDM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_DCU_MISS ++# ++PRESET,PAPI_L2_TCA,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_MISSES ++PRESET,PAPI_L2_TCH,DERIVED_SUB,LLC_REFERENCES,LLC_MISSES ++PRESET,PAPI_L2_LDM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_L2_MISS ++PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ANY_LD ++PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ANY_ST ++PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ANY_LD,MEM_UOPS_RETIRED:ANY_ST ++# ++PRESET,PAPI_TLB_DM,NOT_DERIVED,MEM_UOPS_RETIRED:LD_UTLB_MISS ++# ++PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCH_INSTRUCTIONS_RETIRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,MISPREDICTED_BRANCH_RETIRED ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:JCC ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BRANCH_INSTRUCTIONS_RETIRED,BR_INST_RETIRED:JCC ++PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:TAKEN_JCC ++PRESET,PAPI_BR_NTK,DERIVED_SUB,BR_INST_RETIRED:JCC,BR_INST_RETIRED:TAKEN_JCC ++# ++PRESET,PAPI_RES_STL,NOT_DERIVED,RS_FULL_STALL:ANY ++PRESET,PAPI_STL_ICY,NOT_DERIVED,NO_ALLOC_CYCLES:ANY ++# + + 
CPU,Intel Core2 + CPU,Intel Core diff --git a/SOURCES/papi-krentel.patch b/SOURCES/papi-krentel.patch new file mode 100644 index 0000000..5c8ae11 --- /dev/null +++ b/SOURCES/papi-krentel.patch @@ -0,0 +1,71 @@ +commit 71e6e5e52d7205a6a792bf9b754550a8719db6b1 +Author: Asim YarKhan +Date: Wed Feb 25 12:17:40 2015 -0500 + + Sync thread exit in krentel_pthreads.c + + Thanks to William Cohen for this patch and to Phil Mucci for approving it. + + William Cohen and Michael Petlan noticed that this test can have + threads dangling after the main thread is done. This patch tracks the + created threads and ensures that they are joined before the code + exits. + + Note: There is still some problem remaining. For example, the following + test will sometimes (maybe 1 of 10 runs) generate an error message. + > ./ctests/krentel_pthreads 8 2000 10 + .... + [10] time = 8, count = 38110, iter = 20, rate = 1905500.0/Kiter + PAPI Error: thread->running_eventset == NULL in _papi_pe_dispatch_timer for fd 14!. 
+ [0] time = 8, count = 38161, iter = 20, rate = 1908050.0/Kiter + krentel_pthreads.c PASSED + +diff --git a/src/ctests/krentel_pthreads.c b/src/ctests/krentel_pthreads.c +index 2417976..9fa3e25 100644 +--- a/src/ctests/krentel_pthreads.c ++++ b/src/ctests/krentel_pthreads.c +@@ -143,7 +143,7 @@ my_thread( void *v ) + int + main( int argc, char **argv ) + { +- pthread_t td; ++ pthread_t *td = NULL; + long n; + + tests_quiet( argc, argv ); /*Set TESTS_QUIET variable */ +@@ -155,6 +155,10 @@ main( int argc, char **argv ) + if ( argc < 4 || sscanf( argv[3], "%d", &num_threads ) < 1 ) + num_threads = 3; + ++ td = malloc((num_threads+1) * sizeof(pthread_t)); ++ if (!td) ++ test_fail( __FILE__, __LINE__, "td malloc failed", 1 ); ++ + printf( "program_time = %d, threshold = %d, num_threads = %d\n\n", + program_time, threshold, num_threads ); + +@@ -171,15 +175,22 @@ main( int argc, char **argv ) + gettimeofday( &start, NULL ); + + for ( n = 1; n <= num_threads; n++ ) { +- if ( pthread_create( &td, NULL, my_thread, ( void * ) n ) != 0 ) ++ if ( pthread_create( &(td[n]), NULL, my_thread, ( void * ) n ) != 0 ) + test_fail( __FILE__, __LINE__, "pthread create failed", 1 ); + } + + my_thread( ( void * ) 0 ); + ++ /* wait for all the threads */ ++ for ( n = 1; n <= num_threads; n++ ) { ++ if ( pthread_join( td[n], NULL)) ++ test_fail( __FILE__, __LINE__, "pthread join failed", 1 ); ++ } ++ ++ free(td); ++ + printf( "done\n" ); + + test_pass( __FILE__, NULL, 0 ); +- pthread_exit( NULL ); + return ( 0 ); + } diff --git a/SOURCES/papi-kvmrapl.patch b/SOURCES/papi-kvmrapl.patch new file mode 100644 index 0000000..daf3d30 --- /dev/null +++ b/SOURCES/papi-kvmrapl.patch @@ -0,0 +1,30 @@ +commit a5ecda79870b41f5b2ee21975f0f1c931114d258 +Author: sangamesh +Date: Thu Jun 11 17:48:36 2015 -0400 + + Thanks to William Cohen for the patch which does the following: + Checking the cpu family and module number is not sufficient to determine + whether RAPL can be used. 
If the papi is running inside a guest VM, + the MSR used by the PAPI RAPL component may not be available. There + should be a simple read test to verify the RAPL MSR registers are + available. This allows the component to more clearly report that RAPL + is unsupported rather than just exiting program when the RAPL + +diff --git a/src/components/rapl/linux-rapl.c b/src/components/rapl/linux-rapl.c +index f0d376e..8dff3fe 100644 +--- a/src/components/rapl/linux-rapl.c ++++ b/src/components/rapl/linux-rapl.c +@@ -447,6 +447,13 @@ _rapl_init_component( int cidx ) + return PAPI_ESYS; + } + ++ /* Verify needed MSR is readable. In a guest VM it may not be readable*/ ++ if (pread(fd, &result, sizeof result, MSR_RAPL_POWER_UNIT) != sizeof result ) { ++ strncpy(_rapl_vector.cmp_info.disabled_reason, ++ "Unable to access RAPL registers",PAPI_MAX_STR_LEN); ++ return PAPI_ESYS; ++ } ++ + /* Calculate the units used */ + result=read_msr(fd,MSR_RAPL_POWER_UNIT); + diff --git a/SOURCES/papi-postfixcalc.patch b/SOURCES/papi-postfixcalc.patch new file mode 100644 index 0000000..eb9b4ea --- /dev/null +++ b/SOURCES/papi-postfixcalc.patch @@ -0,0 +1,181 @@ +diff -up papi-5.2.0/src/papi_internal.c.postfix papi-5.2.0/src/papi_internal.c +--- papi-5.2.0/src/papi_internal.c.postfix 2016-07-25 14:25:09.331487358 -0400 ++++ papi-5.2.0/src/papi_internal.c 2016-07-25 14:27:17.271715363 -0400 +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + + #include "papi.h" + #include "papi_internal.h" +@@ -1840,89 +1841,90 @@ handle_derived_add_ps( int *position, lo + /* this function implement postfix calculation, it reads in a string where I use: + | as delimiter + N2 indicate No. 
2 native event in the derived preset +- +, -, *, /, % as operator ++ +, -, *, / as operator + # as MHZ(million hz) got from _papi_hwi_system_info.hw_info.cpu_max_mhz*1000000.0 + + Haihang (you@cs.utk.edu) + */ +-static long long +-_papi_hwi_postfix_calc( EventInfo_t * evi, long long *hw_counter ) +-{ +- char *point = evi->ops, operand[16]; +- double stack[PAPI_EVENTS_IN_DERIVED_EVENT]; +- int i, top = 0; +- +- memset(&stack,0,PAPI_EVENTS_IN_DERIVED_EVENT*sizeof(double)); +- +- while ( *point != '\0' ) { +- if ( *point == 'N' ) { /* to get count for each native event */ +- i = 0; +- point++; +- do { +- operand[i] = *point; +- point++; +- i++; +- } while ( *point != '|' ); +- operand[i] = '\0'; +- stack[top] = ( double ) hw_counter[evi->pos[atoi( operand )]]; +- top++; +- point++; +- } else if ( *point == '#' ) { /* to get mhz, ignore the rest char's */ +- stack[top] = _papi_hwi_system_info.hw_info.cpu_max_mhz * 1000000.0; +- top++; +- do { +- point++; +- } while ( *point != '|' ); +- point++; +- } else if ( isdigit( *point ) ) { /* to get integer, I suppose only integer will be used, +- no error check here, please only use integer */ +- i = 0; +- do { +- operand[i] = *point; +- point++; +- i++; +- } while ( *point != '|' ); +- operand[i] = '\0'; +- stack[top] = atoi( operand ); +- top++; +- point++; +- } else if ( *point == '+' ) { /* + calculation */ +- stack[top - 2] += stack[top - 1]; +- top--; +- do { +- point++; +- } while ( *point != '|' ); +- point++; +- } else if ( *point == '-' ) { /* - calculation */ +- stack[top - 2] -= stack[top - 1]; +- top--; +- do { +- point++; +- } while ( *point != '|' ); +- point++; +- } else if ( *point == '*' ) { /* * calculation */ +- stack[top - 2] *= stack[top - 1]; +- top--; +- do { +- point++; +- } while ( *point != '|' ); +- point++; +- } else if ( *point == '/' ) { /* / calculation */ +- stack[top - 2] /= stack[top - 1]; +- top--; +- do { +- point++; +- } while ( *point != '|' ); +- point++; +- } else { /* do nothing */ +- 
do { +- point++; +- } while ( *point != '|' ); +- point++; +- } +- } +- return ( long long ) stack[0]; +-} ++ static long long ++ _papi_hwi_postfix_calc( EventInfo_t * evi, long long *hw_counter ) ++ { ++ char *point = evi->ops, operand[16]; ++ double stack[PAPI_EVENTS_IN_DERIVED_EVENT]; ++ int i, val, top = 0; ++ ++ INTDBG("ENTER: evi: %p, evi->ops: %p (%s), evi->pos[0]: %d, evi->pos[1]: %d, hw_counter: %p (%lld %lld)\n", ++ evi, evi->ops, evi->ops, evi->pos[0], evi->pos[1], hw_counter, hw_counter[0], hw_counter[1]); ++ ++ memset(&stack,0,PAPI_EVENTS_IN_DERIVED_EVENT*sizeof(double)); ++ ++ while ( *point != '\0' ) { ++ if ( *point == '|' ) { /* consume '|' characters */ ++ point++; ++ } else if ( *point == 'N' ) { /* to get count for each native event */ ++ point++; ++ i = 0; ++ while ( isdigit(*point) ) { ++ assert(i<16); ++ operand[i] = *point; ++ point++; ++ i++; ++ } ++ assert(0pos[val]]; ++ top++; ++ } else if ( *point == '#' ) { /* to get mhz */ ++ point++; ++ assert( top < PAPI_EVENTS_IN_DERIVED_EVENT ); ++ stack[top] = _papi_hwi_system_info.hw_info.cpu_max_mhz * 1000000.0; ++ top++; ++ } else if ( isdigit( *point ) ) { ++ i = 0; ++ while ( isdigit(*point) ) { ++ assert(i<16); ++ operand[i] = *point; ++ point++; ++ i++; ++ } ++ assert(0= 2); ++ stack[top - 2] += stack[top - 1]; ++ top--; ++ } else if ( *point == '-' ) { /* - calculation */ ++ point++; ++ assert(top >= 2); ++ stack[top - 2] -= stack[top - 1]; ++ top--; ++ } else if ( *point == '*' ) { /* * calculation */ ++ point++; ++ assert(top >= 2); ++ stack[top - 2] *= stack[top - 1]; ++ top--; ++ } else if ( *point == '/' ) { /* / calculation */ ++ point++; ++ assert(top >= 2); ++ /* FIXME should handle runtime divide by zero */ ++ stack[top - 2] /= stack[top - 1]; ++ top--; ++ } else { /* flag an error parsing the preset */ ++ PAPIERROR( "BUG! 
Unable to parse \"%s\"", evi->ops ); ++ return ( long long ) stack[0]; ++ } ++ } ++ assert(top == 1); ++ INTDBG("EXIT: stack[0]: %lld\n", (long long)stack[0]); ++ return ( long long ) stack[0]; ++ } + + static long long + handle_derived( EventInfo_t * evi, long long *from ) diff --git a/SOURCES/papi-power9.patch b/SOURCES/papi-power9.patch new file mode 100644 index 0000000..ae82b64 --- /dev/null +++ b/SOURCES/papi-power9.patch @@ -0,0 +1,84 @@ +commit 6d13ae368b1ac4897da7ab9832aabe1f38147a4e +Author: Will Schmidt +Date: Thu Jun 22 17:28:20 2017 -0500 + + PAPI power9 event list presets + + Here is an initial set of events and changes to help support Power9. + + This is based on similar changes that were made for power8 when + initial support was added there. I've updated the event names to + match what we expect to have in power9, and have done compile/build/ + sniff tests. + + Signed-off-by: Will Schmidt + +diff --git a/src/linux-memory.c b/src/linux-memory.c +index 1686299..a74c866 100644 +--- a/src/linux-memory.c ++++ b/src/linux-memory.c +@@ -619,7 +619,8 @@ ppc64_get_memory_info( PAPI_hw_info_t * hw_info ) + case 0x3F: /* POWER7 */ + index = 3; + break; +- case 0x4b: /*POWER8*/ ++ case 0x4b: /* POWER8 */ ++ case 0x4e: /* POWER9 */ + index = 4; + break; + default: +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 4e24cfc..4385386 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1516,6 +1516,50 @@ PRESET,PAPI_SR_INS,NOT_DERIVED,PM_ST_FIN + #n/aPRESET,PAPI_BR_PRC,NOT_DERIVED,PM_BR_PRED + #n/aPRESET,PAPI_FXU_IDL,NOT_DERIVED,PM_FXU_IDLE + # ++CPU,POWER9 ++CPU,power9 ++# ++PRESET,PAPI_L1_DCM,DERIVED_ADD,PM_LD_MISS_L1,PM_ST_MISS_L1 ++PRESET,PAPI_L1_LDM,NOT_DERIVED,PM_LD_MISS_L1 ++PRESET,PAPI_L1_STM,NOT_DERIVED,PM_ST_MISS_L1 ++PRESET,PAPI_L1_DCW,DERIVED_SUB,PM_ST_FIN,PM_ST_MISS_L1 ++PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF_L1,PM_LD_MISS_L1 
++#PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_L1_DCA,DERIVED_ADD,PM_LD_REF_L1,PM_ST_CMPL ++PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS ++PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS ++PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS ++PRESET,PAPI_L3_DCR,NOT_DERIVED,PM_DATA_FROM_L2MISS ++PRESET,PAPI_L3_DCM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM ++PRESET,PAPI_L3_LDM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM ++PRESET,PAPI_L1_ICH,NOT_DERIVED,PM_INST_FROM_L1 ++PRESET,PAPI_L1_ICM,NOT_DERIVED,PM_L1_ICACHE_MISS ++PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_INST_FROM_L2MISS ++PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_L2_INST_MISS ++PRESET,PAPI_L2_ICH,NOT_DERIVED,PM_INST_FROM_L2 ++PRESET,PAPI_L3_ICA,NOT_DERIVED,PM_INST_FROM_L2MISS ++PRESET,PAPI_L3_ICH,NOT_DERIVED,PM_INST_FROM_L3 ++PRESET,PAPI_L3_ICM,NOT_DERIVED,PM_INST_FROM_L3MISS ++PRESET,PAPI_FMA_INS,NOT_DERIVED,PM_FMA_CMPL ++PRESET,PAPI_TOT_IIS,NOT_DERIVED,PM_INST_DISP ++PRESET,PAPI_TOT_INS,NOT_DERIVED,PM_INST_CMPL ++PRESET,PAPI_INT_INS,NOT_DERIVED,PM_FXU_FIN ++PRESET,PAPI_FP_OPS,NOT_DERIVED,PM_FLOP_CMPL ++PRESET,PAPI_FP_INS,NOT_DERIVED,PM_FLOP_CMPL ++PRESET,PAPI_DP_OPS,NOT_DERIVED,PM_DP_QP_FLOP_CMPL ++PRESET,PAPI_SP_OPS,NOT_DERIVED,PM_SP_FLOP_CMPL ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,PM_RUN_CYC ++PRESET,PAPI_HW_INT,NOT_DERIVED,PM_EXT_INT ++PRESET,PAPI_STL_ICY,DERIVED_POSTFIX,N0|N1|-|,PM_RUN_CYC,PM_1PLUS_PPC_DISP ++PRESET,PAPI_SR_INS,NOT_DERIVED,PM_ST_FIN ++PRESET,PAPI_LD_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_LST_INS,NOT_DERIVED,PM_LSU_FIN ++PRESET,PAPI_LST_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1,PM_ST_FIN ++PRESET,PAPI_BR_INS,NOT_DERIVED,PM_BRU_FIN ++PRESET,PAPI_BR_MSP,NOT_DERIVED,PM_TAKEN_BR_MPRED_CMPL ++PRESET,PAPI_BR_PRC,NOT_DERIVED,PM_BR_PRED ++PRESET,PAPI_FXU_IDL,NOT_DERIVED,PM_FXU_IDLE ++# + CPU,ultra12 + # + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLE_CNT diff --git a/SOURCES/papi-ppc64_cache.patch 
b/SOURCES/papi-ppc64_cache.patch new file mode 100644 index 0000000..99d0131 --- /dev/null +++ b/SOURCES/papi-ppc64_cache.patch @@ -0,0 +1,35 @@ +commit 45c2935e88d1eaf34c0769f9b514c0dcb0e43c1d +Author: William Cohen +Date: Wed Jun 22 14:08:30 2016 -0400 + + Correct IBM Power7 and Power8 computation of PAPI_L1_DCA + + When reviewing the test results for IBM Power7 and Power8 Michael + Petlan found that the PAPI_L1_DCA preset was incorrectly computed. + The L1 cache misses need to be subtracted rather than added to the + result. + + Signed-off-by: William Cohen + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 5c04442..2d2eca0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1349,7 +1349,7 @@ PRESET,PAPI_L1_LDM,NOT_DERIVED,PM_LD_MISS_L1 + PRESET,PAPI_L1_STM,NOT_DERIVED,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCW,DERIVED_SUB,PM_ST_FIN,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF_L1,PM_LD_MISS_L1 +-PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|+|N2|+|N3|+,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 + PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS + PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS + PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS +@@ -1398,7 +1398,7 @@ PRESET,PAPI_L1_LDM,NOT_DERIVED,PM_LD_MISS_L1 + PRESET,PAPI_L1_STM,NOT_DERIVED,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCW,DERIVED_SUB,PM_ST_FIN,PM_ST_MISS_L1 + PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF_L1,PM_LD_MISS_L1 +-PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|+|N2|+|N3|+,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 ++PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1 + PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS + #n/aPRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS + #n/aPRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS diff --git a/SOURCES/papi-rhbz1362591.patch b/SOURCES/papi-rhbz1362591.patch new file mode 100644 
index 0000000..389bab3 --- /dev/null +++ b/SOURCES/papi-rhbz1362591.patch @@ -0,0 +1,20 @@ +commit e9347373c8b18b5a2902e63dd5fd9df3e54f3216 +Author: James Ralph +Date: Mon Nov 18 10:39:42 2013 -0500 + + ctests/Makefile: Don't clobber value of LIBRARY + + TOOD: write a better message + +diff --git a/src/ctests/Makefile b/src/ctests/Makefile +index 0526555..5dba43b 100644 +--- a/src/ctests/Makefile ++++ b/src/ctests/Makefile +@@ -1,6 +1,6 @@ + # File: ctests/Makefile + INCLUDE = -I.. -I. -I../testlib +-PAPILIB = ../libpapi.a ++PAPILIB=$(LIBRARY) + CC = gcc + CC_R = $(CC) -pthread + CFLAGS = -g -O -Wall diff --git a/SOURCES/papi-schedule.patch b/SOURCES/papi-schedule.patch new file mode 100644 index 0000000..9c79623 --- /dev/null +++ b/SOURCES/papi-schedule.patch @@ -0,0 +1,198 @@ +commit 4718b4816fbb891c0adfca6412c99257c216e925 +Author: William Cohen +Date: Thu May 19 14:28:42 2016 -0400 + + Force all processors to check event schedulability by reading the counters + + There are situations where the perf_event_open syscall will return a + file descriptor for a set of events even when they cannot be scheduled + together. This occurs on 32-bit and 64-bit ARM processors and MIPS + processors. This problem also occurs on linux kernels older than + 2.6.33 and when the watchdog timer steals a performance counter. To + check that the performance counters are properly setup PAPI needs to + check that the counter values can be successfully read. Rather than + trying to avoid this test PAPI will now always do it. 
+ + Signed-off-by: William Cohen + +diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c +index d9c2c77..5698173 100644 +--- a/src/components/perf_event/perf_event.c ++++ b/src/components/perf_event/perf_event.c +@@ -58,9 +58,6 @@ + #define PERF_EVENTS_OPENED 0x01 + #define PERF_EVENTS_RUNNING 0x02 + +-/* Static globals */ +-int nmi_watchdog_active; +- + /* Forward declaration */ + papi_vector_t _perf_event_vector; + +@@ -180,34 +177,6 @@ pe_vendor_fixups(papi_vector_t *vector) + /******** Kernel Version Dependent Routines **********************/ + /******************************************************************/ + +-/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch +- * implementations (e.g. x86) which don't do a static event scheduability +- * check in sys_perf_event_open. +- * This was fixed for x86 in the 2.6.33 kernel +- * +- * Also! Kernels newer than 2.6.34 will fail in a similar way +- * if the nmi_watchdog has stolen a performance counter +- * and we try to use the maximum number of counters. +- * A sys_perf_event_open() will seem to succeed but will fail +- * at read time. So re-use this work around code. +- */ +-static int +-bug_check_scheduability(void) { +- +-#if defined(__powerpc__) +- /* PowerPC not affected by this bug */ +-#elif defined(__mips__) +- /* MIPS as of kernel 3.1 does not properly detect schedulability */ +- return 1; +-#else +- if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1; +-#endif +- +- if (nmi_watchdog_active) return 1; +- +- return 0; +-} +- + /* PERF_FORMAT_GROUP allows reading an entire group's counts at once */ + /* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results */ + /* from attached processes. 
We are lazy and disable it for all cases */ +@@ -508,68 +477,65 @@ check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx ) + long long papi_pe_buffer[READ_BUFFER_SIZE]; + int i,group_leader_fd; + +- if (bug_check_scheduability()) { ++ /* If the kernel isn't tracking scheduability right */ ++ /* Then we need to start/stop/read to force the event */ ++ /* to be scheduled and see if an error condition happens. */ ++ ++ /* get the proper fd to start */ ++ group_leader_fd=ctl->events[idx].group_leader_fd; ++ if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd; ++ ++ /* start the event */ ++ retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL ); ++ if (retval == -1) { ++ PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed"); ++ return PAPI_ESYS; ++ } + +- /* If the kernel isn't tracking scheduability right */ +- /* Then we need to start/stop/read to force the event */ +- /* to be scheduled and see if an error condition happens. */ ++ /* stop the event */ ++ retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL ); ++ if (retval == -1) { ++ PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" ); ++ return PAPI_ESYS; ++ } + +- /* get the proper fd to start */ +- group_leader_fd=ctl->events[idx].group_leader_fd; +- if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd; ++ /* See if a read returns any results */ ++ cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer)); ++ if ( cnt == -1 ) { ++ SUBDBG( "read returned an error! 
Should never happen.\n" ); ++ return PAPI_ESYS; ++ } + +- /* start the event */ +- retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL ); +- if (retval == -1) { +- PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed"); +- return PAPI_ESYS; +- } ++ if ( cnt == 0 ) { ++ /* We read 0 bytes if we could not schedule the event */ ++ /* The kernel should have detected this at open */ ++ /* but various bugs (including NMI watchdog) */ ++ /* result in this behavior */ + +- /* stop the event */ +- retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL ); +- if (retval == -1) { +- PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" ); +- return PAPI_ESYS; +- } ++ return PAPI_ECNFLCT; + +- /* See if a read returns any results */ +- cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer)); +- if ( cnt == -1 ) { +- SUBDBG( "read returned an error! Should never happen.\n" ); +- return PAPI_ESYS; +- } ++ } else { + +- if ( cnt == 0 ) { +- /* We read 0 bytes if we could not schedule the event */ +- /* The kernel should have detected this at open */ +- /* but various bugs (including NMI watchdog) */ +- /* result in this behavior */ +- +- return PAPI_ECNFLCT; +- +- } else { +- +- /* Reset all of the counters (opened so far) back to zero */ +- /* from the above brief enable/disable call pair. */ +- +- /* We have to reset all events because reset of group leader */ +- /* does not reset all. */ +- /* we assume that the events are being added one by one and that */ +- /* we do not need to reset higher events (doing so may reset ones */ +- /* that have not been initialized yet. */ +- +- /* Note... PERF_EVENT_IOC_RESET does not reset time running */ +- /* info if multiplexing, so we should avoid coming here if */ +- /* we are multiplexing the event. 
*/ +- for( i = 0; i < idx; i++) { +- retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL ); +- if (retval == -1) { +- PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d " +- "(fd %d)failed", +- i,ctl->num_events,idx,ctl->events[i].event_fd); +- return PAPI_ESYS; +- } +- } ++ /* Reset all of the counters (opened so far) back to zero */ ++ /* from the above brief enable/disable call pair. */ ++ ++ /* We have to reset all events because reset of group leader */ ++ /* does not reset all. */ ++ /* we assume that the events are being added one by one and that */ ++ /* we do not need to reset higher events (doing so may reset ones */ ++ /* that have not been initialized yet. */ ++ ++ /* Note... PERF_EVENT_IOC_RESET does not reset time running */ ++ /* info if multiplexing, so we should avoid coming here if */ ++ /* we are multiplexing the event. */ ++ for( i = 0; i < idx; i++) { ++ retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL ); ++ if (retval == -1) { ++ PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d " ++ "(fd %d)failed", ++ i,ctl->num_events,idx,ctl->events[i].event_fd); ++ return PAPI_ESYS; ++ } + } + } + return PAPI_OK; +@@ -1658,8 +1624,7 @@ _pe_init_component( int cidx ) + } + + /* Detect NMI watchdog which can steal counters */ +- nmi_watchdog_active=_linux_detect_nmi_watchdog(); +- if (nmi_watchdog_active) { ++ if (_linux_detect_nmi_watchdog()) { + SUBDBG("The Linux nmi_watchdog is using one of the performance " + "counters, reducing the total number available.\n"); + } diff --git a/SOURCES/papi-sys_mem_info.patch b/SOURCES/papi-sys_mem_info.patch new file mode 100644 index 0000000..2e43f2a --- /dev/null +++ b/SOURCES/papi-sys_mem_info.patch @@ -0,0 +1,33 @@ +From 3cf33279bcfc69b41bbfae827e4a1e6b3917d7a2 Mon Sep 17 00:00:00 2001 +From: Carl Love +Date: Fri, 10 Oct 2014 13:07:11 -0500 +Subject: [PATCH] PPC64 sys_mem_info array size is wrong + +The variable sys_mem_info is an array of type PAPI_mh_info_t. 
It is +statically declared as size 4. The data for POWER8 is statically +declared in entry 4 of the array which is beyond the allocated array. The +array should be declared without a size so the compiler will automatically +determine the correct size based on the number of elements being +initialized. This patch makes the change. + +Signed-off-by: Carl Love +--- + src/linux-memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/linux-memory.c b/src/linux-memory.c +index bf6c420..1686299 100644 +--- a/src/linux-memory.c ++++ b/src/linux-memory.c +@@ -315,7 +315,7 @@ ia64_get_memory_info( PAPI_hw_info_t * hw_info ) + + #if defined(__powerpc__) + +-PAPI_mh_info_t sys_mem_info[4] = { ++PAPI_mh_info_t sys_mem_info[] = { + {2, // 970 begin + { + { // level 1 begins +-- +1.8.4.2 + diff --git a/SOURCES/papi-xgene.patch b/SOURCES/papi-xgene.patch new file mode 100644 index 0000000..1318b9b --- /dev/null +++ b/SOURCES/papi-xgene.patch @@ -0,0 +1,34 @@ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index dbbc8d8..1d91bb0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1497,6 +1497,29 @@ PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_WRITE_ACCESS + PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_READ_REFILL + PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_WRITE_REFILL + # ++CPU,arm_xgene ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,INST_SPEC_EXEC_VFP ++PRESET,PAPI_VEC_INS,NOT_DERIVED,INST_SPEC_EXEC_SIMD ++PRESET,PAPI_BR_INS,NOT_DERIVED,INST_SPEC_EXEC_SOFT_PC ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCH_MISPRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,DATA_MEM_READ_ACCESS ++PRESET,PAPI_SR_INS,NOT_DERIVED,DATA_MEM_WRITE_ACCESS ++PRESET,PAPI_L1_DCA,DERIVED_ADD,L1D_READ_ACCESS,L1D_WRITE_ACCESS ++PRESET,PAPI_L1_DCM,DERIVED_ADD,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_READ_ACCESS ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_WRITE_ACCESS ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS 
++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_READ_ACCESS ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_WRITE_ACCESS ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_READ_REFILL ++PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_WRITE_REFILL ++# + CPU,mips_74k + # + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES diff --git a/SPECS/papi.spec b/SPECS/papi.spec new file mode 100644 index 0000000..aee5057 --- /dev/null +++ b/SPECS/papi.spec @@ -0,0 +1,397 @@ +%bcond_with bundled_libpfm +Summary: Performance Application Programming Interface +Name: papi +Version: 5.2.0 +Release: 24%{?dist} +License: BSD +Group: Development/System +URL: http://icl.cs.utk.edu/papi/ +Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root +Patch100: papi-intel.patch +Patch200: papi-aarch64.patch +Patch201: papi-xgene.patch +Patch300: papi-coverity.patch +Patch400: papi-hsw_ep.patch +Patch401: papi-kvmrapl.patch +Patch500: papi-sys_mem_info.patch +Patch700: papi-krentel.patch +Patch900: papi-inficonst.patch +Patch1000: papi-bz1263666.patch +Patch1001: papi-bz1326977.patch +Patch1002: papi-bz1277931.patch +Patch1003: papi-bz1313088.patch +Patch1004: papi-postfixcalc.patch +Patch1005: papi-errmsg.patch +Patch1006: papi-schedule.patch +Patch1010: papi-rhbz1362591.patch +Patch1011: papi-ppc64_cache.patch +Patch1012: papi-intel_knl.patch +Patch2000: papi-avoid_libpfm_enum.patch +Patch2001: papi-power9.patch +BuildRequires: autoconf +BuildRequires: doxygen +BuildRequires: ncurses-devel +BuildRequires: gcc-gfortran +BuildRequires: kernel-headers >= 2.6.32 +BuildRequires: chrpath +BuildRequires: lm_sensors-devel +%if %{without bundled_libpfm} +BuildRequires: libpfm-devel >= 4.4.0-9 +BuildRequires: libpfm-static >= 4.4.0-9 +%endif +# Following required for net component +BuildRequires: net-tools +# Following required for 
infiniband component +BuildRequires: libibmad-devel +#Right now libpfm does not know anything about s390 and will fail +ExcludeArch: s390 s390x + +%description +PAPI provides a programmer interface to monitor the performance of +running programs. + +%package devel +Summary: Header files for the compiling programs with PAPI +Group: Development/System +Requires: papi = %{version}-%{release} +%description devel +PAPI-devel includes the C header files that specify the PAPI user-space +libraries and interfaces. This is required for rebuilding any program +that uses PAPI. + +%package testsuite +Summary: Set of tests for checking PAPI functionality +Group: Development/System +Requires: papi = %{version}-%{release} +%description testsuite +PAPI-testsuite includes compiled versions of papi tests to ensure +that PAPI functions on particular hardware. + +%package static +Summary: Static libraries for the compiling programs with PAPI +Group: Development/System +Requires: papi = %{version}-%{release} +%description static +PAPI-static includes the static versions of the library files for +the PAPI user-space libraries and interfaces. + +%prep +%setup -q +%patch100 -p1 +%patch200 -p1 +%patch201 -p1 +%patch300 -p1 +%patch400 -p1 +%patch401 -p1 +%patch500 -p1 +%patch700 -p1 +%patch900 -p1 +%patch1000 -p1 +%patch1001 -p1 +%patch1002 -p1 +%patch1003 -p1 +%patch1004 -p1 +%patch1005 -p1 +%patch1006 -p1 +%patch1010 -p1 -b .rhbz1362591 +%patch1011 -p1 -b .ppc64cache +%patch1012 -p1 -b .knl +%patch2000 -p1 -b .max +%patch2001 -p1 -b .power9 + +%build +%if %{without bundled_libpfm} +# Use the system copy of libpfm instead of the bundled one.
+%global libpfm_config --with-pfm-incdir=%{_includedir} --with-pfm-libdir=%{_libdir} +%endif + +cd src +autoconf +%configure --with-perf-events \ +%{?libpfm_config} \ +--with-static-lib=yes --with-shared-lib=yes --with-shlib \ +--with-components="appio coretemp example lmsensors lustre micpower mx net rapl stealtime" +# implicit enabled components: perf_event perf_event_uncore +#components currently left out because of build configure/build issues +# --with-components="bgpm coretemp_freebsd cuda host_micpower nvml vmware" + +pushd components +#pushd cuda; ./configure; popd +#pushd host_micpower; ./configure; popd +#pushd infiniband; ./configure; popd +pushd lmsensors; \ + %configure --with-sensors_incdir=/usr/include/sensors \ + --with-sensors_libdir=%{_libdir}; \ + popd +#pushd vmware; ./configure; popd +popd + +#DBG workaround to make sure libpfm just uses the normal CFLAGS +DBG="" make %{?_smp_mflags} + +#generate updated versions of the documentation +#DBG workaround to make sure libpfm just uses the normal CFLAGS +pushd ../doc +DBG="" make +DBG="" make install +popd + +%install +rm -rf $RPM_BUILD_ROOT +cd src +make DESTDIR=$RPM_BUILD_ROOT LDCONFIG=/bin/true install-all + +chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* + +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root,-) +%{_bindir}/* +%{_libdir}/*.so.* +%dir /usr/share/papi +/usr/share/papi/papi_events.csv +%doc INSTALL.txt README LICENSE.txt RELEASENOTES.txt +%doc %{_mandir}/man1/* + +%files devel +%defattr(-,root,root,-) +%{_includedir}/*.h +%if %{with bundled_libpfm} +%{_includedir}/perfmon/*.h +%endif +%{_libdir}/*.so +%doc %{_mandir}/man3/* + +%files testsuite +%defattr(-,root,root,-) +/usr/share/papi/run_tests* +/usr/share/papi/ctests +/usr/share/papi/ftests +/usr/share/papi/components +/usr/share/papi/testlib + +%files static +%defattr(-,root,root,-) +%{_libdir}/*.a + +%changelog +* Fri Jun 23 2017 William Cohen - 5.2.0-24 +- Add 
power9 support. rhbz1368708 + +* Fri Jun 9 2017 William Cohen - 5.2.0-23 +- Avoid using PFM_PMU_MAX. + +* Wed Apr 26 2017 William Cohen - 5.2.0-22 +- Disable infiniband component. rhbz1445777 + +* Wed Mar 22 2017 William Cohen - 5.2.0-21 +- Correct ppc64 events. rhbz1385008 +- Add events for Intel KNL. rhbz1412952 + +* Tue Mar 21 2017 William Cohen - 5.2.0-20 +- Dynamically link the papi ctests. rhbz1362591 + +* Tue Aug 2 2016 William Cohen - 5.2.0-19 +- Rebuild with libpfm-4.7.0. + +* Wed Jul 27 2016 William Cohen - 5.2.0-18 +- Check schedulability on aarch64. + +* Tue Jul 26 2016 William Cohen - 5.2.0-17 +- Eliminate possible stack smashing. + +* Tue Jun 21 2016 William Cohen - 5.2.0-16 +- Correct ftests/tenth test behavior on power. rhbz1313088 + +* Thu May 12 2016 William Cohen - 5.2.0-15 +- Update papi L1 cache events on POWER7 +- Prevent papi-testsuite segfaults +- Identify kernels that support rdpmc +- Correct papi-testsuite byte_profile and sprofile tests on ppc64le +- Update PAPI_L1_TCM event on Haswells + +* Mon Aug 10 2015 William Cohen - 5.2.0-14 +- Fix build for newer infiniband libs. rhbz1251645 + +* Mon Jun 29 2015 William Cohen - 5.2.0-13 +- Bump and rebuild. + +* Fri Jun 26 2015 William Cohen - 5.2.0-12 +- Improved check that rapl is supported. rhbz1228832 + +* Thu Jun 4 2015 William Cohen - 5.2.0-11 +- Correct krentel test. rhbz1191199 + +* Thu Oct 16 2014 William Cohen - 5.2.0-10 +- Bump and rebuild for chained build. rhbz1126091 + +* Wed Oct 15 2014 Frank Ch. Eigler - 5.2.0-9 +- Make sys_mem_info size dynamic. Related rhbz1152641. + +* Tue Oct 14 2014 William Cohen - 5.2.0-8 +- Recognize hsw_ep from libpfm. Related rhbz1126090 + +* Thu Sep 25 2014 William Cohen - 5.2.0-7 +- Update papi presets for Intel processors. Resolves rhbz1126090 +- Eliminate some errors found by coverity. Related rhbz1057778 + +* Thu Sep 4 2014 William Cohen - 5.2.0-6 +- Add ARM aarch64 (APM X-Gene and Cortex A57) support. Resolves rhbz963458.
+ +* Tue Jan 14 2014 William Cohen - 5.2.0-5 +- Add presets for Intel Silvermont. + +* Mon Jan 13 2014 William Cohen - 5.2.0-4 +- Add presets for Haswell and Ivy Bridge. + +* Fri Dec 27 2013 Daniel Mach - 5.2.0-3 +- Mass rebuild 2013-12-27 + +* Wed Aug 14 2013 William Cohen - 5.2.0-2 +- Enable infiniband and stealtime components. + +* Wed Aug 07 2013 William Cohen - 5.2.0-1 +- Rebase to 5.2.0 + +* Sat Aug 03 2013 Fedora Release Engineering - 5.1.1-8 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild + +* Wed Jul 24 2013 William Cohen - 5.1.1-7 +- rhbz830275 - Add support for POWER8 processor to PAPI + +* Mon Jul 22 2013 William Cohen - 5.1.1-6 +- Add autoconf buildrequires. + +* Mon Jul 22 2013 William Cohen - 5.1.1-5 +- rhbz986673 - /usr/lib64/libpapi.so is unowned +- Package files in /usr/share/papi only once. +- Avoid dependency problem with parallel make of man pages. + +* Fri Jul 19 2013 William Cohen - 5.1.1-4 +- Correct changelog. + +* Fri Jul 5 2013 William Cohen - 5.1.1-3 +- Add man page corrections/updates. + +* Fri Jun 28 2013 William Cohen - 5.1.1-2 +- Add testsuite subpackage. + +* Thu May 30 2013 William Cohen - 5.1.1-1 +- Rebase to 5.1.1 + +* Mon Apr 15 2013 William Cohen - 5.1.0.2-2 +- Fix arm FTBS rhbz 951806. + +* Tue Apr 9 2013 William Cohen - 5.1.0.2-1 +- Rebase to 5.1.0.2 + +* Thu Feb 14 2013 Fedora Release Engineering - 5.0.1-6 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Mon Jan 14 2013 William Cohen - 5.0.1-5 +- Add armv7 cortex a15 presets. + +* Tue Dec 04 2012 William Cohen - 5.0.1-4 +- Disable ldconfig on install. + +* Thu Nov 08 2012 William Cohen - 5.0.1-3 +- Avoid duplicated shared library. + +* Wed Oct 03 2012 William Cohen - 5.0.1-2 +- Make sure using compatible version of libpfm. + +* Thu Sep 20 2012 William Cohen - 5.0.1-1 +- Rebase to 5.0.1. + +* Mon Sep 10 2012 William Cohen - 5.0.0-6 +- Back port fixes for Intel Ivy Bridge event presets. 
+ +* Thu Aug 30 2012 William Cohen - 5.0.0-5 +- Fixes to make papi with unbundled libpfm. + +* Mon Aug 27 2012 William Cohen - 5.0.0-2 +- Keep libpfm unbundled. + +* Fri Aug 24 2012 William Cohen - 5.0.0-1 +- Rebase to 5.0.0. + +* Fri Jul 20 2012 Fedora Release Engineering - 4.4.0-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Mon Jun 11 2012 William Cohen - 4.4.0-4 +- Use siginfo_t rather than struct siginfo. + +* Mon Jun 11 2012 William Cohen - 4.4.0-3 +- Correct build requires. + +* Mon Jun 11 2012 William Cohen - 4.4.0-2 +- Unbundle libpfm4 from papi. +- Correct description spellings. +- Remove unused test section. + +* Fri Apr 20 2012 William Cohen - 4.4.0-1 +- Rebase to 4.4.0. + +* Fri Mar 9 2012 William Cohen - 4.2.1-2 +- Fix overrun in lmsensor component. (rhbz797692) + +* Tue Feb 14 2012 William Cohen - 4.2.1-1 +- Rebase to 4.2.1. + +* Fri Jan 13 2012 Fedora Release Engineering - 4.2.0-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild + +* Wed Nov 02 2011 William Cohen - 4.2.0-3 +- Remove unwanted man1/*.c.1 files. (rhbz749725) + +* Mon Oct 31 2011 William Cohen - 4.2.0-2 +- Include appropriate man pages with papi rpm. (rhbz749725) +- Rebase to papi-4.2.0, fixup for coretemp component. (rhbz746851) + +* Thu Oct 27 2011 William Cohen - 4.2.0-1 +- Rebase to papi-4.2.0. + +* Fri Aug 12 2011 William Cohen - 4.1.3-3 +- Provide papi-static. + +* Thu May 12 2011 William Cohen - 4.1.3-2 +- Use corrected papi-4.1.3. + +* Thu May 12 2011 William Cohen - 4.1.3-1 +- Rebase to papi-4.1.3 + +* Tue Feb 08 2011 Fedora Release Engineering - 4.1.2.1-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + +* Mon Jan 24 2011 William Cohen - 4.1.2.1-1 +- Rebase to papi-4.1.2.1 + +* Fri Oct 1 2010 William Cohen - 4.1.1-1 +- Rebase to papi-4.1.1 + +* Tue Jun 22 2010 William Cohen - 4.1.0-1 +- Rebase to papi-4.1.0 + +* Mon May 17 2010 William Cohen - 4.0.0-5 +- Test run with upstream cvs version.
+ +* Wed Feb 10 2010 William Cohen - 4.0.0-4 +- Resolves: rhbz562935 Rebase to papi-4.0.0 (correct ExcludeArch). + +* Wed Feb 10 2010 William Cohen - 4.0.0-3 +- Resolves: rhbz562935 Rebase to papi-4.0.0 (bump nvr). + +* Wed Feb 10 2010 William Cohen - 4.0.0-2 +- correct the ctests/shlib test +- have PAPI_set_multiplex() return proper value +- properly handle event unit masks +- correct PAPI_name_to_code() to match events +- Resolves: rhbz562935 Rebase to papi-4.0.0 + +* Wed Jan 13 2010 William Cohen - 4.0.0-1 +- Generate papi.spec file for papi-4.0.0.