commit 4718b4816fbb891c0adfca6412c99257c216e925
Author: William Cohen <wcohen@redhat.com>
Date: Thu May 19 14:28:42 2016 -0400

Force all processors to check event schedulability by reading the counters

There are situations where the perf_event_open syscall returns a file
descriptor for a set of events even when those events cannot be
scheduled together. This occurs on 32-bit and 64-bit ARM processors
and on MIPS processors. The problem also occurs on Linux kernels
older than 2.6.33 and when the watchdog timer steals a performance
counter. To verify that the performance counters are properly set
up, PAPI needs to check that the counter values can be successfully
read. Rather than trying to avoid this test, PAPI will now always
perform it.
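
In outline, the forced check is: enable the event group, disable it,
then read it back; a zero-byte read means the kernel accepted the
events at open time but cannot actually schedule them together. A
minimal stand-alone sketch of that pattern (the helper name and
buffer size here are illustrative, not PAPI's own code):

    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/perf_event.h>

    /* Returns 0 if the events behind fd can be scheduled, -1 otherwise. */
    static int force_schedulability_check(int fd)
    {
        long long buf[64];
        ssize_t cnt;

        /* Briefly start and stop the group so the kernel must schedule it. */
        if (ioctl(fd, PERF_EVENT_IOC_ENABLE, NULL) == -1) return -1;
        if (ioctl(fd, PERF_EVENT_IOC_DISABLE, NULL) == -1) return -1;

        /* A 0-byte read means the group could not be scheduled even */
        /* though perf_event_open() succeeded; -1 is an outright error. */
        cnt = read(fd, buf, sizeof(buf));
        if (cnt <= 0) return -1;

        /* Discard the counts accumulated by the brief enable/disable. */
        if (ioctl(fd, PERF_EVENT_IOC_RESET, NULL) == -1) return -1;
        return 0;
    }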

Signed-off-by: William Cohen <wcohen@redhat.com>

diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
index d9c2c77..5698173 100644
--- a/src/components/perf_event/perf_event.c
+++ b/src/components/perf_event/perf_event.c
@@ -58,9 +58,6 @@
 #define PERF_EVENTS_OPENED 0x01
 #define PERF_EVENTS_RUNNING 0x02
 
-/* Static globals */
-int nmi_watchdog_active;
-
 /* Forward declaration */
 papi_vector_t _perf_event_vector;
 
@@ -180,34 +177,6 @@ pe_vendor_fixups(papi_vector_t *vector)
 /******** Kernel Version Dependent Routines **********************/
 /******************************************************************/
 
-/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch
- * implementations (e.g. x86) which don't do a static event scheduability
- * check in sys_perf_event_open.
- * This was fixed for x86 in the 2.6.33 kernel
- *
- * Also! Kernels newer than 2.6.34 will fail in a similar way
- * if the nmi_watchdog has stolen a performance counter
- * and we try to use the maximum number of counters.
- * A sys_perf_event_open() will seem to succeed but will fail
- * at read time. So re-use this work around code.
- */
-static int
-bug_check_scheduability(void) {
-
-#if defined(__powerpc__)
-    /* PowerPC not affected by this bug */
-#elif defined(__mips__)
-    /* MIPS as of kernel 3.1 does not properly detect schedulability */
-    return 1;
-#else
-    if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
-#endif
-
-    if (nmi_watchdog_active) return 1;
-
-    return 0;
-}
-
 /* PERF_FORMAT_GROUP allows reading an entire group's counts at once */
 /* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results */
 /* from attached processes. We are lazy and disable it for all cases */
@@ -508,68 +477,65 @@ check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx )
     long long papi_pe_buffer[READ_BUFFER_SIZE];
     int i,group_leader_fd;
 
-    if (bug_check_scheduability()) {
+    /* If the kernel isn't tracking scheduability right */
+    /* Then we need to start/stop/read to force the event */
+    /* to be scheduled and see if an error condition happens. */
+
+    /* get the proper fd to start */
+    group_leader_fd=ctl->events[idx].group_leader_fd;
+    if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
+
+    /* start the event */
+    retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
+    if (retval == -1) {
+        PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
+        return PAPI_ESYS;
+    }
 
-        /* If the kernel isn't tracking scheduability right */
-        /* Then we need to start/stop/read to force the event */
-        /* to be scheduled and see if an error condition happens. */
+    /* stop the event */
+    retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
+    if (retval == -1) {
+        PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
+        return PAPI_ESYS;
+    }
 
-        /* get the proper fd to start */
-        group_leader_fd=ctl->events[idx].group_leader_fd;
-        if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
+    /* See if a read returns any results */
+    cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
+    if ( cnt == -1 ) {
+        SUBDBG( "read returned an error! Should never happen.\n" );
+        return PAPI_ESYS;
+    }
 
-        /* start the event */
-        retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
-        if (retval == -1) {
-            PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
-            return PAPI_ESYS;
-        }
+    if ( cnt == 0 ) {
+        /* We read 0 bytes if we could not schedule the event */
+        /* The kernel should have detected this at open */
+        /* but various bugs (including NMI watchdog) */
+        /* result in this behavior */
 
-        /* stop the event */
-        retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
-        if (retval == -1) {
-            PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
-            return PAPI_ESYS;
-        }
+        return PAPI_ECNFLCT;
 
-        /* See if a read returns any results */
-        cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
-        if ( cnt == -1 ) {
-            SUBDBG( "read returned an error! Should never happen.\n" );
-            return PAPI_ESYS;
-        }
+    } else {
 
-        if ( cnt == 0 ) {
-            /* We read 0 bytes if we could not schedule the event */
-            /* The kernel should have detected this at open */
-            /* but various bugs (including NMI watchdog) */
-            /* result in this behavior */
-
-            return PAPI_ECNFLCT;
-
-        } else {
-
-            /* Reset all of the counters (opened so far) back to zero */
-            /* from the above brief enable/disable call pair. */
-
-            /* We have to reset all events because reset of group leader */
-            /* does not reset all. */
-            /* we assume that the events are being added one by one and that */
-            /* we do not need to reset higher events (doing so may reset ones */
-            /* that have not been initialized yet. */
-
-            /* Note... PERF_EVENT_IOC_RESET does not reset time running */
-            /* info if multiplexing, so we should avoid coming here if */
-            /* we are multiplexing the event. */
-            for( i = 0; i < idx; i++) {
-                retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
-                if (retval == -1) {
-                    PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
-                        "(fd %d)failed",
-                        i,ctl->num_events,idx,ctl->events[i].event_fd);
-                    return PAPI_ESYS;
-                }
-            }
+        /* Reset all of the counters (opened so far) back to zero */
+        /* from the above brief enable/disable call pair. */
+
+        /* We have to reset all events because reset of group leader */
+        /* does not reset all. */
+        /* we assume that the events are being added one by one and that */
+        /* we do not need to reset higher events (doing so may reset ones */
+        /* that have not been initialized yet. */
+
+        /* Note... PERF_EVENT_IOC_RESET does not reset time running */
+        /* info if multiplexing, so we should avoid coming here if */
+        /* we are multiplexing the event. */
+        for( i = 0; i < idx; i++) {
+            retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
+            if (retval == -1) {
+                PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
+                    "(fd %d)failed",
+                    i,ctl->num_events,idx,ctl->events[i].event_fd);
+                return PAPI_ESYS;
+            }
+        }
     }
     return PAPI_OK;
@@ -1658,8 +1624,7 @@ _pe_init_component( int cidx )
    }
 
    /* Detect NMI watchdog which can steal counters */
-   nmi_watchdog_active=_linux_detect_nmi_watchdog();
-   if (nmi_watchdog_active) {
+   if (_linux_detect_nmi_watchdog()) {
        SUBDBG("The Linux nmi_watchdog is using one of the performance "
            "counters, reducing the total number available.\n");
    }
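
Note: _linux_detect_nmi_watchdog() used above is defined elsewhere in
PAPI's Linux support code. One common way to make this determination,
shown here only as an illustrative sketch (this is not necessarily
PAPI's exact implementation), is to read the kernel's procfs knob:

    #include <stdio.h>

    /* Returns nonzero if /proc/sys/kernel/nmi_watchdog reports the */
    /* NMI watchdog as active, 0 if it is off or the knob is absent. */
    static int nmi_watchdog_enabled(void)
    {
        FILE *f = fopen("/proc/sys/kernel/nmi_watchdog", "r");
        int value = 0;

        if (f == NULL)
            return 0;
        if (fscanf(f, "%d", &value) != 1)
            value = 0;
        fclose(f);
        return value != 0;
    }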