a2eaef
commit 4718b4816fbb891c0adfca6412c99257c216e925
a2eaef
Author: William Cohen <wcohen@redhat.com>
a2eaef
Date:   Thu May 19 14:28:42 2016 -0400
a2eaef
a2eaef
    Force all processors to check event schedulability by reading the counters
a2eaef
    
a2eaef
    There are situations where the perf_event_open syscall will return a
a2eaef
    file descriptor for a set of events even when they cannot be scheduled
a2eaef
    together.  This occurs on 32-bit and 64-bit ARM processors and MIPS
a2eaef
    processors.  This problem also occurs on Linux kernels older than
a2eaef
    2.6.33 and when the watchdog timer steals a performance counter.  To
a2eaef
    check that the performance counters are properly set up, PAPI needs to
a2eaef
    check that the counter values can be successfully read.  Rather than
a2eaef
    trying to avoid this test, PAPI will now always do it.
a2eaef
    
a2eaef
    Signed-off-by: William Cohen <wcohen@redhat.com>
a2eaef
a2eaef
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
a2eaef
index d9c2c77..5698173 100644
a2eaef
--- a/src/components/perf_event/perf_event.c
a2eaef
+++ b/src/components/perf_event/perf_event.c
a2eaef
@@ -58,9 +58,6 @@
a2eaef
 #define PERF_EVENTS_OPENED  0x01
a2eaef
 #define PERF_EVENTS_RUNNING 0x02
a2eaef
 
a2eaef
-/* Static globals */
a2eaef
-int nmi_watchdog_active;
a2eaef
-
a2eaef
 /* Forward declaration */
a2eaef
 papi_vector_t _perf_event_vector;
a2eaef
 
a2eaef
@@ -180,34 +177,6 @@ pe_vendor_fixups(papi_vector_t *vector)
a2eaef
 /******** Kernel Version Dependent Routines  **********************/
a2eaef
 /******************************************************************/
a2eaef
 
a2eaef
-/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch
a2eaef
- * implementations (e.g. x86) which don't do a static event scheduability
a2eaef
- * check in sys_perf_event_open.
a2eaef
- * This was fixed for x86 in the 2.6.33 kernel
a2eaef
- *
a2eaef
- * Also! Kernels newer than 2.6.34 will fail in a similar way
a2eaef
- *       if the nmi_watchdog has stolen a performance counter
a2eaef
- *       and we try to use the maximum number of counters.
a2eaef
- *       A sys_perf_event_open() will seem to succeed but will fail
a2eaef
- *       at read time.  So re-use this work around code.
a2eaef
- */
a2eaef
-static int
a2eaef
-bug_check_scheduability(void) {
a2eaef
-
a2eaef
-#if defined(__powerpc__)
a2eaef
-  /* PowerPC not affected by this bug */
a2eaef
-#elif defined(__mips__)
a2eaef
-  /* MIPS as of kernel 3.1 does not properly detect schedulability */
a2eaef
-  return 1;
a2eaef
-#else
a2eaef
-  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
a2eaef
-#endif
a2eaef
-
a2eaef
-  if (nmi_watchdog_active) return 1;
a2eaef
-
a2eaef
-  return 0;
a2eaef
-}
a2eaef
-
a2eaef
 /* PERF_FORMAT_GROUP allows reading an entire group's counts at once   */
a2eaef
 /* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results   */
a2eaef
 /*  from attached processes.  We are lazy and disable it for all cases */
a2eaef
@@ -508,68 +477,65 @@ check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx )
a2eaef
    long long papi_pe_buffer[READ_BUFFER_SIZE];
a2eaef
    int i,group_leader_fd;
a2eaef
 
a2eaef
-   if (bug_check_scheduability()) {
a2eaef
+   /* If the kernel isn't tracking scheduability right       */
a2eaef
+   /* Then we need to start/stop/read to force the event     */
a2eaef
+   /* to be scheduled and see if an error condition happens. */
a2eaef
+
a2eaef
+   /* get the proper fd to start */
a2eaef
+   group_leader_fd=ctl->events[idx].group_leader_fd;
a2eaef
+   if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
a2eaef
+
a2eaef
+   /* start the event */
a2eaef
+   retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
a2eaef
+   if (retval == -1) {
a2eaef
+      PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
a2eaef
+      return PAPI_ESYS;
a2eaef
+   }
a2eaef
 
a2eaef
-      /* If the kernel isn't tracking scheduability right       */
a2eaef
-      /* Then we need to start/stop/read to force the event     */
a2eaef
-      /* to be scheduled and see if an error condition happens. */
a2eaef
+   /* stop the event */
a2eaef
+   retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
a2eaef
+   if (retval == -1) {
a2eaef
+      PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
a2eaef
+      return PAPI_ESYS;
a2eaef
+   }
a2eaef
 
a2eaef
-      /* get the proper fd to start */
a2eaef
-      group_leader_fd=ctl->events[idx].group_leader_fd;
a2eaef
-      if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
a2eaef
+   /* See if a read returns any results */
a2eaef
+   cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
a2eaef
+   if ( cnt == -1 ) {
a2eaef
+      SUBDBG( "read returned an error!  Should never happen.\n" );
a2eaef
+      return PAPI_ESYS;
a2eaef
+   }
a2eaef
 
a2eaef
-      /* start the event */
a2eaef
-      retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
a2eaef
-      if (retval == -1) {
a2eaef
-	 PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
a2eaef
-	 return PAPI_ESYS;
a2eaef
-      }
a2eaef
+   if ( cnt == 0 ) {
a2eaef
+      /* We read 0 bytes if we could not schedule the event */
a2eaef
+      /* The kernel should have detected this at open       */
a2eaef
+      /* but various bugs (including NMI watchdog)          */
a2eaef
+      /* result in this behavior                            */
a2eaef
 
a2eaef
-      /* stop the event */
a2eaef
-      retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
a2eaef
-      if (retval == -1) {
a2eaef
-	 PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
a2eaef
-	 return PAPI_ESYS;
a2eaef
-      }
a2eaef
+      return PAPI_ECNFLCT;
a2eaef
 
a2eaef
-      /* See if a read returns any results */
a2eaef
-      cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
a2eaef
-      if ( cnt == -1 ) {
a2eaef
-	 SUBDBG( "read returned an error!  Should never happen.\n" );
a2eaef
-	 return PAPI_ESYS;
a2eaef
-      }
a2eaef
+   } else {
a2eaef
 
a2eaef
-      if ( cnt == 0 ) {
a2eaef
-         /* We read 0 bytes if we could not schedule the event */
a2eaef
-         /* The kernel should have detected this at open       */
a2eaef
-         /* but various bugs (including NMI watchdog)          */
a2eaef
-         /* result in this behavior                            */
a2eaef
-
a2eaef
-	 return PAPI_ECNFLCT;
a2eaef
-
a2eaef
-     } else {
a2eaef
-
a2eaef
-	/* Reset all of the counters (opened so far) back to zero      */
a2eaef
-	/* from the above brief enable/disable call pair.              */
a2eaef
-
a2eaef
-	/* We have to reset all events because reset of group leader      */
a2eaef
-        /* does not reset all.                                            */
a2eaef
-	/* we assume that the events are being added one by one and that  */
a2eaef
-        /* we do not need to reset higher events (doing so may reset ones */
a2eaef
-        /* that have not been initialized yet.                            */
a2eaef
-
a2eaef
-	/* Note... PERF_EVENT_IOC_RESET does not reset time running       */
a2eaef
-	/* info if multiplexing, so we should avoid coming here if        */
a2eaef
-	/* we are multiplexing the event.                                 */
a2eaef
-        for( i = 0; i < idx; i++) {
a2eaef
-	   retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
a2eaef
-	   if (retval == -1) {
a2eaef
-	      PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
a2eaef
-			 "(fd %d)failed",
a2eaef
-			 i,ctl->num_events,idx,ctl->events[i].event_fd);
a2eaef
-	      return PAPI_ESYS;
a2eaef
-	   }
a2eaef
-	}
a2eaef
+      /* Reset all of the counters (opened so far) back to zero      */
a2eaef
+      /* from the above brief enable/disable call pair.              */
a2eaef
+
a2eaef
+      /* We have to reset all events because reset of group leader      */
a2eaef
+      /* does not reset all.                                            */
a2eaef
+      /* we assume that the events are being added one by one and that  */
a2eaef
+      /* we do not need to reset higher events (doing so may reset ones */
a2eaef
+      /* that have not been initialized yet.                            */
a2eaef
+
a2eaef
+      /* Note... PERF_EVENT_IOC_RESET does not reset time running       */
a2eaef
+      /* info if multiplexing, so we should avoid coming here if        */
a2eaef
+      /* we are multiplexing the event.                                 */
a2eaef
+      for( i = 0; i < idx; i++) {
a2eaef
+	 retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
a2eaef
+	 if (retval == -1) {
a2eaef
+	    PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
a2eaef
+		       "(fd %d)failed",
a2eaef
+		       i,ctl->num_events,idx,ctl->events[i].event_fd);
a2eaef
+	    return PAPI_ESYS;
a2eaef
+	 }
a2eaef
       }
a2eaef
    }
a2eaef
    return PAPI_OK;
a2eaef
@@ -1658,8 +1624,7 @@ _pe_init_component( int cidx )
a2eaef
   }
a2eaef
 
a2eaef
   /* Detect NMI watchdog which can steal counters */
a2eaef
-  nmi_watchdog_active=_linux_detect_nmi_watchdog();
a2eaef
-  if (nmi_watchdog_active) {
a2eaef
+  if (_linux_detect_nmi_watchdog()) {
a2eaef
     SUBDBG("The Linux nmi_watchdog is using one of the performance "
a2eaef
 	   "counters, reducing the total number available.\n");
a2eaef
   }