Blame SOURCES/papi-schedule.patch

c7ea89
commit 4718b4816fbb891c0adfca6412c99257c216e925
c7ea89
Author: William Cohen <wcohen@redhat.com>
c7ea89
Date:   Thu May 19 14:28:42 2016 -0400
c7ea89
c7ea89
    Force all processors to check event schedulability by reading the counters
c7ea89
    
c7ea89
    There are situations where the perf_event_open syscall will return a
c7ea89
    file descriptor for a set of events even when they cannot be scheduled
c7ea89
    together.  This occurs on 32-bit and 64-bit ARM processors and MIPS
c7ea89
    processors.  This problem also occurs on Linux kernels older than
c7ea89
    2.6.33 and when the watchdog timer steals a performance counter.  To
c7ea89
    check that the performance counters are properly set up, PAPI needs to
c7ea89
    check that the counter values can be successfully read.  Rather than
c7ea89
    trying to avoid this test, PAPI will now always do it.
c7ea89
    
c7ea89
    Signed-off-by: William Cohen <wcohen@redhat.com>
c7ea89
c7ea89
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
c7ea89
index d9c2c77..5698173 100644
c7ea89
--- a/src/components/perf_event/perf_event.c
c7ea89
+++ b/src/components/perf_event/perf_event.c
c7ea89
@@ -58,9 +58,6 @@
c7ea89
 #define PERF_EVENTS_OPENED  0x01
c7ea89
 #define PERF_EVENTS_RUNNING 0x02
c7ea89
 
c7ea89
-/* Static globals */
c7ea89
-int nmi_watchdog_active;
c7ea89
-
c7ea89
 /* Forward declaration */
c7ea89
 papi_vector_t _perf_event_vector;
c7ea89
 
c7ea89
@@ -180,34 +177,6 @@ pe_vendor_fixups(papi_vector_t *vector)
c7ea89
 /******** Kernel Version Dependent Routines  **********************/
c7ea89
 /******************************************************************/
c7ea89
 
c7ea89
-/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch
c7ea89
- * implementations (e.g. x86) which don't do a static event scheduability
c7ea89
- * check in sys_perf_event_open.
c7ea89
- * This was fixed for x86 in the 2.6.33 kernel
c7ea89
- *
c7ea89
- * Also! Kernels newer than 2.6.34 will fail in a similar way
c7ea89
- *       if the nmi_watchdog has stolen a performance counter
c7ea89
- *       and we try to use the maximum number of counters.
c7ea89
- *       A sys_perf_event_open() will seem to succeed but will fail
c7ea89
- *       at read time.  So re-use this work around code.
c7ea89
- */
c7ea89
-static int
c7ea89
-bug_check_scheduability(void) {
c7ea89
-
c7ea89
-#if defined(__powerpc__)
c7ea89
-  /* PowerPC not affected by this bug */
c7ea89
-#elif defined(__mips__)
c7ea89
-  /* MIPS as of kernel 3.1 does not properly detect schedulability */
c7ea89
-  return 1;
c7ea89
-#else
c7ea89
-  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
c7ea89
-#endif
c7ea89
-
c7ea89
-  if (nmi_watchdog_active) return 1;
c7ea89
-
c7ea89
-  return 0;
c7ea89
-}
c7ea89
-
c7ea89
 /* PERF_FORMAT_GROUP allows reading an entire group's counts at once   */
c7ea89
 /* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results   */
c7ea89
 /*  from attached processes.  We are lazy and disable it for all cases */
c7ea89
@@ -508,68 +477,65 @@ check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx )
c7ea89
    long long papi_pe_buffer[READ_BUFFER_SIZE];
c7ea89
    int i,group_leader_fd;
c7ea89
 
c7ea89
-   if (bug_check_scheduability()) {
c7ea89
+   /* If the kernel isn't tracking scheduability right       */
c7ea89
+   /* Then we need to start/stop/read to force the event     */
c7ea89
+   /* to be scheduled and see if an error condition happens. */
c7ea89
+
c7ea89
+   /* get the proper fd to start */
c7ea89
+   group_leader_fd=ctl->events[idx].group_leader_fd;
c7ea89
+   if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
c7ea89
+
c7ea89
+   /* start the event */
c7ea89
+   retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
c7ea89
+   if (retval == -1) {
c7ea89
+      PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
c7ea89
+      return PAPI_ESYS;
c7ea89
+   }
c7ea89
 
c7ea89
-      /* If the kernel isn't tracking scheduability right       */
c7ea89
-      /* Then we need to start/stop/read to force the event     */
c7ea89
-      /* to be scheduled and see if an error condition happens. */
c7ea89
+   /* stop the event */
c7ea89
+   retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
c7ea89
+   if (retval == -1) {
c7ea89
+      PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
c7ea89
+      return PAPI_ESYS;
c7ea89
+   }
c7ea89
 
c7ea89
-      /* get the proper fd to start */
c7ea89
-      group_leader_fd=ctl->events[idx].group_leader_fd;
c7ea89
-      if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
c7ea89
+   /* See if a read returns any results */
c7ea89
+   cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
c7ea89
+   if ( cnt == -1 ) {
c7ea89
+      SUBDBG( "read returned an error!  Should never happen.\n" );
c7ea89
+      return PAPI_ESYS;
c7ea89
+   }
c7ea89
 
c7ea89
-      /* start the event */
c7ea89
-      retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
c7ea89
-      if (retval == -1) {
c7ea89
-	 PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
c7ea89
-	 return PAPI_ESYS;
c7ea89
-      }
c7ea89
+   if ( cnt == 0 ) {
c7ea89
+      /* We read 0 bytes if we could not schedule the event */
c7ea89
+      /* The kernel should have detected this at open       */
c7ea89
+      /* but various bugs (including NMI watchdog)          */
c7ea89
+      /* result in this behavior                            */
c7ea89
 
c7ea89
-      /* stop the event */
c7ea89
-      retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
c7ea89
-      if (retval == -1) {
c7ea89
-	 PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
c7ea89
-	 return PAPI_ESYS;
c7ea89
-      }
c7ea89
+      return PAPI_ECNFLCT;
c7ea89
 
c7ea89
-      /* See if a read returns any results */
c7ea89
-      cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
c7ea89
-      if ( cnt == -1 ) {
c7ea89
-	 SUBDBG( "read returned an error!  Should never happen.\n" );
c7ea89
-	 return PAPI_ESYS;
c7ea89
-      }
c7ea89
+   } else {
c7ea89
 
c7ea89
-      if ( cnt == 0 ) {
c7ea89
-         /* We read 0 bytes if we could not schedule the event */
c7ea89
-         /* The kernel should have detected this at open       */
c7ea89
-         /* but various bugs (including NMI watchdog)          */
c7ea89
-         /* result in this behavior                            */
c7ea89
-
c7ea89
-	 return PAPI_ECNFLCT;
c7ea89
-
c7ea89
-     } else {
c7ea89
-
c7ea89
-	/* Reset all of the counters (opened so far) back to zero      */
c7ea89
-	/* from the above brief enable/disable call pair.              */
c7ea89
-
c7ea89
-	/* We have to reset all events because reset of group leader      */
c7ea89
-        /* does not reset all.                                            */
c7ea89
-	/* we assume that the events are being added one by one and that  */
c7ea89
-        /* we do not need to reset higher events (doing so may reset ones */
c7ea89
-        /* that have not been initialized yet.                            */
c7ea89
-
c7ea89
-	/* Note... PERF_EVENT_IOC_RESET does not reset time running       */
c7ea89
-	/* info if multiplexing, so we should avoid coming here if        */
c7ea89
-	/* we are multiplexing the event.                                 */
c7ea89
-        for( i = 0; i < idx; i++) {
c7ea89
-	   retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
c7ea89
-	   if (retval == -1) {
c7ea89
-	      PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
c7ea89
-			 "(fd %d)failed",
c7ea89
-			 i,ctl->num_events,idx,ctl->events[i].event_fd);
c7ea89
-	      return PAPI_ESYS;
c7ea89
-	   }
c7ea89
-	}
c7ea89
+      /* Reset all of the counters (opened so far) back to zero      */
c7ea89
+      /* from the above brief enable/disable call pair.              */
c7ea89
+
c7ea89
+      /* We have to reset all events because reset of group leader      */
c7ea89
+      /* does not reset all.                                            */
c7ea89
+      /* we assume that the events are being added one by one and that  */
c7ea89
+      /* we do not need to reset higher events (doing so may reset ones */
c7ea89
+      /* that have not been initialized yet.                            */
c7ea89
+
c7ea89
+      /* Note... PERF_EVENT_IOC_RESET does not reset time running       */
c7ea89
+      /* info if multiplexing, so we should avoid coming here if        */
c7ea89
+      /* we are multiplexing the event.                                 */
c7ea89
+      for( i = 0; i < idx; i++) {
c7ea89
+	 retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
c7ea89
+	 if (retval == -1) {
c7ea89
+	    PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
c7ea89
+		       "(fd %d)failed",
c7ea89
+		       i,ctl->num_events,idx,ctl->events[i].event_fd);
c7ea89
+	    return PAPI_ESYS;
c7ea89
+	 }
c7ea89
       }
c7ea89
    }
c7ea89
    return PAPI_OK;
c7ea89
@@ -1658,8 +1624,7 @@ _pe_init_component( int cidx )
c7ea89
   }
c7ea89
 
c7ea89
   /* Detect NMI watchdog which can steal counters */
c7ea89
-  nmi_watchdog_active=_linux_detect_nmi_watchdog();
c7ea89
-  if (nmi_watchdog_active) {
c7ea89
+  if (_linux_detect_nmi_watchdog()) {
c7ea89
     SUBDBG("The Linux nmi_watchdog is using one of the performance "
c7ea89
 	   "counters, reducing the total number available.\n");
c7ea89
   }