commit 861ff65d099c2b958321ca659786c7eb8a9a1130
Author: William Cohen <wcohen@redhat.com>
Date: Wed Nov 15 23:47:16 2017 -0500
Avoid statically limiting the number of lmsensor events allowed
Some high-end server machines provide more events than the 512 entries
limit imposed by the LM_SENSORS_MAX_COUNTERS define in the lmsensor
component (observed 577 entries on one machine). When this limit was
exceeded the lmsensor component would write beyond the array bounds
causing ctests/all_native_events to crash. Modified the lmsensor code
to dynamically allocate the required space for all the available
lmsensor entries on the machine. This allows ctests/all_native_events
to run to completion.
diff --git a/src/components/lmsensors/linux-lmsensors.c b/src/components/lmsensors/linux-lmsensors.c
index c9d8530d..07be89fd 100644
--- a/src/components/lmsensors/linux-lmsensors.c
+++ b/src/components/lmsensors/linux-lmsensors.c
@@ -43,8 +43,6 @@
/************************* DEFINES SECTION ***********************************
*******************************************************************************/
-/* this number assumes that there will never be more events than indicated */
-#define LM_SENSORS_MAX_COUNTERS 512
// time in usecs
#define LM_SENSORS_REFRESHTIME 200000
@@ -82,7 +80,6 @@ typedef struct _lmsensors_reg_alloc
typedef struct _lmsensors_control_state
{
- long_long counts[LM_SENSORS_MAX_COUNTERS]; // used for caching
long_long lastupdate;
} _lmsensors_control_state_t;
@@ -100,6 +97,7 @@ typedef struct _lmsensors_context
static _lmsensors_native_event_entry_t *lm_sensors_native_table;
/* number of events in the table*/
static int num_events = 0;
+long_long *cached_counts = NULL; // used for caching readings
@@ -304,6 +302,9 @@ _lmsensors_init_component( int cidx )
num_events = detectSensors( );
SUBDBG("Found %d sensors\n",num_events);
+ _lmsensors_vector.cmp_info.num_mpx_cntrs = num_events;
+ _lmsensors_vector.cmp_info.num_cntrs = num_events;
+
if ( ( lm_sensors_native_table =
calloc( num_events, sizeof ( _lmsensors_native_event_entry_t )))
== NULL ) {
@@ -312,6 +313,14 @@ _lmsensors_init_component( int cidx )
return PAPI_ENOMEM;
}
+ cached_counts = (long long*) calloc(num_events, sizeof(long long));
+
+ if (cached_counts == NULL) {
+ strncpy(_lmsensors_vector.cmp_info.disabled_reason,
+ "Could not malloc room",PAPI_MAX_STR_LEN);
+ return PAPI_ENOMEM;
+ }
+
if ( ( unsigned ) num_events != createNativeEvents( ) ) {
strncpy(_lmsensors_vector.cmp_info.disabled_reason,
"LM_SENSOR number mismatch",PAPI_MAX_STR_LEN);
@@ -413,8 +422,7 @@ _lmsensors_init_control_state( hwd_control_state_t *ctl )
int i;
for ( i = 0; i < num_events; i++ )
- ( ( _lmsensors_control_state_t * ) ctl )->counts[i] =
- getEventValue( i );
+ cached_counts[i] = getEventValue( i );
( ( _lmsensors_control_state_t * ) ctl )->lastupdate =
PAPI_get_real_usec( );
@@ -465,12 +473,12 @@ _lmsensors_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
if ( start - control->lastupdate > 200000 ) { // cache refresh
for ( i = 0; i < num_events; i++ ) {
- control->counts[i] = getEventValue( i );
+ cached_counts[i] = getEventValue( i );
}
control->lastupdate = PAPI_get_real_usec( );
}
- *events = control->counts;
+ *events = cached_counts;
return PAPI_OK;
}
@@ -478,6 +486,8 @@ _lmsensors_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
int
_lmsensors_shutdown_component( void )
{
+ if (cached_counts)
+ free(cached_counts);
/* Call the libsensors cleaning function before leaving */
sensors_cleanup( );
@@ -627,8 +637,8 @@ papi_vector_t _lmsensors_vector = {
.short_name = "lmsensors",
.version = "5.0",
.description = "Linux LMsensor statistics",
- .num_mpx_cntrs = LM_SENSORS_MAX_COUNTERS,
- .num_cntrs = LM_SENSORS_MAX_COUNTERS,
+ .num_mpx_cntrs = 0,
+ .num_cntrs = 0,
.default_domain = PAPI_DOM_USER,
//.available_domains = PAPI_DOM_USER,
.default_granularity = PAPI_GRN_THR,