diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
index 8d39d2c..ba265ed 100644
--- a/cxl/lib/libcxl.c
+++ b/cxl/lib/libcxl.c
@@ -1483,7 +1483,7 @@ struct cel_entry {
 	__le16 effect;
 } __attribute__((packed));
 
-CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
+CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev, const char *uuid)
 {
 	struct cxl_cmd *cmd;
 	struct cxl_mbox_get_log *get_log_input;
@@ -1491,6 +1491,12 @@ CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
 	int no_cel_entries;
 	int rc = 0;
 
+	if (!uuid) {
+		fprintf(stderr, "%s: Please specify log uuid argument\n",
+				cxl_memdev_get_devname(memdev));
+		return -EINVAL;
+	}
+
 	cmd = cxl_cmd_new_generic(memdev, CXL_MEM_COMMAND_ID_GET_LOG);
 	if (!cmd) {
 		fprintf(stderr, "%s: cxl_memdev_get_cel_log returned Null output\n",
@@ -1499,7 +1505,13 @@ CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
 	}
 
 	get_log_input = (void *) cmd->send_cmd->in.payload;
-	uuid_parse(CEL_UUID, get_log_input->uuid);
+	/* uuid is caller-supplied now: reject a malformed string instead of sending a stale payload */
+	if (uuid_parse(uuid, get_log_input->uuid) < 0) {
+		fprintf(stderr, "%s: invalid log uuid '%s'\n",
+				cxl_memdev_get_devname(memdev), uuid);
+		cxl_cmd_unref(cmd);
+		return -EINVAL;
+	}
 	get_log_input->offset = 0;
 	get_log_input->length = cmd->memdev->payload_max;
 
@@ -2021,7 +2033,9 @@ struct cxl_dram_event_record {
 	u8 row[3];
 	__le16 column;
 	u8 correction_mask[0x20];
-	u8 reserved[0x17];
+	u8 component_identifier[0x10];
+	u8 sub_channel;
+	u8 reserved[0x6];
 } __attribute__((packed));
 
 struct cxl_memory_module_record {
@@ -2160,6 +2174,17 @@ CXL_EXPORT int cxl_memdev_get_event_records(struct cxl_memdev *memdev, u8 event_
 				fprintf(stdout, "%*srow: 0x%02x%02x%02x\n", indent+2, "",
 					dram_event->row[0], dram_event->row[1], dram_event->row[2]);
 				fprintf(stdout, "%*scolumn: 0x%x\n", indent+2, "", le16_to_cpu(dram_event->column));
+				/* correction_mask is 0x20 bytes: print it as 4 rows of 8 bytes (row stride is 8) */
+				for (int i=0; i < 4; i++) {
+					fprintf(stdout, "%*scorrection mask[%d]: 0x", indent+2, "", i);
+					for (int j=0; j < 8; j++) {
+						fprintf(stdout, "%02x", dram_event->correction_mask[i*8+j]);
+					}
+					fprintf(stdout, "\n");
+				}
+				fprintf(stdout, "%*scomponent identifier: 0x%02x%02x%02x\n", indent+2, "",
+					dram_event->component_identifier[0], dram_event->component_identifier[1],
+					dram_event->component_identifier[2]);
 			}
 		}
 
@@ -6272,14 +6297,12 @@ out:
 	return 0;
 }
 
-
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET CXL_MEM_COMMAND_ID_RAW
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET_OPCODE 52737
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET_PAYLOAD_OUT_SIZE 40
 
-
 struct cxl_mbox_health_counters_get_out {
-	__le32 temperature_threshold_exceeded;
+	__le32 critical_over_temperature_exceeded;
 	__le32 power_on_events;
 	__le32 power_on_hours;
 	__le32 cxl_mem_link_crc_errors;
@@ -6289,6 +6312,12 @@ struct cxl_mbox_health_counters_get_out {
 	__le32 num_ddr_double_ecc_errors;
 	__le32 link_recovery_events;
 	__le32 time_in_throttled;
+	__le32 over_temperature_warning_level_exceeded;
+	__le32 critical_under_temperature_exceeded;
+	__le32 under_temperature_warning_level_exceeded;
+	__le32 rx_retry_request;
+	__le32 rcmd_qs0_hi_threshold_detect;
+	__le32 rcmd_qs1_hi_threshold_detect;
 } __attribute__((packed));
 
 CXL_EXPORT int cxl_memdev_health_counters_get(struct cxl_memdev *memdev)
@@ -6327,16 +6356,22 @@ CXL_EXPORT int cxl_memdev_health_counters_get(struct cxl_memdev *memdev)
 
 	health_counters_get_out = (void *)cmd->send_cmd->out.payload;
 	fprintf(stdout, "============================= get health counters ==============================\n");
-	fprintf(stdout, "Number of times temperature has exceeded threshold: %d\n", le32_to_cpu(health_counters_get_out->temperature_threshold_exceeded));
-	fprintf(stdout, "Number of Power On events: %d\n", le32_to_cpu(health_counters_get_out->power_on_events));
-	fprintf(stdout, "Number of Power On hours: %d\n", le32_to_cpu(health_counters_get_out->power_on_hours));
-	fprintf(stdout, "Number of CXL.mem Link CRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_mem_link_crc_errors));
-	fprintf(stdout, "Number of CXL.io Link LCRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_lcrc_errors));
-	fprintf(stdout, "Number of CXL.io Link ECRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_ecrc_errors));
-	fprintf(stdout, "Number of DDR single ECC errors: %d\n", le32_to_cpu(health_counters_get_out->num_ddr_single_ecc_errors));
-	fprintf(stdout, "Number of DDR double ECC errors: %d\n", le32_to_cpu(health_counters_get_out->num_ddr_double_ecc_errors));
-	fprintf(stdout, "Number of Link recovery events: %d\n", le32_to_cpu(health_counters_get_out->link_recovery_events));
-	fprintf(stdout, "Amount of time spent in throttled state (in seconds): %d\n", le32_to_cpu(health_counters_get_out->time_in_throttled));
+	fprintf(stdout, "0: CRITICAL_OVER_TEMPERATURE_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->critical_over_temperature_exceeded));
+	fprintf(stdout, "1: OVER_TEMPERATURE_WARNING_LEVEL_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->over_temperature_warning_level_exceeded));
+	fprintf(stdout, "2: CRITICAL_UNDER_TEMPERATURE_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->critical_under_temperature_exceeded));
+	fprintf(stdout, "3: UNDER_TEMPERATURE_WARNING_LEVEL_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->under_temperature_warning_level_exceeded));
+	fprintf(stdout, "4: POWER_ON_EVENTS = %d\n", le32_to_cpu(health_counters_get_out->power_on_events));
+	fprintf(stdout, "5: POWER_ON_HOURS = %d\n", le32_to_cpu(health_counters_get_out->power_on_hours));
+	fprintf(stdout, "6: CXL_MEM_LINK_CRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_mem_link_crc_errors));
+	fprintf(stdout, "7: CXL_IO_LINK_LCRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_lcrc_errors));
+	fprintf(stdout, "8: CXL_IO_LINK_ECRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_ecrc_errors));
+	fprintf(stdout, "9: NUM_DDR_SINGLE_ECC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->num_ddr_single_ecc_errors));
+	fprintf(stdout, "10: NUM_DDR_DOUBLE_ECC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->num_ddr_double_ecc_errors));
+	fprintf(stdout, "11: LINK_RECOVERY_EVENTS = %d\n", le32_to_cpu(health_counters_get_out->link_recovery_events));
+	fprintf(stdout, "12: TIME_IN_THROTTLED = %d\n", le32_to_cpu(health_counters_get_out->time_in_throttled));
+	fprintf(stdout, "13: RX_RETRY_REQUEST = %d\n", le32_to_cpu(health_counters_get_out->rx_retry_request));
+	fprintf(stdout, "14: RCMD_QS0_HI_THRESHOLD_DETECT = %d\n", le32_to_cpu(health_counters_get_out->rcmd_qs0_hi_threshold_detect));
+	fprintf(stdout, "15: RCMD_QS1_HI_THRESHOLD_DETECT = %d\n", le32_to_cpu(health_counters_get_out->rcmd_qs1_hi_threshold_detect));
 
 out:
 	cxl_cmd_unref(cmd);
@@ -9964,6 +9999,31 @@ out:
 	return rc;
 }
 
+struct cxl_dimm_slot_info_out {
+	u8 num_dimm_slots;
+	u8 rsvd[3];
+	u8 slot0_spd_i2c_addr;
+	u8 slot0_channel_id;
+	u8 slot0_dimm_silk_screen;
+	u8 slot0_dimm_present;
+	u8 rsvd1[12];
+	u8 slot1_spd_i2c_addr;
+	u8 slot1_channel_id;
+	u8 slot1_dimm_silk_screen;
+	u8 slot1_dimm_present;
+	u8 rsvd2[12];
+	u8 slot2_spd_i2c_addr;
+	u8 slot2_channel_id;
+	u8 slot2_dimm_silk_screen;
+	u8 slot2_dimm_present;
+	u8 rsvd3[12];
+	u8 slot3_spd_i2c_addr;
+	u8 slot3_channel_id;
+	u8 slot3_dimm_silk_screen;
+	u8 slot3_dimm_present;
+	u8 rsvd4[12];
+} __attribute__((packed));
+
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO CXL_MEM_COMMAND_ID_RAW
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_OPCODE 0xC520
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_PAYLOAD_IN_SIZE 0
@@ -9973,10 +10033,12 @@ CXL_EXPORT int cxl_memdev_dimm_slot_info(struct cxl_memdev *memdev)
 	struct cxl_cmd *cmd;
 	struct cxl_mem_query_commands *query;
 	struct cxl_command_info *cinfo;
-	u8 *dimm_slot_info;
+	struct cxl_dimm_slot_info_out *dimm_slot_info;
+	u8 *dimm_slots;
 	int rc = 0;
 	int offset = 0;
-
+	int indent = 2;
+	char silk_screen_char;
 
 	cmd = cxl_cmd_new_raw(memdev, CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_OPCODE);
 	if (!cmd) {
@@ -10018,20 +10080,52 @@ CXL_EXPORT int cxl_memdev_dimm_slot_info(struct cxl_memdev *memdev)
 		return -EINVAL;
 	}
 
-	dimm_slot_info = (u8*)cmd->send_cmd->out.payload;
+	dimm_slot_info = (void *)cmd->send_cmd->out.payload;
+	dimm_slots = (u8*)cmd->send_cmd->out.payload;
 	fprintf(stdout, "=========================== DIMM SLOT INFO ============================\n");
 	fprintf(stdout, "Output Payload:\n");
 	for(int i=0; i<cmd->send_cmd->out.size; i++){
 		if (i % 16 == 0)
 		{
-			fprintf(stdout, "\n%04x %02x ", i+offset, dimm_slot_info[i]);
+			fprintf(stdout, "\n%04x %02x ", i+offset, dimm_slots[i]);
 		}
 		else
 		{
-			fprintf(stdout, "%02x ", dimm_slot_info[i]);
+			fprintf(stdout, "%02x ", dimm_slots[i]);
 		}
 	}
-	fprintf(stdout, "\n");
+	fprintf(stdout, "\n\n");
+
+	// Decoding slot info data.
+	fprintf(stdout, "\n\n====== DIMM SLOTS INFO DECODE ============\n");
+
+	fprintf(stdout, "Number of DIMM Slots: %d\n", dimm_slot_info->num_dimm_slots);
+	fprintf(stdout, "DIMM SPD Index: 0\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot0_dimm_present);
+	silk_screen_char = dimm_slot_info->slot0_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot0_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot0_spd_i2c_addr);
+	fprintf(stdout, "DIMM SPD Index: 1\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot1_dimm_present);
+	silk_screen_char = dimm_slot_info->slot1_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot1_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot1_spd_i2c_addr);
+	fprintf(stdout, "DIMM SPD Index: 2\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot2_dimm_present);
+	silk_screen_char = dimm_slot_info->slot2_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot2_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot2_spd_i2c_addr);
+	fprintf(stdout, "DIMM SPD Index: 3\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot3_dimm_present);
+	silk_screen_char = dimm_slot_info->slot3_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot3_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot3_spd_i2c_addr);
+
+	fprintf(stdout, "\n\n");
 out:
 	cxl_cmd_unref(cmd);
 	return rc;
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
index 0c24579..6583af5 100644
--- a/cxl/libcxl.h
+++ b/cxl/libcxl.h
@@ -63,7 +63,7 @@ int cxl_memdev_transfer_fw(struct cxl_memdev *memdev, u8 action,
 int cxl_memdev_activate_fw(struct cxl_memdev *memdev, u8 action,
 	u8 slot);
 int cxl_memdev_get_supported_logs(struct cxl_memdev *memdev);
-int cxl_memdev_get_cel_log(struct cxl_memdev *memdev);
+int cxl_memdev_get_cel_log(struct cxl_memdev *memdev, const char *uuid);
 int cxl_memdev_get_event_interrupt_policy(struct cxl_memdev *memdev);
 int cxl_memdev_set_event_interrupt_policy(struct cxl_memdev *memdev, u32 int_policy);
 int cxl_memdev_get_timestamp(struct cxl_memdev *memdev);
diff --git a/cxl/memdev.c b/cxl/memdev.c
index 64ba7e0..fba1f75 100644
--- a/cxl/memdev.c
+++ b/cxl/memdev.c
@@ -84,8 +84,17 @@ static const struct option cmd_get_supported_logs_options[] = {
 	OPT_END(),
 };
 
+static struct _log_uuid {
+	const char* uuid;
+} log_uuid;
+
+#define LOG_UUID_OPTIONS() \
+OPT_STRING('l', "log_uuid", &log_uuid.uuid, "log-uuid", \
+	"CEL Log UUID")
+
 static const struct option cmd_get_cel_log_options[] = {
 	BASE_OPTIONS(),
+	LOG_UUID_OPTIONS(),
 	OPT_END(),
 };
 
@@ -2247,7 +2256,7 @@ static int action_cmd_get_cel_log(struct cxl_memdev *memdev, struct action_conte
 		return -EBUSY;
 	}
 
-	return cxl_memdev_get_cel_log(memdev);
+	return cxl_memdev_get_cel_log(memdev, log_uuid.uuid);
 }
 
 static int action_cmd_get_supported_logs(struct cxl_memdev *memdev, struct action_context *actx)