anitazha / rpms / ndctl

Forked from rpms/ndctl a year ago
Clone
Blob Blame History Raw
diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
index 8d39d2c..ba265ed 100644
--- a/cxl/lib/libcxl.c
+++ b/cxl/lib/libcxl.c
@@ -1483,7 +1483,7 @@ struct cel_entry {
 	__le16 effect;
 } __attribute__((packed));
 
-CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
+CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev, const char* uuid)
 {
 	struct cxl_cmd *cmd;
 	struct cxl_mbox_get_log *get_log_input;
@@ -1491,6 +1491,12 @@ CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
 	int no_cel_entries;
 	int rc = 0;
 
+	if (!uuid) {
+		fprintf(stderr, "%s: Please specify log uuid argument\n",
+				cxl_memdev_get_devname(memdev));
+		return -EINVAL;
+	}
+
 	cmd = cxl_cmd_new_generic(memdev, CXL_MEM_COMMAND_ID_GET_LOG);
 	if (!cmd) {
 		fprintf(stderr, "%s: cxl_memdev_get_cel_log returned Null output\n",
@@ -1499,7 +1505,7 @@ CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
 	}
 
 	get_log_input = (void *) cmd->send_cmd->in.payload;
-	uuid_parse(CEL_UUID, get_log_input->uuid);
+	uuid_parse(uuid, get_log_input->uuid);
 	get_log_input->offset = 0;
 	get_log_input->length = cmd->memdev->payload_max;
 
@@ -2021,7 +2027,9 @@ struct cxl_dram_event_record {
 	u8 row[3];
 	__le16 column;
 	u8 correction_mask[0x20];
-	u8 reserved[0x17];
+	u8 component_identifier[0x10];
+	u8 sub_channel;
+	u8 reserved[0x6]; /* 0x10 + 1 + 0x6 == 0x17: same size as the reserved area these fields replace */
 } __attribute__((packed));
 
 struct cxl_memory_module_record {
@@ -2160,6 +2168,16 @@ CXL_EXPORT int cxl_memdev_get_event_records(struct cxl_memdev *memdev, u8 event_
 			fprintf(stdout, "%*srow: 0x%02x%02x%02x\n", indent+2, "", dram_event->row[0],
 				dram_event->row[1], dram_event->row[2]);
 			fprintf(stdout, "%*scolumn: 0x%x\n", indent+2, "", le16_to_cpu(dram_event->column));
+			for (int i=0; i < 4; i++) { /* dump the 0x20-byte correction mask as 4 rows of 8 hex bytes */
+				fprintf(stdout, "%*scorrection mask[%d]: 0x", indent+2, "", i);
+				for (int j=0; j < 8; j++) { /* row i covers bytes i*8 .. i*8+7 */
+					fprintf(stdout, "%02x", dram_event->correction_mask[i*8+j]);
+				}
+				fprintf(stdout, "\n");
+			}
+			fprintf(stdout, "%*scomponent identifier: 0x%02x%02x%02x\n", indent+2, "",
+				dram_event->component_identifier[0], dram_event->component_identifier[1],
+				dram_event->component_identifier[2]);
 		}
 	}
 
@@ -6272,14 +6290,12 @@ out:
 	return 0;
 }
 
-
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET CXL_MEM_COMMAND_ID_RAW
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET_OPCODE 52737
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET_PAYLOAD_OUT_SIZE 40
 
-
 struct cxl_mbox_health_counters_get_out {
-	__le32 temperature_threshold_exceeded;
+	__le32 critical_over_temperature_exceeded;
 	__le32 power_on_events;
 	__le32 power_on_hours;
 	__le32 cxl_mem_link_crc_errors;
@@ -6289,6 +6305,12 @@ struct cxl_mbox_health_counters_get_out {
 	__le32 num_ddr_double_ecc_errors;
 	__le32 link_recovery_events;
 	__le32 time_in_throttled;
+	__le32 over_temperature_warning_level_exceeded;
+	__le32 critical_under_temperature_exceeded;
+	__le32 under_temperature_warning_level_exceeded;
+	__le32 rx_retry_request;
+	__le32 rcmd_qs0_hi_threshold_detect;
+	__le32 rcmd_qs1_hi_threshold_detect;
 }  __attribute__((packed));
 
 CXL_EXPORT int cxl_memdev_health_counters_get(struct cxl_memdev *memdev)
@@ -6327,16 +6349,22 @@ CXL_EXPORT int cxl_memdev_health_counters_get(struct cxl_memdev *memdev)
 
 	health_counters_get_out = (void *)cmd->send_cmd->out.payload;
 	fprintf(stdout, "============================= get health counters ==============================\n");
-	fprintf(stdout, "Number of times temperature has exceeded threshold: %d\n", le32_to_cpu(health_counters_get_out->temperature_threshold_exceeded));
-	fprintf(stdout, "Number of Power On events: %d\n", le32_to_cpu(health_counters_get_out->power_on_events));
-	fprintf(stdout, "Number of Power On hours: %d\n", le32_to_cpu(health_counters_get_out->power_on_hours));
-	fprintf(stdout, "Number of CXL.mem Link CRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_mem_link_crc_errors));
-	fprintf(stdout, "Number of CXL.io Link LCRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_lcrc_errors));
-	fprintf(stdout, "Number of CXL.io Link ECRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_ecrc_errors));
-	fprintf(stdout, "Number of DDR single ECC errors: %d\n", le32_to_cpu(health_counters_get_out->num_ddr_single_ecc_errors));
-	fprintf(stdout, "Number of DDR double ECC errors: %d\n", le32_to_cpu(health_counters_get_out->num_ddr_double_ecc_errors));
-	fprintf(stdout, "Number of Link recovery events: %d\n", le32_to_cpu(health_counters_get_out->link_recovery_events));
-	fprintf(stdout, "Amount of time spent in throttled state (in seconds): %d\n", le32_to_cpu(health_counters_get_out->time_in_throttled));
+	fprintf(stdout, "0: CRITICAL_OVER_TEMPERATURE_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->critical_over_temperature_exceeded));
+	fprintf(stdout, "1: OVER_TEMPERATURE_WARNING_LEVEL_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->over_temperature_warning_level_exceeded));
+	fprintf(stdout, "2: CRITICAL_UNDER_TEMPERATURE_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->critical_under_temperature_exceeded));
+	fprintf(stdout, "3: UNDER_TEMPERATURE_WARNING_LEVEL_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->under_temperature_warning_level_exceeded));
+	fprintf(stdout, "4: POWER_ON_EVENTS = %d\n", le32_to_cpu(health_counters_get_out->power_on_events));
+	fprintf(stdout, "5: POWER_ON_HOURS = %d\n", le32_to_cpu(health_counters_get_out->power_on_hours));
+	fprintf(stdout, "6: CXL_MEM_LINK_CRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_mem_link_crc_errors));
+	fprintf(stdout, "7: CXL_IO_LINK_LCRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_lcrc_errors));
+	fprintf(stdout, "8: CXL_IO_LINK_ECRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_ecrc_errors));
+	fprintf(stdout, "9: NUM_DDR_SINGLE_ECC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->num_ddr_single_ecc_errors));
+	fprintf(stdout, "10: NUM_DDR_DOUBLE_ECC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->num_ddr_double_ecc_errors));
+	fprintf(stdout, "11: LINK_RECOVERY_EVENTS = %d\n", le32_to_cpu(health_counters_get_out->link_recovery_events));
+	fprintf(stdout, "12: TIME_IN_THROTTLED = %d\n", le32_to_cpu(health_counters_get_out->time_in_throttled));
+	fprintf(stdout, "13: RX_RETRY_REQUEST = %d\n", le32_to_cpu(health_counters_get_out->rx_retry_request));
+	fprintf(stdout, "14: RCMD_QS0_HI_THRESHOLD_DETECT = %d\n", le32_to_cpu(health_counters_get_out->rcmd_qs0_hi_threshold_detect));
+	fprintf(stdout, "15: RCMD_QS1_HI_THRESHOLD_DETECT = %d\n", le32_to_cpu(health_counters_get_out->rcmd_qs1_hi_threshold_detect));
 
 out:
 	cxl_cmd_unref(cmd);
@@ -9964,6 +9992,31 @@ out:
 	return rc;
 }
 
+struct cxl_dimm_slot_info_out { /* output payload overlay used by cxl_memdev_dimm_slot_info(): 4 header bytes, then four 16-byte slot entries (assuming 1-byte u8) */
+	u8 num_dimm_slots; /* printed as "Number of DIMM Slots"; the decoder prints all four entries regardless of this value */
+	u8 rsvd[3];
+	u8 slot0_spd_i2c_addr; /* slot 0 entry: byte offset 4 */
+	u8 slot0_channel_id;
+	u8 slot0_dimm_silk_screen; /* printed with %c by the decoder */
+	u8 slot0_dimm_present;
+	u8 rsvd1[12]; /* pads the slot 0 entry to 16 bytes */
+	u8 slot1_spd_i2c_addr; /* slot 1 entry: byte offset 20 */
+	u8 slot1_channel_id;
+	u8 slot1_dimm_silk_screen; /* printed with %c by the decoder */
+	u8 slot1_dimm_present;
+	u8 rsvd2[12]; /* pads the slot 1 entry to 16 bytes */
+	u8 slot2_spd_i2c_addr; /* slot 2 entry: byte offset 36 */
+	u8 slot2_channel_id;
+	u8 slot2_dimm_silk_screen; /* printed with %c by the decoder */
+	u8 slot2_dimm_present;
+	u8 rsvd3[12]; /* pads the slot 2 entry to 16 bytes */
+	u8 slot3_spd_i2c_addr; /* slot 3 entry: byte offset 52 */
+	u8 slot3_channel_id;
+	u8 slot3_dimm_silk_screen; /* printed with %c by the decoder */
+	u8 slot3_dimm_present;
+	u8 rsvd4[12]; /* pads the slot 3 entry to 16 bytes; struct ends at byte 68 */
+}  __attribute__((packed));
+
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO CXL_MEM_COMMAND_ID_RAW
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_OPCODE 0xC520
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_PAYLOAD_IN_SIZE 0
@@ -9973,10 +10026,12 @@ CXL_EXPORT int cxl_memdev_dimm_slot_info(struct cxl_memdev *memdev)
 	struct cxl_cmd *cmd;
 	struct cxl_mem_query_commands *query;
 	struct cxl_command_info *cinfo;
-	u8 *dimm_slot_info;
+	struct cxl_dimm_slot_info_out *dimm_slot_info;
+	u8 *dimm_slots;
 	int rc = 0;
 	int offset = 0;
-
+	int indent = 2;
+	char silk_screen_char;
 
 	cmd = cxl_cmd_new_raw(memdev, CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_OPCODE);
 	if (!cmd) {
@@ -10018,20 +10073,52 @@ CXL_EXPORT int cxl_memdev_dimm_slot_info(struct cxl_memdev *memdev)
 		return -EINVAL;
 	}
 
-	dimm_slot_info = (u8*)cmd->send_cmd->out.payload;
+	dimm_slot_info = (void *)cmd->send_cmd->out.payload;
+	dimm_slots = (u8*)cmd->send_cmd->out.payload;
 	fprintf(stdout, "=========================== DIMM SLOT INFO ============================\n");
 	fprintf(stdout, "Output Payload:\n");
 	for(int i=0; i<cmd->send_cmd->out.size; i++){
 		if (i % 16 == 0)
 		{
-			fprintf(stdout, "\n%04x  %02x ", i+offset, dimm_slot_info[i]);
+			fprintf(stdout, "\n%04x  %02x ", i+offset, dimm_slots[i]);
 		}
 		else
 		{
-			fprintf(stdout, "%02x ", dimm_slot_info[i]);
+			fprintf(stdout, "%02x ", dimm_slots[i]);
 		}
 	}
-	fprintf(stdout, "\n");
+	fprintf(stdout, "\n\n");
+
+	// Decoding slot info data.
+	fprintf(stdout, "\n\n====== DIMM SLOTS INFO DECODE ============\n");
+
+	fprintf(stdout, "Number of DIMM Slots: %d\n", dimm_slot_info->num_dimm_slots);
+	fprintf(stdout, "DIMM SPD Index: 0\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot0_dimm_present);
+	silk_screen_char = dimm_slot_info->slot0_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot0_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot0_spd_i2c_addr);
+	fprintf(stdout, "DIMM SPD Index: 1\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot1_dimm_present);
+	silk_screen_char = dimm_slot_info->slot1_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot1_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot1_spd_i2c_addr);
+	fprintf(stdout, "DIMM SPD Index: 2\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot2_dimm_present);
+	silk_screen_char = dimm_slot_info->slot2_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot2_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot2_spd_i2c_addr);
+	fprintf(stdout, "DIMM SPD Index: 3\n");
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot3_dimm_present);
+	silk_screen_char = dimm_slot_info->slot3_dimm_silk_screen;
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot3_channel_id);
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot3_spd_i2c_addr);
+
+	fprintf(stdout, "\n\n");
 out:
 	cxl_cmd_unref(cmd);
 	return rc;
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
index 0c24579..6583af5 100644
--- a/cxl/libcxl.h
+++ b/cxl/libcxl.h
@@ -63,7 +63,7 @@ int cxl_memdev_transfer_fw(struct cxl_memdev *memdev, u8 action,
 int cxl_memdev_activate_fw(struct cxl_memdev *memdev, u8 action,
 	u8 slot);
 int cxl_memdev_get_supported_logs(struct cxl_memdev *memdev);
-int cxl_memdev_get_cel_log(struct cxl_memdev *memdev);
+int cxl_memdev_get_cel_log(struct cxl_memdev *memdev, const char *uuid);
 int cxl_memdev_get_event_interrupt_policy(struct cxl_memdev *memdev);
 int cxl_memdev_set_event_interrupt_policy(struct cxl_memdev *memdev, u32 int_policy);
 int cxl_memdev_get_timestamp(struct cxl_memdev *memdev);
diff --git a/cxl/memdev.c b/cxl/memdev.c
index 64ba7e0..fba1f75 100644
--- a/cxl/memdev.c
+++ b/cxl/memdev.c
@@ -84,8 +84,17 @@ static const struct option cmd_get_supported_logs_options[] = {
   OPT_END(),
 };
 
+static struct _log_uuid { /* holder for the get-cel-log UUID argument */
+	const char* uuid; /* target of the -l/--log_uuid OPT_STRING entry; NULL by default (static storage); passed to cxl_memdev_get_cel_log() */
+} log_uuid;
+
+#define LOG_UUID_OPTIONS() \
+OPT_STRING('l', "log_uuid", &log_uuid.uuid, "log-uuid", \
+  "CEL Log UUID") /* option entry for -l/--log_uuid; presumably stores the argument string in log_uuid.uuid -- confirm against OPT_STRING in parse-options */
+
 static const struct option cmd_get_cel_log_options[] = {
   BASE_OPTIONS(),
+  LOG_UUID_OPTIONS(),
   OPT_END(),
 };
 
@@ -2247,7 +2256,7 @@ static int action_cmd_get_cel_log(struct cxl_memdev *memdev, struct action_conte
     return -EBUSY;
   }
 
-  return cxl_memdev_get_cel_log(memdev);
+  return cxl_memdev_get_cel_log(memdev, log_uuid.uuid);
 }
 
 static int action_cmd_get_supported_logs(struct cxl_memdev *memdev, struct action_context *actx)