anitazha / rpms / ndctl

Forked from rpms/ndctl a year ago
Clone
e0018b
diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
e0018b
index 8d39d2c..ba265ed 100644
e0018b
--- a/cxl/lib/libcxl.c
e0018b
+++ b/cxl/lib/libcxl.c
e0018b
@@ -1483,7 +1483,7 @@ struct cel_entry {
e0018b
 	__le16 effect;
e0018b
 } __attribute__((packed));
e0018b
 
e0018b
-CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
e0018b
+CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev, const char* uuid)
e0018b
 {
e0018b
 	struct cxl_cmd *cmd;
e0018b
 	struct cxl_mbox_get_log *get_log_input;
e0018b
@@ -1491,6 +1491,12 @@ CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
e0018b
 	int no_cel_entries;
e0018b
 	int rc = 0;
e0018b
 
e0018b
+	if (!uuid) {
e0018b
+		fprintf(stderr, "%s: Please specify log uuid argument\n",
e0018b
+				cxl_memdev_get_devname(memdev));
e0018b
+		return -EINVAL;
e0018b
+	}
e0018b
+
e0018b
 	cmd = cxl_cmd_new_generic(memdev, CXL_MEM_COMMAND_ID_GET_LOG);
e0018b
 	if (!cmd) {
e0018b
 		fprintf(stderr, "%s: cxl_memdev_get_cel_log returned Null output\n",
e0018b
@@ -1499,7 +1505,7 @@ CXL_EXPORT int cxl_memdev_get_cel_log(struct cxl_memdev *memdev)
e0018b
 	}
e0018b
 
e0018b
 	get_log_input = (void *) cmd->send_cmd->in.payload;
e0018b
-	uuid_parse(CEL_UUID, get_log_input->uuid);
e0018b
+	if (uuid_parse(uuid, get_log_input->uuid)) { /* uuid is caller-supplied text: uuid_parse() returns non-zero when it is malformed, so stop here instead of sending a command with an unset UUID field */ fprintf(stderr, "%s: invalid log uuid '%s'\n", cxl_memdev_get_devname(memdev), uuid); cxl_cmd_unref(cmd); return -EINVAL; }
e0018b
 	get_log_input->offset = 0;
e0018b
 	get_log_input->length = cmd->memdev->payload_max;
e0018b
 
e0018b
@@ -2021,7 +2027,9 @@ struct cxl_dram_event_record {
e0018b
 	u8 row[3];
e0018b
 	__le16 column;
e0018b
 	u8 correction_mask[0x20];
e0018b
-	u8 reserved[0x17];
e0018b
+        u8 component_identifier[0x10];
e0018b
+        u8 sub_channel;
e0018b
+	u8 reserved[0x6];
e0018b
 } __attribute__((packed));
e0018b
 
e0018b
 struct cxl_memory_module_record {
e0018b
@@ -2160,6 +2168,16 @@ CXL_EXPORT int cxl_memdev_get_event_records(struct cxl_memdev *memdev, u8 event_
e0018b
 			fprintf(stdout, "%*srow: 0x%02x%02x%02x\n", indent+2, "", dram_event->row[0],
e0018b
 				dram_event->row[1], dram_event->row[2]);
e0018b
 			fprintf(stdout, "%*scolumn: 0x%x\n", indent+2, "", le16_to_cpu(dram_event->column));
e0018b
+			for (int i=0; i < 4; i++) { /* correction_mask is 0x20 bytes: print it as 4 rows of 8 bytes */
e0018b
+				fprintf(stdout, "%*scorrection mask[%d]: 0x", indent+2, "", i);
e0018b
+				for (int j=0; j < 8; j++) {
e0018b
+					fprintf(stdout, "%02x", dram_event->correction_mask[i*8+j]); /* row i starts at byte i*8; the former i*j+j re-read low bytes and never reached bytes 29-31 */
e0018b
+				}
e0018b
+				fprintf(stdout, "\n");
e0018b
+			}
e0018b
+			fprintf(stdout, "%*scomponent identifier: 0x%02x%02x%02x\n", indent+2, "",
e0018b
+				dram_event->component_identifier[0], dram_event->component_identifier[1],
e0018b
+				dram_event->component_identifier[2]);
e0018b
 		}
e0018b
 	}
e0018b
 
e0018b
@@ -6272,14 +6290,12 @@ out:
e0018b
 	return 0;
e0018b
 }
e0018b
 
e0018b
-
e0018b
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET CXL_MEM_COMMAND_ID_RAW
e0018b
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET_OPCODE 52737
e0018b
 #define CXL_MEM_COMMAND_ID_HEALTH_COUNTERS_GET_PAYLOAD_OUT_SIZE 40
e0018b
 
e0018b
-
e0018b
 struct cxl_mbox_health_counters_get_out {
e0018b
-	__le32 temperature_threshold_exceeded;
e0018b
+	__le32 critical_over_temperature_exceeded;
e0018b
 	__le32 power_on_events;
e0018b
 	__le32 power_on_hours;
e0018b
 	__le32 cxl_mem_link_crc_errors;
e0018b
@@ -6289,6 +6305,12 @@ struct cxl_mbox_health_counters_get_out {
e0018b
 	__le32 num_ddr_double_ecc_errors;
e0018b
 	__le32 link_recovery_events;
e0018b
 	__le32 time_in_throttled;
e0018b
+	__le32 over_temperature_warning_level_exceeded;
e0018b
+	__le32 critical_under_temperature_exceeded;
e0018b
+	__le32 under_temperature_warning_level_exceeded;
e0018b
+	__le32 rx_retry_request;
e0018b
+	__le32 rcmd_qs0_hi_threshold_detect;
e0018b
+	__le32 rcmd_qs1_hi_threshold_detect;
e0018b
 }  __attribute__((packed));
e0018b
 
e0018b
 CXL_EXPORT int cxl_memdev_health_counters_get(struct cxl_memdev *memdev)
e0018b
@@ -6327,16 +6349,22 @@ CXL_EXPORT int cxl_memdev_health_counters_get(struct cxl_memdev *memdev)
e0018b
 
e0018b
 	health_counters_get_out = (void *)cmd->send_cmd->out.payload;
e0018b
 	fprintf(stdout, "============================= get health counters ==============================\n");
e0018b
-	fprintf(stdout, "Number of times temperature has exceeded threshold: %d\n", le32_to_cpu(health_counters_get_out->temperature_threshold_exceeded));
e0018b
-	fprintf(stdout, "Number of Power On events: %d\n", le32_to_cpu(health_counters_get_out->power_on_events));
e0018b
-	fprintf(stdout, "Number of Power On hours: %d\n", le32_to_cpu(health_counters_get_out->power_on_hours));
e0018b
-	fprintf(stdout, "Number of CXL.mem Link CRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_mem_link_crc_errors));
e0018b
-	fprintf(stdout, "Number of CXL.io Link LCRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_lcrc_errors));
e0018b
-	fprintf(stdout, "Number of CXL.io Link ECRC errors: %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_ecrc_errors));
e0018b
-	fprintf(stdout, "Number of DDR single ECC errors: %d\n", le32_to_cpu(health_counters_get_out->num_ddr_single_ecc_errors));
e0018b
-	fprintf(stdout, "Number of DDR double ECC errors: %d\n", le32_to_cpu(health_counters_get_out->num_ddr_double_ecc_errors));
e0018b
-	fprintf(stdout, "Number of Link recovery events: %d\n", le32_to_cpu(health_counters_get_out->link_recovery_events));
e0018b
-	fprintf(stdout, "Amount of time spent in throttled state (in seconds): %d\n", le32_to_cpu(health_counters_get_out->time_in_throttled));
e0018b
+	fprintf(stdout, "0: CRITICAL_OVER_TEMPERATURE_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->critical_over_temperature_exceeded));
e0018b
+	fprintf(stdout, "1: OVER_TEMPERATURE_WARNING_LEVEL_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->over_temperature_warning_level_exceeded));
e0018b
+	fprintf(stdout, "2: CRITICAL_UNDER_TEMPERATURE_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->critical_under_temperature_exceeded));
e0018b
+	fprintf(stdout, "3: UNDER_TEMPERATURE_WARNING_LEVEL_EXCEEDED = %d\n", le32_to_cpu(health_counters_get_out->under_temperature_warning_level_exceeded));
e0018b
+	fprintf(stdout, "4: POWER_ON_EVENTS = %d\n", le32_to_cpu(health_counters_get_out->power_on_events));
e0018b
+	fprintf(stdout, "5: POWER_ON_HOURS = %d\n", le32_to_cpu(health_counters_get_out->power_on_hours));
e0018b
+	fprintf(stdout, "6: CXL_MEM_LINK_CRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_mem_link_crc_errors));
e0018b
+	fprintf(stdout, "7: CXL_IO_LINK_LCRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_lcrc_errors));
e0018b
+	fprintf(stdout, "8: CXL_IO_LINK_ECRC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->cxl_io_link_ecrc_errors));
e0018b
+	fprintf(stdout, "9: NUM_DDR_SINGLE_ECC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->num_ddr_single_ecc_errors));
e0018b
+	fprintf(stdout, "10: NUM_DDR_DOUBLE_ECC_ERRORS = %d\n", le32_to_cpu(health_counters_get_out->num_ddr_double_ecc_errors));
e0018b
+	fprintf(stdout, "11: LINK_RECOVERY_EVENTS = %d\n", le32_to_cpu(health_counters_get_out->link_recovery_events));
e0018b
+	fprintf(stdout, "12: TIME_IN_THROTTLED = %d\n", le32_to_cpu(health_counters_get_out->time_in_throttled));
e0018b
+	fprintf(stdout, "13: RX_RETRY_REQUEST = %d\n", le32_to_cpu(health_counters_get_out->rx_retry_request));
e0018b
+	fprintf(stdout, "14: RCMD_QS0_HI_THRESHOLD_DETECT = %d\n", le32_to_cpu(health_counters_get_out->rcmd_qs0_hi_threshold_detect));
e0018b
+	fprintf(stdout, "15: RCMD_QS1_HI_THRESHOLD_DETECT = %d\n", le32_to_cpu(health_counters_get_out->rcmd_qs1_hi_threshold_detect));
e0018b
 
e0018b
 out:
e0018b
 	cxl_cmd_unref(cmd);
e0018b
@@ -9964,6 +9992,31 @@ out:
e0018b
 	return rc;
e0018b
 }
e0018b
 
e0018b
+struct cxl_dimm_slot_info_out { /* overlay for the output payload of the DIMM slot info raw command: 4-byte header followed by four 16-byte per-slot entries (68 bytes, packed) */
e0018b
+	u8 num_dimm_slots; /* reported as "Number of DIMM Slots" by cxl_memdev_dimm_slot_info() */
e0018b
+	u8 rsvd[3]; /* pads the header to 4 bytes */
e0018b
+	u8 slot0_spd_i2c_addr; /* reported as "I2C Address" (hex) */
e0018b
+	u8 slot0_channel_id; /* reported as "Channel ID" (hex) */
e0018b
+	u8 slot0_dimm_silk_screen; /* printed with %c, i.e. handled as a single character label */
e0018b
+	u8 slot0_dimm_present; /* reported as "DIMM Present" (hex); value encoding is not visible here - TODO confirm against the device spec */
e0018b
+	u8 rsvd1[12]; /* pads the slot 0 entry to 16 bytes */
e0018b
+	u8 slot1_spd_i2c_addr; /* slot 1: same four fields, same order as slot 0 */
e0018b
+	u8 slot1_channel_id;
e0018b
+	u8 slot1_dimm_silk_screen;
e0018b
+	u8 slot1_dimm_present;
e0018b
+	u8 rsvd2[12]; /* pads the slot 1 entry to 16 bytes */
e0018b
+	u8 slot2_spd_i2c_addr; /* slot 2: same four fields, same order as slot 0 */
e0018b
+	u8 slot2_channel_id;
e0018b
+	u8 slot2_dimm_silk_screen;
e0018b
+	u8 slot2_dimm_present;
e0018b
+	u8 rsvd3[12]; /* pads the slot 2 entry to 16 bytes */
e0018b
+	u8 slot3_spd_i2c_addr; /* slot 3: same four fields, same order as slot 0 */
e0018b
+	u8 slot3_channel_id;
e0018b
+	u8 slot3_dimm_silk_screen;
e0018b
+	u8 slot3_dimm_present;
e0018b
+	u8 rsvd4[12]; /* pads the slot 3 entry to 16 bytes */
e0018b
+}  __attribute__((packed));
e0018b
+
e0018b
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO CXL_MEM_COMMAND_ID_RAW
e0018b
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_OPCODE 0xC520
e0018b
 #define CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_PAYLOAD_IN_SIZE 0
e0018b
@@ -9973,10 +10026,12 @@ CXL_EXPORT int cxl_memdev_dimm_slot_info(struct cxl_memdev *memdev)
e0018b
 	struct cxl_cmd *cmd;
e0018b
 	struct cxl_mem_query_commands *query;
e0018b
 	struct cxl_command_info *cinfo;
e0018b
-	u8 *dimm_slot_info;
e0018b
+	struct cxl_dimm_slot_info_out *dimm_slot_info;
e0018b
+	u8 *dimm_slots;
e0018b
 	int rc = 0;
e0018b
 	int offset = 0;
e0018b
-
e0018b
+	int indent = 2;
e0018b
+	char silk_screen_char;
e0018b
 
e0018b
 	cmd = cxl_cmd_new_raw(memdev, CXL_MEM_COMMAND_ID_DIMM_SLOT_INFO_OPCODE);
e0018b
 	if (!cmd) {
e0018b
@@ -10018,20 +10073,52 @@ CXL_EXPORT int cxl_memdev_dimm_slot_info(struct cxl_memdev *memdev)
e0018b
 		return -EINVAL;
e0018b
 	}
e0018b
 
e0018b
-	dimm_slot_info = (u8*)cmd->send_cmd->out.payload;
e0018b
+	dimm_slot_info = (void *)cmd->send_cmd->out.payload;
e0018b
+	dimm_slots = (u8*)cmd->send_cmd->out.payload;
e0018b
 	fprintf(stdout, "=========================== DIMM SLOT INFO ============================\n");
e0018b
 	fprintf(stdout, "Output Payload:\n");
e0018b
 	for(int i=0; i<cmd->send_cmd->out.size; i++){
e0018b
 		if (i % 16 == 0)
e0018b
 		{
e0018b
-			fprintf(stdout, "\n%04x  %02x ", i+offset, dimm_slot_info[i]);
e0018b
+			fprintf(stdout, "\n%04x  %02x ", i+offset, dimm_slots[i]);
e0018b
 		}
e0018b
 		else
e0018b
 		{
e0018b
-			fprintf(stdout, "%02x ", dimm_slot_info[i]);
e0018b
+			fprintf(stdout, "%02x ", dimm_slots[i]);
e0018b
 		}
e0018b
 	}
e0018b
-	fprintf(stdout, "\n");
e0018b
+	fprintf(stdout, "\n\n");
e0018b
+
e0018b
+	// Decoding slot info data.
e0018b
+	fprintf(stdout, "\n\n====== DIMM SLOTS INFO DECODE ============\n");
e0018b
+
e0018b
+	fprintf(stdout, "Number of DIMM Slots: %d\n", dimm_slot_info->num_dimm_slots);
e0018b
+	fprintf(stdout, "DIMM SPD Index: 0\n");
e0018b
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot0_dimm_present);
e0018b
+	silk_screen_char = dimm_slot_info->slot0_dimm_silk_screen;
e0018b
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
e0018b
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot0_channel_id);
e0018b
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot0_spd_i2c_addr);
e0018b
+	fprintf(stdout, "DIMM SPD Index: 1\n");
e0018b
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot1_dimm_present);
e0018b
+	silk_screen_char = dimm_slot_info->slot1_dimm_silk_screen;
e0018b
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
e0018b
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot1_channel_id);
e0018b
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot1_spd_i2c_addr);
e0018b
+	fprintf(stdout, "DIMM SPD Index: 2\n");
e0018b
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot2_dimm_present);
e0018b
+	silk_screen_char = dimm_slot_info->slot2_dimm_silk_screen;
e0018b
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
e0018b
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot2_channel_id);
e0018b
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot2_spd_i2c_addr);
e0018b
+	fprintf(stdout, "DIMM SPD Index: 3\n");
e0018b
+	fprintf(stdout, "%*sDIMM Present: 0x%x\n", indent+2, "", dimm_slot_info->slot3_dimm_present);
e0018b
+	silk_screen_char = dimm_slot_info->slot3_dimm_silk_screen;
e0018b
+	fprintf(stdout, "%*sDIMM Silk Screen: %c\n", indent+2, "", silk_screen_char);
e0018b
+	fprintf(stdout, "%*sChannel ID: 0x%x\n", indent+2, "", dimm_slot_info->slot3_channel_id);
e0018b
+	fprintf(stdout, "%*sI2C Address: 0x%x\n", indent+2, "", dimm_slot_info->slot3_spd_i2c_addr);
e0018b
+
e0018b
+	fprintf(stdout, "\n\n");
e0018b
 out:
e0018b
 	cxl_cmd_unref(cmd);
e0018b
 	return rc;
e0018b
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
e0018b
index 0c24579..6583af5 100644
e0018b
--- a/cxl/libcxl.h
e0018b
+++ b/cxl/libcxl.h
e0018b
@@ -63,7 +63,7 @@ int cxl_memdev_transfer_fw(struct cxl_memdev *memdev, u8 action,
e0018b
 int cxl_memdev_activate_fw(struct cxl_memdev *memdev, u8 action,
e0018b
 	u8 slot);
e0018b
 int cxl_memdev_get_supported_logs(struct cxl_memdev *memdev);
e0018b
-int cxl_memdev_get_cel_log(struct cxl_memdev *memdev);
e0018b
+int cxl_memdev_get_cel_log(struct cxl_memdev *memdev, const char *uuid);
e0018b
 int cxl_memdev_get_event_interrupt_policy(struct cxl_memdev *memdev);
e0018b
 int cxl_memdev_set_event_interrupt_policy(struct cxl_memdev *memdev, u32 int_policy);
e0018b
 int cxl_memdev_get_timestamp(struct cxl_memdev *memdev);
e0018b
diff --git a/cxl/memdev.c b/cxl/memdev.c
e0018b
index 64ba7e0..fba1f75 100644
e0018b
--- a/cxl/memdev.c
e0018b
+++ b/cxl/memdev.c
e0018b
@@ -84,8 +84,17 @@ static const struct option cmd_get_supported_logs_options[] = {
e0018b
   OPT_END(),
e0018b
 };
e0018b
 
e0018b
+static struct _log_uuid { /* parameter storage for the get-cel-log command's log UUID option */
e0018b
+	const char* uuid; /* UUID text from -l/--log_uuid, handed to cxl_memdev_get_cel_log(); zero-initialized (NULL) as a static object, presumably left NULL when the option is not given */
e0018b
+} log_uuid;
e0018b
+
e0018b
+#define LOG_UUID_OPTIONS() /* option-table entry: -l / --log_uuid <log-uuid>, stored as a string in log_uuid.uuid */ \
e0018b
+OPT_STRING('l', "log_uuid", &log_uuid.uuid, "log-uuid", \
e0018b
+  "CEL Log UUID")
e0018b
+
e0018b
 static const struct option cmd_get_cel_log_options[] = {
e0018b
   BASE_OPTIONS(),
e0018b
+  LOG_UUID_OPTIONS(),
e0018b
   OPT_END(),
e0018b
 };
e0018b
 
e0018b
@@ -2247,7 +2256,7 @@ static int action_cmd_get_cel_log(struct cxl_memdev *memdev, struct action_conte
e0018b
     return -EBUSY;
e0018b
   }
e0018b
 
e0018b
-  return cxl_memdev_get_cel_log(memdev);
e0018b
+  return cxl_memdev_get_cel_log(memdev, log_uuid.uuid);
e0018b
 }
e0018b
 
e0018b
 static int action_cmd_get_supported_logs(struct cxl_memdev *memdev, struct action_context *actx)