Blame SOURCES/kexec-tools-2.0.20-1-printk-add-support-for-lockless-ringbuffer.patch

c43617
From 4149df9005f2cdd2ecf70058dfe7d72f48c3a68c Mon Sep 17 00:00:00 2001
c43617
From: John Ogness <john.ogness@linutronix.de>
c43617
Date: Wed, 25 Nov 2020 23:26:59 +0106
c43617
Subject: [PATCH] printk: add support for lockless ringbuffer
c43617
c43617
Linux 5.10 moved to a new lockless ringbuffer. The new ringbuffer
c43617
is structured completely differently from the previous iterations.
c43617
Add support for retrieving the ringbuffer using vmcoreinfo. The
c43617
new ringbuffer is detected based on the availability of the
c43617
"prb" symbol.
c43617
c43617
Signed-off-by: John Ogness <john.ogness@linutronix.de>
c43617
Signed-off-by: Simon Horman <horms@verge.net.au>
c43617
---
c43617
 util_lib/elf_info.c | 438 +++++++++++++++++++++++++++++++++++++++++++-
c43617
 1 file changed, 437 insertions(+), 1 deletion(-)
c43617
c43617
diff --git a/util_lib/elf_info.c b/util_lib/elf_info.c
c43617
index 7803a94..2f23a44 100644
c43617
--- a/util_lib/elf_info.c
c43617
+++ b/util_lib/elf_info.c
c43617
@@ -27,6 +27,32 @@ static int num_pt_loads;
c43617
 
c43617
 static char osrelease[4096];
c43617
 
c43617
+/* VMCOREINFO symbol, sizes and offsets for the lockless printk ringbuffer */
c43617
+static loff_t prb_vaddr;
c43617
+static size_t printk_ringbuffer_sz;
c43617
+static size_t prb_desc_sz;
c43617
+static size_t printk_info_sz;
c43617
+static uint64_t printk_ringbuffer_desc_ring_offset;
c43617
+static uint64_t printk_ringbuffer_text_data_ring_offset;
c43617
+static uint64_t prb_desc_ring_count_bits_offset;
c43617
+static uint64_t prb_desc_ring_descs_offset;
c43617
+static uint64_t prb_desc_ring_infos_offset;
c43617
+static uint64_t prb_data_ring_size_bits_offset;
c43617
+static uint64_t prb_data_ring_data_offset;
c43617
+static uint64_t prb_desc_ring_head_id_offset;
c43617
+static uint64_t prb_desc_ring_tail_id_offset;
c43617
+static uint64_t atomic_long_t_counter_offset;
c43617
+static uint64_t prb_desc_state_var_offset;
c43617
+static uint64_t prb_desc_info_offset;
c43617
+static uint64_t prb_desc_text_blk_lpos_offset;
c43617
+static uint64_t prb_data_blk_lpos_begin_offset;
c43617
+static uint64_t prb_data_blk_lpos_next_offset;
c43617
+static uint64_t printk_info_seq_offset;
c43617
+static uint64_t printk_info_caller_id_offset;
c43617
+static uint64_t printk_info_ts_nsec_offset;
c43617
+static uint64_t printk_info_level_offset;
c43617
+static uint64_t printk_info_text_len_offset;
c43617
+
c43617
 static loff_t log_buf_vaddr;
c43617
 static loff_t log_end_vaddr;
c43617
 static loff_t log_buf_len_vaddr;
c43617
@@ -304,6 +330,7 @@ void scan_vmcoreinfo(char *start, size_t size)
c43617
 		size_t len;
c43617
 		loff_t *vaddr;
c43617
 	} symbol[] = {
c43617
+		SYMBOL(prb),
c43617
 		SYMBOL(log_buf),
c43617
 		SYMBOL(log_end),
c43617
 		SYMBOL(log_buf_len),
c43617
@@ -361,6 +388,119 @@ void scan_vmcoreinfo(char *start, size_t size)
c43617
 			*symbol[i].vaddr = vaddr;
c43617
 		}
c43617
 
c43617
+		str = "SIZE(printk_ringbuffer)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_ringbuffer_sz = strtoull(pos + strlen(str),
c43617
+							NULL, 10);
c43617
+
c43617
+		str = "SIZE(prb_desc)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_sz = strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "SIZE(printk_info)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_info_sz = strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(printk_ringbuffer.desc_ring)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_ringbuffer_desc_ring_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(printk_ringbuffer.text_data_ring)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_ringbuffer_text_data_ring_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc_ring.count_bits)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_ring_count_bits_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc_ring.descs)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_ring_descs_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc_ring.infos)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_ring_infos_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_data_ring.size_bits)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_data_ring_size_bits_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_data_ring.data)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_data_ring_data_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc_ring.head_id)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_ring_head_id_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc_ring.tail_id)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_ring_tail_id_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(atomic_long_t.counter)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			atomic_long_t_counter_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc.state_var)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_state_var_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc.info)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_info_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_desc.text_blk_lpos)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_desc_text_blk_lpos_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_data_blk_lpos.begin)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_data_blk_lpos_begin_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(prb_data_blk_lpos.next)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			prb_data_blk_lpos_next_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(printk_info.seq)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_info_seq_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(printk_info.caller_id)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_info_caller_id_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(printk_info.ts_nsec)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_info_ts_nsec_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(printk_info.level)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_info_level_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
+		str = "OFFSET(printk_info.text_len)=";
c43617
+		if (memcmp(str, pos, strlen(str)) == 0)
c43617
+			printk_info_text_len_offset =
c43617
+				strtoull(pos + strlen(str), NULL, 10);
c43617
+
c43617
 		/* Check for "SIZE(printk_log)" or older "SIZE(log)=" */
c43617
 		str = "SIZE(log)=";
c43617
 		if (memcmp(str, pos, strlen(str)) == 0)
c43617
@@ -746,9 +886,305 @@ static void dump_dmesg_structured(int fd, void (*handler)(char*, unsigned int))
c43617
 		handler(out_buf, len);
c43617
 }
c43617
 
c43617
+/* bundles the locally read ringbuffer pieces passed to the dump helpers */
c43617
+struct prb_map {
c43617
+	char		*prb;
c43617
+
c43617
+	char		*desc_ring;
c43617
+	unsigned long	desc_ring_count;
c43617
+	char		*descs;
c43617
+
c43617
+	char		*infos;
c43617
+
c43617
+	char		*text_data_ring;
c43617
+	unsigned long	text_data_ring_size;
c43617
+	char		*text_data;
c43617
+};
c43617
+
c43617
+/*
c43617
+ * desc_state and DESC_* definitions taken from kernel source
c43617
+ * (kernel/printk/printk_ringbuffer.h).
c43617
+ *
c43617
+ * DESC_* definitions modified to provide 32-bit and 64-bit variants.
c43617
+ * 64-bit variants use ULL constants: "unsigned long" may be 32 bits here.
c43617
+ */
c43617
+
c43617
+/* The possible responses of a descriptor state-query. */
c43617
+enum desc_state {
c43617
+	desc_miss	=  -1,	/* ID mismatch (pseudo state) */
c43617
+	desc_reserved	= 0x0,	/* reserved, in use by writer */
c43617
+	desc_committed	= 0x1,	/* committed by writer, could get reopened */
c43617
+	desc_finalized	= 0x2,	/* committed, no further modification allowed */
c43617
+	desc_reusable	= 0x3,	/* free, not yet used by any writer */
c43617
+};
c43617
+
c43617
+#define DESC_SV_BITS		(sizeof(uint64_t) * 8)
c43617
+#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
c43617
+#define DESC_FLAGS_MASK		(3ULL << DESC_FLAGS_SHIFT)
c43617
+#define DESC_STATE(sv)		(3ULL & ((sv) >> DESC_FLAGS_SHIFT))
c43617
+#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
c43617
+#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
c43617
+
c43617
+#define DESC32_SV_BITS		(sizeof(uint32_t) * 8)
c43617
+#define DESC32_FLAGS_SHIFT	(DESC32_SV_BITS - 2)
c43617
+#define DESC32_FLAGS_MASK	(3UL << DESC32_FLAGS_SHIFT)
c43617
+#define DESC32_STATE(sv)	(3UL & ((sv) >> DESC32_FLAGS_SHIFT))
c43617
+#define DESC32_ID_MASK		(~DESC32_FLAGS_MASK)
c43617
+#define DESC32_ID(sv)		((sv) & DESC32_ID_MASK)
c43617
+
c43617
+/*
c43617
+ * get_desc_state() taken from kernel source:
c43617
+ *
c43617
+ * kernel/printk/printk_ringbuffer.c
c43617
+ *
c43617
+ * get_desc32_state() added as 32-bit variant (uses the DESC32_* macros).
c43617
+ */
c43617
+
c43617
+/* Decode the state bits of @state_val; desc_miss if its ID is not @id. */
c43617
+static enum desc_state get_desc_state(unsigned long id,
c43617
+				      uint64_t state_val)
c43617
+{
c43617
+	if (id != DESC_ID(state_val))
c43617
+		return desc_miss;
c43617
+
c43617
+	return DESC_STATE(state_val);
c43617
+}
c43617
+
c43617
+static enum desc_state get_desc32_state(unsigned long id,
c43617
+					uint64_t state_val)
c43617
+{
c43617
+	if (id != DESC32_ID(state_val))
c43617
+		return desc_miss;
c43617
+
c43617
+	return DESC32_STATE(state_val);
c43617
+}
c43617
+
c43617
+static bool record_committed(unsigned long id, uint64_t state_var)
c43617
+{
c43617
+	bool is_32bit = (machine_pointer_bits() == 32);
c43617
+	enum desc_state st;
c43617
+
c43617
+	/* decode with the variant matching the dump's pointer width */
c43617
+	st = is_32bit ? get_desc32_state(id, state_var) :
c43617
+			get_desc_state(id, state_var);
c43617
+
c43617
+	return !(st != desc_committed && st != desc_finalized);
c43617
+}
c43617
+
c43617
+static uint64_t id_inc(uint64_t id)
c43617
+{
c43617
+	uint64_t next_id = id + 1;
c43617
+
c43617
+	/* wrap within the ID bits of the dump's state word width */
c43617
+	if (machine_pointer_bits() != 32)
c43617
+		return next_id & DESC_ID_MASK;
c43617
+	return next_id & DESC32_ID_MASK;
c43617
+}
c43617
+
c43617
+static uint64_t get_ulong(char *addr)
c43617
+{
c43617
+	bool wide = (machine_pointer_bits() != 32);	/* 64-bit unless 32 */
c43617
+
c43617
+	return wide ? struct_val_u64(addr, 0) : struct_val_u32(addr, 0);
c43617
+}
c43617
+
c43617
+static uint64_t sizeof_ulong(void)
c43617
+{
c43617
+	return (uint64_t)(machine_pointer_bits() >> 3);	/* bits -> bytes */
c43617
+}
c43617
+
c43617
+static void dump_record(struct prb_map *m, unsigned long id,
c43617
+			void (*handler)(char*, unsigned int))
c43617
+{
c43617
+#define OUT_BUF_SIZE	4096
c43617
+	char out_buf[OUT_BUF_SIZE];
c43617
+	imaxdiv_t imaxdiv_usec;
c43617
+	imaxdiv_t imaxdiv_sec;
c43617
+	uint32_t offset = 0;
c43617
+	unsigned short len;
c43617
+	uint64_t state_var;
c43617
+	uint64_t ts_nsec;
c43617
+	uint64_t begin;
c43617
+	uint64_t next;
c43617
+	char *info;
c43617
+	char *text;
c43617
+	char *desc;
c43617
+	int i;
c43617
+
c43617
+	desc = m->descs + ((id % m->desc_ring_count) * prb_desc_sz);
c43617
+	info = m->infos + ((id % m->desc_ring_count) * printk_info_sz);
c43617
+
c43617
+	/* skip records that are neither committed nor finalized */
c43617
+	state_var = get_ulong(desc + prb_desc_state_var_offset +
c43617
+					atomic_long_t_counter_offset);
c43617
+	if (!record_committed(id, state_var))
c43617
+		return;
c43617
+
c43617
+	begin = get_ulong(desc + prb_desc_text_blk_lpos_offset +
c43617
+			  prb_data_blk_lpos_begin_offset) %
c43617
+		m->text_data_ring_size;
c43617
+	next = get_ulong(desc + prb_desc_text_blk_lpos_offset +
c43617
+			 prb_data_blk_lpos_next_offset) %
c43617
+	       m->text_data_ring_size;
c43617
+
c43617
+	ts_nsec = struct_val_u64(info, printk_info_ts_nsec_offset);
c43617
+	imaxdiv_sec = imaxdiv(ts_nsec, 1000000000);
c43617
+	imaxdiv_usec = imaxdiv(imaxdiv_sec.rem, 1000);
c43617
+
c43617
+	offset += sprintf(out_buf + offset, "[%5llu.%06llu] ",
c43617
+		(long long unsigned int)imaxdiv_sec.quot,
c43617
+		(long long unsigned int)imaxdiv_usec.quot);
c43617
+
c43617
+	/* data-less text block: emit only the timestamp and a newline */
c43617
+	if (begin == next)
c43617
+		goto out;
c43617
+
c43617
+	len = struct_val_u16(info, printk_info_text_len_offset);
c43617
+
c43617
+	/* wrapping data block: read from the start of the ring */
c43617
+	if (begin > next)
c43617
+		begin = 0;
c43617
+
c43617
+	/* skip over descriptor ID (one target-sized unsigned long) */
c43617
+	begin += sizeof_ulong();
c43617
+
c43617
+	/* clamp text_len to the bytes available in the block */
c43617
+	if (next - begin < len)
c43617
+		len = next - begin;
c43617
+
c43617
+	text = m->text_data + begin;
c43617
+
c43617
+	/* copy text, escaping bytes that are neither printable nor space */
c43617
+	for (i = 0; i < len; i++) {
c43617
+		unsigned char c = text[i];
c43617
+
c43617
+		if (!isprint(c) && !isspace(c))
c43617
+			offset += sprintf(out_buf + offset, "\\x%02x", c);
c43617
+		else
c43617
+			out_buf[offset++] = c;
c43617
+
c43617
+		if (offset >= OUT_BUF_SIZE - 64) {
c43617
+			if (handler)
c43617
+				handler(out_buf, offset);
c43617
+			offset = 0;
c43617
+		}
c43617
+	}
c43617
+out:
c43617
+	out_buf[offset++] = '\n';
c43617
+
c43617
+	if (offset && handler)
c43617
+		handler(out_buf, offset);
c43617
+}
c43617
+
c43617
+/*
c43617
+ *  Dump the lockless printk_ringbuffer from @fd, passing text to @handler.
c43617
+ */
c43617
+static void dump_dmesg_lockless(int fd, void (*handler)(char*, unsigned int))
c43617
+{
c43617
+	struct prb_map m;
c43617
+	uint64_t head_id;
c43617
+	uint64_t tail_id;
c43617
+	uint64_t kaddr;
c43617
+	uint64_t id;
c43617
+	ssize_t ret;
c43617
+
c43617
+	/* read struct printk_ringbuffer via the prb pointer */
c43617
+	kaddr = read_file_pointer(fd, vaddr_to_offset(prb_vaddr));
c43617
+	m.prb = calloc(1, printk_ringbuffer_sz);
c43617
+	if (!m.prb) {
c43617
+		fprintf(stderr, "Failed to malloc %zu bytes for prb: %s\n",
c43617
+			printk_ringbuffer_sz, strerror(errno));
c43617
+		exit(64);
c43617
+	}
c43617
+	ret = pread(fd, m.prb, printk_ringbuffer_sz, vaddr_to_offset(kaddr));
c43617
+	if (ret != printk_ringbuffer_sz) {
c43617
+		fprintf(stderr, "Failed to read prb of size %zu bytes: %s\n",
c43617
+			printk_ringbuffer_sz, strerror(errno));
c43617
+		exit(65);
c43617
+	}
c43617
+
c43617
+	/* setup descriptor ring (1UL: avoid overflowing an int shift) */
c43617
+	m.desc_ring = m.prb + printk_ringbuffer_desc_ring_offset;
c43617
+	m.desc_ring_count = 1UL << struct_val_u32(m.desc_ring,
c43617
+					prb_desc_ring_count_bits_offset);
c43617
+	kaddr = get_ulong(m.desc_ring + prb_desc_ring_descs_offset);
c43617
+	m.descs = calloc(1, prb_desc_sz * m.desc_ring_count);
c43617
+	if (!m.descs) {
c43617
+		fprintf(stderr, "Failed to malloc %lu bytes for descs: %s\n",
c43617
+			prb_desc_sz * m.desc_ring_count, strerror(errno));
c43617
+		exit(64);
c43617
+	}
c43617
+	ret = pread(fd, m.descs, prb_desc_sz * m.desc_ring_count,
c43617
+		    vaddr_to_offset(kaddr));
c43617
+	if (ret != prb_desc_sz * m.desc_ring_count) {
c43617
+		fprintf(stderr,
c43617
+			"Failed to read descs of size %lu bytes: %s\n",
c43617
+			prb_desc_sz * m.desc_ring_count, strerror(errno));
c43617
+		exit(65);
c43617
+	}
c43617
+
c43617
+	/* setup info ring (infos is a member of the descriptor ring) */
c43617
+	kaddr = get_ulong(m.desc_ring + prb_desc_ring_infos_offset);
c43617
+	m.infos = calloc(1, printk_info_sz * m.desc_ring_count);
c43617
+	if (!m.infos) {
c43617
+		fprintf(stderr, "Failed to malloc %lu bytes for infos: %s\n",
c43617
+			printk_info_sz * m.desc_ring_count, strerror(errno));
c43617
+		exit(64);
c43617
+	}
c43617
+	ret = pread(fd, m.infos, printk_info_sz * m.desc_ring_count,
c43617
+		    vaddr_to_offset(kaddr));
c43617
+	if (ret != printk_info_sz * m.desc_ring_count) {
c43617
+		fprintf(stderr,
c43617
+			"Failed to read infos of size %lu bytes: %s\n",
c43617
+			printk_info_sz * m.desc_ring_count, strerror(errno));
c43617
+		exit(65);
c43617
+	}
c43617
+
c43617
+	/* setup text data ring (1UL: avoid overflowing an int shift) */
c43617
+	m.text_data_ring = m.prb + printk_ringbuffer_text_data_ring_offset;
c43617
+	m.text_data_ring_size = 1UL << struct_val_u32(m.text_data_ring,
c43617
+					prb_data_ring_size_bits_offset);
c43617
+	kaddr = get_ulong(m.text_data_ring + prb_data_ring_data_offset);
c43617
+	m.text_data = calloc(1, m.text_data_ring_size);
c43617
+	if (!m.text_data) {
c43617
+		fprintf(stderr,
c43617
+			"Failed to malloc %lu bytes for text_data: %s\n",
c43617
+			m.text_data_ring_size, strerror(errno));
c43617
+		exit(64);
c43617
+	}
c43617
+	ret = pread(fd, m.text_data, m.text_data_ring_size,
c43617
+		    vaddr_to_offset(kaddr));
c43617
+	if (ret != m.text_data_ring_size) {
c43617
+		fprintf(stderr,
c43617
+			"Failed to read text_data of size %lu bytes: %s\n",
c43617
+			m.text_data_ring_size, strerror(errno));
c43617
+		exit(65);
c43617
+	}
c43617
+
c43617
+	/* walk descriptor IDs from tail to head */
c43617
+
c43617
+	tail_id = get_ulong(m.desc_ring + prb_desc_ring_tail_id_offset +
c43617
+						atomic_long_t_counter_offset);
c43617
+	head_id = get_ulong(m.desc_ring + prb_desc_ring_head_id_offset +
c43617
+						atomic_long_t_counter_offset);
c43617
+
c43617
+	for (id = tail_id; id != head_id; id = id_inc(id))
c43617
+		dump_record(&m, id, handler);
c43617
+
c43617
+	/* dump head record */
c43617
+	dump_record(&m, id, handler);
c43617
+
c43617
+	free(m.text_data);
c43617
+	free(m.infos);
c43617
+	free(m.descs);
c43617
+	free(m.prb);
c43617
+}
c43617
+
c43617
 void dump_dmesg(int fd, void (*handler)(char*, unsigned int))
c43617
 {
c43617
-	if (log_first_idx_vaddr)
c43617
+	if (prb_vaddr)
c43617
+		dump_dmesg_lockless(fd, handler);
c43617
+	else if (log_first_idx_vaddr)
c43617
 		dump_dmesg_structured(fd, handler);
c43617
 	else
c43617
 		dump_dmesg_legacy(fd, handler);
c43617
-- 
c43617
2.31.1
c43617