Blame SOURCES/kexec-tools-2.0.15-makedumpfile-sadump-Fix-a-KASLR-problem-of-sadump.patch

06c2a2
From e1ac694b94ebfa7204c5b1fac1a87d204b48f5b4 Mon Sep 17 00:00:00 2001
06c2a2
From: Takao Indoh <indou.takao@jp.fujitsu.com>
06c2a2
Date: Thu, 26 Oct 2017 20:32:54 +0900
06c2a2
Subject: [PATCH 3/4] [PATCH v3 3/4] sadump: Fix a KASLR problem of sadump
06c2a2
06c2a2
This patch fixes a problem where makedumpfile cannot handle a dumpfile
06c2a2
which is captured by sadump on a KASLR-enabled kernel.
06c2a2
06c2a2
When the KASLR feature is enabled, the kernel is placed in memory randomly
06c2a2
and therefore makedumpfile cannot handle a dumpfile captured by sadump
06c2a2
because addresses of kernel symbols in System.map or vmlinux are
06c2a2
different from actual addresses.
06c2a2
06c2a2
To solve this problem, we need to calculate the kaslr offset (the difference
06c2a2
between original symbol address and actual address) and phys_base, and
06c2a2
adjust the symbol table of makedumpfile. In the case of a kdump dumpfile,
06c2a2
this information is included in the header, but a sadump dumpfile does
06c2a2
not have such information.
06c2a2
06c2a2
This patch calculates the kaslr offset and phys_base to solve this problem.
06c2a2
Please see the comment in calc_kaslr_offset() for the detailed idea.
06c2a2
The basic idea is to get the registers (IDTR and CR3) from the dump header, and
06c2a2
calculate kaslr_offset/phys_base using them.
06c2a2
06c2a2
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
06c2a2
---
06c2a2
 makedumpfile.c |  10 ++++
06c2a2
 makedumpfile.h |   5 +-
06c2a2
 sadump_info.c  | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
06c2a2
 3 files changed, 155 insertions(+), 3 deletions(-)
06c2a2
06c2a2
diff --git a/makedumpfile.c b/makedumpfile.c
06c2a2
index 5f2ca7d0fbc8..41438a344574 100644
06c2a2
--- a/makedumpfile-1.6.2/makedumpfile.c
06c2a2
+++ b/makedumpfile-1.6.2/makedumpfile.c
06c2a2
@@ -1554,6 +1554,9 @@ get_symbol_info(void)
06c2a2
 	SYMBOL_INIT(demote_segment_4k, "demote_segment_4k");
06c2a2
 	SYMBOL_INIT(cur_cpu_spec, "cur_cpu_spec");
06c2a2
 
06c2a2
+	SYMBOL_INIT(divide_error, "divide_error");
06c2a2
+	SYMBOL_INIT(idt_table, "idt_table");
06c2a2
+
06c2a2
 	return TRUE;
06c2a2
 }
06c2a2
 
06c2a2
@@ -2249,6 +2252,13 @@ write_vmcoreinfo_data(void)
06c2a2
 	WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
06c2a2
 #endif
06c2a2
 
06c2a2
+	if (info->phys_base)
06c2a2
+		fprintf(info->file_vmcoreinfo, "%s%lu\n", STR_NUMBER("phys_base"),
06c2a2
+			info->phys_base);
06c2a2
+	if (info->kaslr_offset)
06c2a2
+		fprintf(info->file_vmcoreinfo, "%s%lx\n", STR_KERNELOFFSET,
06c2a2
+			info->kaslr_offset);
06c2a2
+
06c2a2
 	/*
06c2a2
 	 * write the source file of 1st kernel
06c2a2
 	 */
06c2a2
diff --git a/makedumpfile.h b/makedumpfile.h
06c2a2
index f48dc0b82d4a..5f814e7bc3c1 100644
06c2a2
--- a/makedumpfile-1.6.2/makedumpfile.h
06c2a2
+++ b/makedumpfile-1.6.2/makedumpfile.h
06c2a2
@@ -45,6 +45,7 @@
06c2a2
 #include "sadump_mod.h"
06c2a2
 #include <pthread.h>
06c2a2
 #include <semaphore.h>
06c2a2
+#include <inttypes.h>
06c2a2
 
06c2a2
 #define VMEMMAPSTART 0xffffea0000000000UL
06c2a2
 #define BITS_PER_WORD 64
06c2a2
@@ -1599,6 +1600,8 @@ struct symbol_table {
06c2a2
 	unsigned long long	cpu_online_mask;
06c2a2
 	unsigned long long	__cpu_online_mask;
06c2a2
 	unsigned long long	kexec_crash_image;
06c2a2
+	unsigned long long	divide_error;
06c2a2
+	unsigned long long	idt_table;
06c2a2
 
06c2a2
 	/*
06c2a2
 	 * symbols on ppc64 arch
06c2a2
@@ -1960,7 +1963,7 @@ int iomem_for_each_line(char *match, int (*callback)(void *data, int nr,
06c2a2
 						     unsigned long length),
06c2a2
 			void *data);
06c2a2
 int is_bigendian(void);
06c2a2
-
06c2a2
+int get_symbol_info(void);
06c2a2
 
06c2a2
 /*
06c2a2
  * for Xen extraction
06c2a2
diff --git a/sadump_info.c b/sadump_info.c
06c2a2
index 7dd22e704234..29ccef881370 100644
06c2a2
--- a/makedumpfile-1.6.2/sadump_info.c
06c2a2
+++ b/makedumpfile-1.6.2/sadump_info.c
06c2a2
@@ -1035,6 +1035,138 @@ sadump_get_max_mapnr(void)
06c2a2
 
06c2a2
 #ifdef __x86_64__
06c2a2
 
06c2a2
+/*
06c2a2
+ * Get address of vector0 interrupt handler (Divide Error) from Interrupt
06c2a2
+ * Descriptor Table.
06c2a2
+ */
06c2a2
+static unsigned long
06c2a2
+get_vec0_addr(ulong idtr)
06c2a2
+{
06c2a2
+	struct gate_struct64 {
06c2a2
+		uint16_t offset_low;
06c2a2
+		uint16_t segment;
06c2a2
+		uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
06c2a2
+		uint16_t offset_middle;
06c2a2
+		uint32_t offset_high;
06c2a2
+		uint32_t zero1;
06c2a2
+	} __attribute__((packed)) gate;
06c2a2
+
06c2a2
+	readmem(PADDR, idtr, &gate, sizeof(gate));
06c2a2
+
06c2a2
+	return ((ulong)gate.offset_high << 32)
06c2a2
+		+ ((ulong)gate.offset_middle << 16)
06c2a2
+		+ gate.offset_low;
06c2a2
+}
06c2a2
+
06c2a2
+/*
06c2a2
+ * Calculate kaslr_offset and phys_base
06c2a2
+ *
06c2a2
+ * kaslr_offset:
06c2a2
+ *   The difference between original address in vmlinux and actual address
06c2a2
+ *   placed randomly by kaslr feature. To be more accurate,
06c2a2
+ *   kaslr_offset = actual address  - original address
06c2a2
+ *
06c2a2
+ * phys_base:
06c2a2
+ *   Physical address where the kernel is placed. In other words, it's a
06c2a2
+ *   physical address of __START_KERNEL_map. This is also decided randomly by
06c2a2
+ *   kaslr.
06c2a2
+ *
06c2a2
+ * kaslr offset and phys_base are calculated as follows:
06c2a2
+ *
06c2a2
+ * kaslr_offset:
06c2a2
+ * 1) Get IDTR and CR3 value from the dump header.
06c2a2
+ * 2) Get a virtual address of IDT from IDTR value
06c2a2
+ *    --- (A)
06c2a2
+ * 3) Translate (A) to physical address using CR3, which points to the top of
06c2a2
+ *    page table.
06c2a2
+ *    --- (B)
06c2a2
+ * 4) Get an address of vector0 (Divide Error) interrupt handler from
06c2a2
+ *    IDT, which is pointed to by (B).
06c2a2
+ *    --- (C)
06c2a2
+ * 5) Get an address of symbol "divide_error" from vmlinux
06c2a2
+ *    --- (D)
06c2a2
+ *
06c2a2
+ * Now we have two addresses:
06c2a2
+ * (C)-> Actual address of "divide_error"
06c2a2
+ * (D)-> Original address of "divide_error" in the vmlinux
06c2a2
+ *
06c2a2
+ * kaslr_offset can be calculated by the difference between these two
06c2a2
+ * values.
06c2a2
+ *
06c2a2
+ * phys_base:
06c2a2
+ * 1) Get IDT virtual address from vmlinux
06c2a2
+ *    --- (E)
06c2a2
+ *
06c2a2
+ * So phys_base can be calculated using relationship of directly mapped
06c2a2
+ * address.
06c2a2
+ *
06c2a2
+ * phys_base =
06c2a2
+ *   Physical address(B) -
06c2a2
+ *   (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
06c2a2
+ *
06c2a2
+ * Note that the address (A) cannot be used instead of (E) because (A) is
06c2a2
+ * not direct map address, it's a fixed map address.
06c2a2
+ */
06c2a2
+int
06c2a2
+calc_kaslr_offset(void)
06c2a2
+{
06c2a2
+	struct sadump_header *sh = si->sh_memory;
06c2a2
+	uint64_t idtr = 0, cr3 = 0, idtr_paddr;
06c2a2
+	struct sadump_smram_cpu_state smram, zero;
06c2a2
+	int apicid;
06c2a2
+	unsigned long divide_error_vmcore, divide_error_vmlinux;
06c2a2
+	unsigned long kaslr_offset, phys_base;
06c2a2
+
06c2a2
+	memset(&zero, 0, sizeof(zero));
06c2a2
+	for (apicid = 0; apicid < sh->nr_cpus; ++apicid) {
06c2a2
+		if (!get_smram_cpu_state(apicid, &smram)) {
06c2a2
+			ERRMSG("get_smram_cpu_state error\n");
06c2a2
+			return FALSE;
06c2a2
+		}
06c2a2
+
06c2a2
+		if (memcmp(&smram, &zero, sizeof(smram)) != 0)
06c2a2
+			break;
06c2a2
+	}
06c2a2
+	if (apicid >= sh->nr_cpus) {
06c2a2
+		ERRMSG("Can't get smram state\n");
06c2a2
+		return FALSE;
06c2a2
+	}
06c2a2
+
06c2a2
+	idtr = ((uint64_t)smram.IdtUpper)<<32 | (uint64_t)smram.IdtLower;
06c2a2
+	cr3 = smram.Cr3;
06c2a2
+
06c2a2
+	/* Convert virtual address of IDT table to physical address */
06c2a2
+	if ((idtr_paddr = vtop4_x86_64_pagetable(idtr, cr3)) == NOT_PADDR)
06c2a2
+		return FALSE;
06c2a2
+
06c2a2
+	/* Now we can calculate kaslr_offset and phys_base */
06c2a2
+	divide_error_vmlinux = SYMBOL(divide_error);
06c2a2
+	divide_error_vmcore = get_vec0_addr(idtr_paddr);
06c2a2
+	kaslr_offset = divide_error_vmcore - divide_error_vmlinux;
06c2a2
+	phys_base = idtr_paddr -
06c2a2
+		(SYMBOL(idt_table) + kaslr_offset - __START_KERNEL_map);
06c2a2
+
06c2a2
+	info->kaslr_offset = kaslr_offset;
06c2a2
+	info->phys_base = phys_base;
06c2a2
+
06c2a2
+	DEBUG_MSG("sadump: idtr=%" PRIx64 "\n", idtr);
06c2a2
+	DEBUG_MSG("sadump: cr3=%" PRIx64 "\n", cr3);
06c2a2
+	DEBUG_MSG("sadump: idtr(phys)=%" PRIx64 "\n", idtr_paddr);
06c2a2
+	DEBUG_MSG("sadump: devide_error(vmlinux)=%lx\n",
06c2a2
+		divide_error_vmlinux);
06c2a2
+	DEBUG_MSG("sadump: devide_error(vmcore)=%lx\n",
06c2a2
+		divide_error_vmcore);
06c2a2
+
06c2a2
+	/* Reload symbol */
06c2a2
+	if (!get_symbol_info())
06c2a2
+		return FALSE;
06c2a2
+
06c2a2
+	DEBUG_MSG("sadump: kaslr_offset=%lx\n", info->kaslr_offset);
06c2a2
+	DEBUG_MSG("sadump: phys_base=%lx\n", info->phys_base);
06c2a2
+
06c2a2
+	return TRUE;
06c2a2
+}
06c2a2
+
06c2a2
 int
06c2a2
 sadump_virt_phys_base(void)
06c2a2
 {
06c2a2
@@ -1065,6 +1197,9 @@ sadump_virt_phys_base(void)
06c2a2
 	}
06c2a2
 
06c2a2
 failed:
06c2a2
+	if (calc_kaslr_offset())
06c2a2
+		return TRUE;
06c2a2
+
06c2a2
 	info->phys_base = 0;
06c2a2
 
06c2a2
 	DEBUG_MSG("sadump: failed to calculate phys_base; default to 0\n");
06c2a2
@@ -1518,10 +1653,14 @@ cpu_to_apicid(int cpu, int *apicid)
06c2a2
 		if (!readmem(VADDR, SYMBOL(x86_bios_cpu_apicid_early_ptr),
06c2a2
 			     &early_ptr, sizeof(early_ptr)))
06c2a2
 			return FALSE;
06c2a2
-
06c2a2
+		/*
06c2a2
+		 * Note: SYMBOL(name) value is adjusted by info->kaslr_offset,
06c2a2
+		 * but per_cpu symbol does not need to be adjusted because it
06c2a2
+		 * is not affected by kaslr.
06c2a2
+		 */
06c2a2
 		apicid_addr = early_ptr
06c2a2
 			? SYMBOL(x86_bios_cpu_apicid_early_map)+cpu*sizeof(uint16_t)
06c2a2
-			: per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid), cpu);
06c2a2
+			: per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid) - info->kaslr_offset, cpu);
06c2a2
 
06c2a2
 		if (!readmem(VADDR, apicid_addr, &apicid_u16, sizeof(uint16_t)))
06c2a2
 			return FALSE;
06c2a2
-- 
06c2a2
2.5.5
06c2a2