SOURCES/github_d833432f_kpti_trampoline.patch

commit d833432f1ed2d7f507c05d3b6c3e6aa732c49e56
Author: Dave Anderson <anderson@redhat.com>
Date:   Fri Jan 19 14:17:53 2018 -0500

    Initial pass for support of kernel page table isolation.  The x86_64
    "bt" command may indicate "bt: cannot transition from exception stack
    to current process stack" if the crash callback NMI occurred while an
    active task was running on the new entry trampoline stack.  This has
    only been tested on the RHEL7 backport of the upstream patch because,
    as of this commit, crash does not run on 4.15-rc kernels.  Further
    changes may be required for upstream kernels and for distributions
    that implement the kernel changes differently from upstream.
    (anderson@redhat.com)

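Background for the diff that follows: with KPTI, syscall and exception entry
runs on a small per-cpu entry trampoline stack, so the crash callback NMI can
capture an rsp that lies outside both the process stack and every alternate
stack that "bt" already knows about.  A minimal standalone sketch of that
classification problem; all names, addresses, and sizes below are invented
for illustration and are not crash internals:

#include <stdio.h>

#define NR_CPUS          2
#define ENTRY_STACK_SIZE 512	/* bytes; 4.15 uses entry_stack words[64] */

/* Invented per-cpu trampoline stack base addresses, one per cpu. */
static const unsigned long entry_stack_base[NR_CPUS] = {
	0xfffffe0000003000UL,
	0xfffffe0000023000UL,
};

/* Nonzero if rsp lies within this cpu's entry trampoline stack. */
static int on_entry_stack(int cpu, unsigned long rsp)
{
	unsigned long base = entry_stack_base[cpu];

	return (rsp >= base) && (rsp < base + ENTRY_STACK_SIZE);
}

int main(void)
{
	unsigned long rsp = 0xfffffe00000031f8UL;	/* captured mid-entry on cpu 0 */

	if (on_entry_stack(0, rsp))
		printf("rsp %lx: on entry trampoline stack; print eframe, stop\n", rsp);
	else
		printf("rsp %lx: try the existing stack transitions\n", rsp);
	return 0;
}

The patch teaches "bt" this third possibility: when rsp falls in that per-cpu
range, print the exception frame at the top of the trampoline stack and stop.
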
diff --git a/defs.h b/defs.h
index dcd6c26..4d2fb2f 100644
--- a/defs.h
+++ b/defs.h
@@ -5769,6 +5769,8 @@ struct machine_specific {
 	ulong last_p4d_read;
 	struct ORC_data orc;
 	ulong irq_stack_gap;
+	ulong kpti_entry_stack;
+	ulong kpti_entry_stack_size;
 };
 
 #define KSYMS_START    (0x1)
@@ -5786,6 +5788,7 @@ struct machine_specific {
 #define RANDOMIZED  (0x1000)
 #define VM_5LEVEL   (0x2000)
 #define ORC         (0x4000)
+#define KPTI        (0x8000)
 
 #define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4|VM_5LEVEL)
 
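The new kpti_entry_stack field holds a per-cpu symbol value, and
kpti_entry_stack_size its length; the x86_64.c changes below resolve the
value to a concrete per-CPU address by adding kt->__per_cpu_offset[cpu].
A small sketch of that arithmetic, with invented stand-in values rather
than crash's actual data structures:

#include <stdio.h>

int main(void)
{
	/* Invented values: a per-cpu symbol value and per-cpu offsets,
	 * standing in for ms->kpti_entry_stack and kt->__per_cpu_offset[]. */
	unsigned long kpti_entry_stack = 0x8000UL;
	unsigned long per_cpu_offset[] = {
		0xffff88003fc00000UL,
		0xffff88003fd00000UL,
	};
	int cpu;

	for (cpu = 0; cpu < 2; cpu++)
		printf("cpu %d entry stack base: %016lx\n",
		    cpu, kpti_entry_stack + per_cpu_offset[cpu]);
	return 0;
}

This is the same base-plus-offset computation the dump_machdep_table and
x86_64_in_kpti_entry_stack changes below perform per CPU.
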
diff --git a/x86_64.c b/x86_64.c
index d8fade4..e924ca9 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -48,6 +48,7 @@ static void x86_64_back_trace_cmd(struct bt_info *);
 static ulong x86_64_in_exception_stack(struct bt_info *, int *);
 static ulong x86_64_in_irqstack(struct bt_info *);
 static int x86_64_in_alternate_stack(int, ulong);
+static ulong x86_64_in_kpti_entry_stack(int, ulong);
 static ulong __schedule_frame_adjust(ulong, struct bt_info *);
 static void x86_64_low_budget_back_trace_cmd(struct bt_info *);
 static void x86_64_dwarf_back_trace_cmd(struct bt_info *);
@@ -84,6 +85,7 @@ static void x86_64_cpu_pda_init(void);
 static void x86_64_per_cpu_init(void);
 static void x86_64_ist_init(void);
 static void x86_64_irq_stack_gap_init(void);
+static void x86_64_entry_trampoline_init(void);
 static void x86_64_post_init(void);
 static void parse_cmdline_args(void);
 static void x86_64_clear_machdep_cache(void);
@@ -641,6 +643,7 @@ x86_64_init(int when)
                 }
 		x86_64_irq_eframe_link_init();
 		x86_64_irq_stack_gap_init();
+		x86_64_entry_trampoline_init();
 		x86_64_framepointer_init();
 		x86_64_ORC_init();
 		x86_64_thread_return_init();
@@ -722,6 +725,8 @@ x86_64_dump_machdep_table(ulong arg)
 		fprintf(fp, "%sNESTED_NMI", others++ ? "|" : "");
 	if (machdep->flags & RANDOMIZED)
 		fprintf(fp, "%sRANDOMIZED", others++ ? "|" : "");
+	if (machdep->flags & KPTI)
+		fprintf(fp, "%sKPTI", others++ ? "|" : "");
         fprintf(fp, ")\n");
 
 	fprintf(fp, "             kvbase: %lx\n", machdep->kvbase);
@@ -973,7 +978,18 @@ x86_64_dump_machdep_table(ulong arg)
 			fprintf(fp, "\n   ");
 		fprintf(fp, "%016lx ", ms->stkinfo.ibase[c]);
 	}
-	fprintf(fp, "\n");
+	fprintf(fp, "\n                 kpti_entry_stack_size: %ld", ms->kpti_entry_stack_size);
+	fprintf(fp, "\n                      kpti_entry_stack: ");
+	if (machdep->flags & KPTI) {
+		fprintf(fp, "%lx\n   ", ms->kpti_entry_stack);
+		for (c = 0; c < cpus; c++) {
+			if (c && !(c%4))
+				fprintf(fp, "\n   ");
+			fprintf(fp, "%016lx ", ms->kpti_entry_stack + kt->__per_cpu_offset[c]);
+		}
+		fprintf(fp, "\n");
+	} else
+		fprintf(fp, "(unused)\n");
 }
 
 /*
@@ -3147,7 +3163,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
 	struct syment *sp, *spt;
 	FILE *ofp;
 	ulong estack, irqstack;
-	ulong irq_eframe;
+	ulong irq_eframe, kpti_eframe;
 	struct bt_info bt_local, *bt;
 	struct machine_specific *ms;
 	ulong last_process_stack_eframe;
@@ -3493,6 +3509,16 @@ in_exception_stack:
 		bt->stacktop = GET_STACKTOP(bt->tc->task);
 
 		if (!INSTACK(rsp, bt)) {
+			/*
+			 *  If the exception occurred while on the KPTI entry trampoline stack,
+			 *  just print the entry exception frame and bail out.
+			 */
+			if ((kpti_eframe = x86_64_in_kpti_entry_stack(bt->tc->processor, rsp))) {
+				x86_64_exception_frame(EFRAME_PRINT, kpti_eframe, 0, bt, ofp);
+				fprintf(fp, "--- <entry trampoline stack> ---\n");
+				return;
+			}
+
 			switch (bt->flags & (BT_EXCEPTION_STACK|BT_IRQSTACK))
 			{
 			case (BT_EXCEPTION_STACK|BT_IRQSTACK):
@@ -3720,7 +3746,7 @@ x86_64_dwarf_back_trace_cmd(struct bt_info *bt_in)
 	struct syment *sp;
 	FILE *ofp;
 	ulong estack, irqstack;
-	ulong irq_eframe;
+	ulong irq_eframe, kpti_eframe;
 	struct bt_info bt_local, *bt;
 	struct machine_specific *ms;
 	ulong last_process_stack_eframe;
@@ -3940,6 +3966,16 @@ in_exception_stack:
 		bt->stacktop = GET_STACKTOP(bt->tc->task);
 
 		if (!INSTACK(rsp, bt)) {
+			/*
+			 *  If the exception occurred while on the KPTI entry trampoline stack,
+			 *  just print the entry exception frame and bail out.
+			 */
+			if ((kpti_eframe = x86_64_in_kpti_entry_stack(bt->tc->processor, rsp))) {
+				x86_64_exception_frame(EFRAME_PRINT, kpti_eframe, 0, bt, ofp);
+				fprintf(fp, "--- <entry trampoline stack> ---\n");
+				return;
+			}
+
 			switch (bt->flags & (BT_EXCEPTION_STACK|BT_IRQSTACK))
 			{
 			case (BT_EXCEPTION_STACK|BT_IRQSTACK):
@@ -8661,4 +8697,71 @@ next_in_func:
 			goto next_in_func;
 }
 
+/*
+ *  KPTI entry stack initialization.  May vary significantly
+ *  between upstream and distribution backports.
+ */
+static void
+x86_64_entry_trampoline_init(void)
+{
+	struct machine_specific *ms;
+	struct syment *sp;
+
+	ms = machdep->machspec;
+
+	if (!kernel_symbol_exists("pti_init") &&
+	    !kernel_symbol_exists("kaiser_init"))
+		return;
+
+	/*
+	 *  4.15
+	 */
+	if (MEMBER_EXISTS("entry_stack", "words") &&
+	    MEMBER_EXISTS("entry_stack_page", "stack") &&
+	    (sp = per_cpu_symbol_search("per_cpu__entry_stack_storage"))) {
+		ms->kpti_entry_stack = sp->value + MEMBER_OFFSET("entry_stack_page", "stack");
+		ms->kpti_entry_stack_size = MEMBER_SIZE("entry_stack", "words");
+		machdep->flags |= KPTI;
+		return;
+	}
+
+	/*
+	 *  RHEL
+	 */
+	if (MEMBER_EXISTS("tss_struct", "stack")) {
+		if (!(sp = per_cpu_symbol_search("per_cpu__init_tss")))
+			sp = per_cpu_symbol_search("per_cpu__cpu_tss");
+		ms->kpti_entry_stack = sp->value + MEMBER_OFFSET("tss_struct", "stack");
+		ms->kpti_entry_stack_size = MEMBER_SIZE("tss_struct", "stack");
+		machdep->flags |= KPTI;
+		return;
+	}
+}
+
+static ulong
+x86_64_in_kpti_entry_stack(int cpu, ulong rsp)
+{
+	ulong stack_base, stack_end;
+	struct machine_specific *ms;
+
+	if (!(machdep->flags & KPTI))
+		return 0;
+
+	ms = machdep->machspec;
+
+	if ((kt->flags & SMP) && (kt->flags & PER_CPU_OFF)) {
+		if (kt->__per_cpu_offset[cpu] == 0)
+			return 0;
+		stack_base = ms->kpti_entry_stack + kt->__per_cpu_offset[cpu];
+	} else
+		stack_base = ms->kpti_entry_stack;
+
+	stack_end = stack_base +
+		(ms->kpti_entry_stack_size > 0 ? ms->kpti_entry_stack_size : 512);
+
+	if ((rsp >= stack_base) && (rsp < stack_end))
+		return(stack_end - SIZE(pt_regs));
+
+	return 0;
+}
 #endif  /* X86_64 */
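
A worked example of the value x86_64_in_kpti_entry_stack() hands back: when
rsp is inside the trampoline range, the function returns stack_end minus
SIZE(pt_regs), treating the top of the entry stack as holding a pt_regs-shaped
exception frame.  The sketch below assumes the 512-byte fallback size
hard-coded in the patch and a 168-byte pt_regs (the conventional x86_64
layout of 21 eight-byte slots); both figures are assumptions here:

#include <assert.h>
#include <stdio.h>

#define ENTRY_STACK_SIZE 512	/* the fallback size hard-coded in the patch */
#define PT_REGS_SIZE     168	/* assumed: 21 eight-byte registers on x86_64 */

int main(void)
{
	unsigned long stack_base = 0xfffffe0000003000UL;	/* invented */
	unsigned long stack_end = stack_base + ENTRY_STACK_SIZE;
	unsigned long rsp = stack_end - 40;			/* partway into entry */
	unsigned long eframe = 0;

	if ((rsp >= stack_base) && (rsp < stack_end))
		eframe = stack_end - PT_REGS_SIZE;		/* frame at stack top */

	assert(eframe == stack_base + ENTRY_STACK_SIZE - PT_REGS_SIZE);
	printf("exception frame at %lx\n", eframe);
	return 0;
}

The backtrace code then prints that frame with EFRAME_PRINT and stops, since
there is no further stack to transition to from the entry trampoline.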