Blame SOURCES/0007-bt-x86_64-filter-out-idle-task-stack.patch

1a20ba
From 6833262bf87177d8affe4f91b2e7d2c76ecdf636 Mon Sep 17 00:00:00 2001
1a20ba
From: Qi Zheng <zhengqi.arch@bytedance.com>
1a20ba
Date: Tue, 24 May 2022 20:25:53 +0800
1a20ba
Subject: [PATCH 07/18] bt: x86_64: filter out idle task stack
1a20ba
1a20ba
When we use crash to troubleshoot softlockup and other problems,
1a20ba
we often use the 'bt -a' command to print the stacks of running
1a20ba
processes on all CPUs. But now some servers have hundreds of CPUs
1a20ba
(such as AMD machines), which causes the 'bt -a' command to output
1a20ba
a lot of process stacks. And many of these stacks are the stacks
1a20ba
of the idle process, which are not needed by us.
1a20ba
1a20ba
Therefore, in order to reduce this part of the interference information,
1a20ba
this patch adds the -n option to the bt command. When we specify
1a20ba
'-n idle' (meaning no idle), the stack of the idle process will be
1a20ba
filtered out, thus speeding up our troubleshooting.
1a20ba
1a20ba
Note that this option only works for crash dumps captured by kdump.
1a20ba
1a20ba
The command output is as follows:
1a20ba
crash> bt -a -n idle
1a20ba
[...]
1a20ba
PID: 0      TASK: ffff889ff8c34380  CPU: 8   COMMAND: "swapper/8"
1a20ba
1a20ba
PID: 0      TASK: ffff889ff8c32d00  CPU: 9   COMMAND: "swapper/9"
1a20ba
1a20ba
PID: 0      TASK: ffff889ff8c31680  CPU: 10  COMMAND: "swapper/10"
1a20ba
1a20ba
PID: 0      TASK: ffff889ff8c35a00  CPU: 11  COMMAND: "swapper/11"
1a20ba
1a20ba
PID: 0      TASK: ffff889ff8c3c380  CPU: 12  COMMAND: "swapper/12"
1a20ba
1a20ba
PID: 150773  TASK: ffff889fe85a1680  CPU: 13  COMMAND: "bash"
1a20ba
 #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407
1a20ba
 #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d
1a20ba
 #2 [ffffc9000d35bdf0] panic at ffffffff81081930
1a20ba
 #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1
1a20ba
 #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175
1a20ba
 #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b
1a20ba
 #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86
1a20ba
 #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5
1a20ba
 #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579
1a20ba
 #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259
1a20ba
    RIP: 00007fa7abcdc274  RSP: 00007fffa731f678  RFLAGS: 00000246
1a20ba
    RAX: ffffffffffffffda  RBX: 0000000000000002  RCX: 00007fa7abcdc274
1a20ba
    RDX: 0000000000000002  RSI: 0000563ca51ee6d0  RDI: 0000000000000001
1a20ba
    RBP: 0000563ca51ee6d0   R8: 000000000000000a   R9: 00007fa7abd6be80
1a20ba
    R10: 000000000000000a  R11: 0000000000000246  R12: 00007fa7abdad760
1a20ba
    R13: 0000000000000002  R14: 00007fa7abda8760  R15: 0000000000000002
1a20ba
    ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b
1a20ba
[...]
1a20ba
1a20ba
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
1a20ba
Acked-by: Kazuhito Hagio <k-hagio-ab@nec.com>
1a20ba
Acked-by: Lianbo Jiang <lijiang@redhat.com>
1a20ba
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
1a20ba
---
1a20ba
 defs.h   |  1 +
1a20ba
 help.c   | 33 ++++++++++++++++++++++++++++++++-
1a20ba
 kernel.c | 13 ++++++++++++-
1a20ba
 x86_64.c |  8 ++++++++
1a20ba
 4 files changed, 53 insertions(+), 2 deletions(-)
1a20ba
1a20ba
diff --git a/defs.h b/defs.h
1a20ba
index ecbced24d2e3..c8444b4e54eb 100644
1a20ba
--- a/defs.h
1a20ba
+++ b/defs.h
1a20ba
@@ -5832,6 +5832,7 @@ ulong cpu_map_addr(const char *type);
1a20ba
 #define BT_SHOW_ALL_REGS  (0x2000000000000ULL)
1a20ba
 #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
1a20ba
 #define BT_OVERFLOW_STACK (0x8000000000000ULL)
1a20ba
+#define BT_SKIP_IDLE     (0x10000000000000ULL)
1a20ba
 #define BT_SYMBOL_OFFSET   (BT_SYMBOLIC_ARGS)
1a20ba
 
1a20ba
 #define BT_REF_HEXVAL         (0x1)
1a20ba
diff --git a/help.c b/help.c
1a20ba
index 51a0fe3d687c..e1bbc5abe029 100644
1a20ba
--- a/help.c
1a20ba
+++ b/help.c
1a20ba
@@ -1909,12 +1909,14 @@ char *help_bt[] = {
1a20ba
 "bt",
1a20ba
 "backtrace",
1a20ba
 "[-a|-c cpu(s)|-g|-r|-t|-T|-l|-e|-E|-f|-F|-o|-O|-v|-p] [-R ref] [-s [-x|d]]"
1a20ba
-"\n     [-I ip] [-S sp] [pid | task]",
1a20ba
+"\n     [-I ip] [-S sp] [-n idle] [pid | task]",
1a20ba
 "  Display a kernel stack backtrace.  If no arguments are given, the stack",
1a20ba
 "  trace of the current context will be displayed.\n",
1a20ba
 "       -a  displays the stack traces of the active task on each CPU.",
1a20ba
 "           (only applicable to crash dumps)",
1a20ba
 "       -A  same as -a, but also displays vector registers (S390X only).",
1a20ba
+"  -n idle  filter the stack of idle tasks (x86_64).",
1a20ba
+"           (only applicable to crash dumps)",
1a20ba
 "       -p  display the stack trace of the panic task only.",
1a20ba
 "           (only applicable to crash dumps)",
1a20ba
 "   -c cpu  display the stack trace of the active task on one or more CPUs,",
1a20ba
@@ -2004,6 +2006,35 @@ char *help_bt[] = {
1a20ba
 "       DS:  002b      ESI: bfffc8a0  ES:  002b      EDI: 00000000 ",
1a20ba
 "       SS:  002b      ESP: bfffc82c  EBP: bfffd224 ",
1a20ba
 "       CS:  0023      EIP: 400d032e  ERR: 0000008e  EFLAGS: 00000246  ",
1a20ba
+" ",
1a20ba
+"  Display the stack trace of the active task(s) when the kernel panicked,",
1a20ba
+"  and filter out the stack of the idle tasks:",
1a20ba
+" ",
1a20ba
+"    %s> bt -a -n idle",
1a20ba
+"    ...",
1a20ba
+"    PID: 0      TASK: ffff889ff8c35a00  CPU: 11  COMMAND: \"swapper/11\"",
1a20ba
+" ",
1a20ba
+"    PID: 0      TASK: ffff889ff8c3c380  CPU: 12  COMMAND: \"swapper/12\"",
1a20ba
+" ",
1a20ba
+"    PID: 150773  TASK: ffff889fe85a1680  CPU: 13  COMMAND: \"bash\"",
1a20ba
+"    #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407",
1a20ba
+"    #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d",
1a20ba
+"    #2 [ffffc9000d35bdf0] panic at ffffffff81081930",
1a20ba
+"    #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1",
1a20ba
+"    #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175",
1a20ba
+"    #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b",
1a20ba
+"    #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86",
1a20ba
+"    #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5",
1a20ba
+"    #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579",
1a20ba
+"    #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259",
1a20ba
+"       RIP: 00007fa7abcdc274  RSP: 00007fffa731f678  RFLAGS: 00000246",
1a20ba
+"       RAX: ffffffffffffffda  RBX: 0000000000000002  RCX: 00007fa7abcdc274",
1a20ba
+"       RDX: 0000000000000002  RSI: 0000563ca51ee6d0  RDI: 0000000000000001",
1a20ba
+"       RBP: 0000563ca51ee6d0   R8: 000000000000000a   R9: 00007fa7abd6be80",
1a20ba
+"       R10: 000000000000000a  R11: 0000000000000246  R12: 00007fa7abdad760",
1a20ba
+"       R13: 0000000000000002  R14: 00007fa7abda8760  R15: 0000000000000002",
1a20ba
+"       ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b",
1a20ba
+"    ...",
1a20ba
 "\n  Display the stack trace of the active task on CPU 0 and 1:\n",
1a20ba
 "    %s> bt -c 0,1",
1a20ba
 "    PID: 0      TASK: ffffffff81a8d020  CPU: 0   COMMAND: \"swapper\"",
1a20ba
diff --git a/kernel.c b/kernel.c
1a20ba
index d0921cf567d9..411e9da1e54f 100644
1a20ba
--- a/kernel.c
1a20ba
+++ b/kernel.c
1a20ba
@@ -2503,7 +2503,7 @@ cmd_bt(void)
1a20ba
 	if (kt->flags & USE_OPT_BT)
1a20ba
 		bt->flags |= BT_OPT_BACK_TRACE;
1a20ba
 
1a20ba
-	while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:Ovp")) != EOF) {
1a20ba
+	while ((c = getopt(argcnt, args, "D:fFI:S:c:n:aAloreEgstTdxR:Ovp")) != EOF) {
1a20ba
                 switch (c)
1a20ba
 		{
1a20ba
 		case 'f':
1a20ba
@@ -2672,6 +2672,13 @@ cmd_bt(void)
1a20ba
 			active++;
1a20ba
 			break;
1a20ba
 
1a20ba
+		case 'n':
1a20ba
+			if (machine_type("X86_64") && STREQ(optarg, "idle"))
1a20ba
+				bt->flags |= BT_SKIP_IDLE;
1a20ba
+			else
1a20ba
+				option_not_supported(c);
1a20ba
+			break;
1a20ba
+
1a20ba
 		case 'r':
1a20ba
 			bt->flags |= BT_RAW;
1a20ba
 			break;
1a20ba
@@ -3092,6 +3099,10 @@ back_trace(struct bt_info *bt)
1a20ba
 	} else
1a20ba
                 machdep->get_stack_frame(bt, &eip, &esp);
1a20ba
 
1a20ba
+	/* skip idle task stack */
1a20ba
+	if (bt->flags & BT_SKIP_IDLE)
1a20ba
+		return;
1a20ba
+
1a20ba
 	if (bt->flags & BT_KSTACKP) {
1a20ba
 		bt->stkptr = esp;
1a20ba
 		return;
1a20ba
diff --git a/x86_64.c b/x86_64.c
1a20ba
index ecaefd2f46a8..cfafbcc4dabe 100644
1a20ba
--- a/x86_64.c
1a20ba
+++ b/x86_64.c
1a20ba
@@ -4918,6 +4918,9 @@ x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
1a20ba
 	if (bt->flags & BT_DUMPFILE_SEARCH)
1a20ba
 		return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
1a20ba
 
1a20ba
+	if (bt->flags & BT_SKIP_IDLE)
1a20ba
+		bt->flags &= ~BT_SKIP_IDLE;
1a20ba
+
1a20ba
         if (pcp)
1a20ba
                 *pcp = x86_64_get_pc(bt);
1a20ba
         if (spp)
1a20ba
@@ -4960,6 +4963,9 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *rip, ulong *rsp)
1a20ba
 	estack = -1;
1a20ba
 	panic = FALSE;
1a20ba
 
1a20ba
+	if (bt_in->flags & BT_SKIP_IDLE)
1a20ba
+		bt_in->flags &= ~BT_SKIP_IDLE;
1a20ba
+
1a20ba
 	panic_task = tt->panic_task == bt->task ? TRUE : FALSE;
1a20ba
 
1a20ba
 	if (panic_task && bt->machdep) {
1a20ba
@@ -5098,6 +5104,8 @@ next_sysrq:
1a20ba
                 if (!panic_task && STREQ(sym, "crash_nmi_callback")) {
1a20ba
                         *rip = *up;
1a20ba
                         *rsp = bt->stackbase + ((char *)(up) - bt->stackbuf);
1a20ba
+			if ((bt->flags & BT_SKIP_IDLE) && is_idle_thread(bt->task))
1a20ba
+				bt_in->flags |= BT_SKIP_IDLE;
1a20ba
                         return;
1a20ba
                 }
1a20ba
 
1a20ba
-- 
1a20ba
2.30.2
1a20ba