|
|
aefe19 |
From 6833262bf87177d8affe4f91b2e7d2c76ecdf636 Mon Sep 17 00:00:00 2001
|
|
|
aefe19 |
From: Qi Zheng <zhengqi.arch@bytedance.com>
|
|
|
aefe19 |
Date: Tue, 24 May 2022 20:25:53 +0800
|
|
|
aefe19 |
Subject: [PATCH 07/18] bt: x86_64: filter out idle task stack
|
|
|
aefe19 |
|
|
|
aefe19 |
When we use crash to troubleshoot softlockup and other problems,
|
|
|
aefe19 |
we often use the 'bt -a' command to print the stacks of running
|
|
|
aefe19 |
processes on all CPUs. But now some servers have hundreds of CPUs
|
|
|
aefe19 |
(such as AMD machines), which causes the 'bt -a' command to output
|
|
|
aefe19 |
a lot of process stacks. And many of these stacks are the stacks
|
|
|
aefe19 |
of the idle process, which we do not need.
|
|
|
aefe19 |
|
|
|
aefe19 |
Therefore, in order to reduce this noise in the output,
|
|
|
aefe19 |
this patch adds the -n option to the bt command. When we specify
|
|
|
aefe19 |
'-n idle' (meaning no idle), the stack of the idle process will be
|
|
|
aefe19 |
filtered out, thus speeding up our troubleshooting.
|
|
|
aefe19 |
|
|
|
aefe19 |
Note that the option works only for crash dumps captured by kdump.
|
|
|
aefe19 |
|
|
|
aefe19 |
The command output is as follows:
|
|
|
aefe19 |
crash> bt -a -n idle
|
|
|
aefe19 |
[...]
|
|
|
aefe19 |
PID: 0 TASK: ffff889ff8c34380 CPU: 8 COMMAND: "swapper/8"
|
|
|
aefe19 |
|
|
|
aefe19 |
PID: 0 TASK: ffff889ff8c32d00 CPU: 9 COMMAND: "swapper/9"
|
|
|
aefe19 |
|
|
|
aefe19 |
PID: 0 TASK: ffff889ff8c31680 CPU: 10 COMMAND: "swapper/10"
|
|
|
aefe19 |
|
|
|
aefe19 |
PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: "swapper/11"
|
|
|
aefe19 |
|
|
|
aefe19 |
PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: "swapper/12"
|
|
|
aefe19 |
|
|
|
aefe19 |
PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: "bash"
|
|
|
aefe19 |
#0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407
|
|
|
aefe19 |
#1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d
|
|
|
aefe19 |
#2 [ffffc9000d35bdf0] panic at ffffffff81081930
|
|
|
aefe19 |
#3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1
|
|
|
aefe19 |
#4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175
|
|
|
aefe19 |
#5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b
|
|
|
aefe19 |
#6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86
|
|
|
aefe19 |
#7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5
|
|
|
aefe19 |
#8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579
|
|
|
aefe19 |
#9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259
|
|
|
aefe19 |
RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246
|
|
|
aefe19 |
RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274
|
|
|
aefe19 |
RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001
|
|
|
aefe19 |
RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80
|
|
|
aefe19 |
R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760
|
|
|
aefe19 |
R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002
|
|
|
aefe19 |
ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b
|
|
|
aefe19 |
[...]
|
|
|
aefe19 |
|
|
|
aefe19 |
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
|
|
|
aefe19 |
Acked-by: Kazuhito Hagio <k-hagio-ab@nec.com>
|
|
|
aefe19 |
Acked-by: Lianbo Jiang <lijiang@redhat.com>
|
|
|
aefe19 |
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
|
|
|
aefe19 |
---
|
|
|
aefe19 |
defs.h | 1 +
|
|
|
aefe19 |
help.c | 33 ++++++++++++++++++++++++++++++++-
|
|
|
aefe19 |
kernel.c | 13 ++++++++++++-
|
|
|
aefe19 |
x86_64.c | 8 ++++++++
|
|
|
aefe19 |
4 files changed, 53 insertions(+), 2 deletions(-)
|
|
|
aefe19 |
|
|
|
aefe19 |
diff --git a/defs.h b/defs.h
|
|
|
aefe19 |
index ecbced24d2e3..c8444b4e54eb 100644
|
|
|
aefe19 |
--- a/defs.h
|
|
|
aefe19 |
+++ b/defs.h
|
|
|
aefe19 |
@@ -5832,6 +5832,7 @@ ulong cpu_map_addr(const char *type);
|
|
|
aefe19 |
#define BT_SHOW_ALL_REGS (0x2000000000000ULL)
|
|
|
aefe19 |
#define BT_REGS_NOT_FOUND (0x4000000000000ULL)
|
|
|
aefe19 |
#define BT_OVERFLOW_STACK (0x8000000000000ULL)
|
|
|
aefe19 |
+#define BT_SKIP_IDLE (0x10000000000000ULL)
|
|
|
aefe19 |
#define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
|
|
|
aefe19 |
|
|
|
aefe19 |
#define BT_REF_HEXVAL (0x1)
|
|
|
aefe19 |
diff --git a/help.c b/help.c
|
|
|
aefe19 |
index 51a0fe3d687c..e1bbc5abe029 100644
|
|
|
aefe19 |
--- a/help.c
|
|
|
aefe19 |
+++ b/help.c
|
|
|
aefe19 |
@@ -1909,12 +1909,14 @@ char *help_bt[] = {
|
|
|
aefe19 |
"bt",
|
|
|
aefe19 |
"backtrace",
|
|
|
aefe19 |
"[-a|-c cpu(s)|-g|-r|-t|-T|-l|-e|-E|-f|-F|-o|-O|-v|-p] [-R ref] [-s [-x|d]]"
|
|
|
aefe19 |
-"\n [-I ip] [-S sp] [pid | task]",
|
|
|
aefe19 |
+"\n [-I ip] [-S sp] [-n idle] [pid | task]",
|
|
|
aefe19 |
" Display a kernel stack backtrace. If no arguments are given, the stack",
|
|
|
aefe19 |
" trace of the current context will be displayed.\n",
|
|
|
aefe19 |
" -a displays the stack traces of the active task on each CPU.",
|
|
|
aefe19 |
" (only applicable to crash dumps)",
|
|
|
aefe19 |
" -A same as -a, but also displays vector registers (S390X only).",
|
|
|
aefe19 |
+" -n idle filter the stack of idle tasks (x86_64).",
|
|
|
aefe19 |
+" (only applicable to crash dumps)",
|
|
|
aefe19 |
" -p display the stack trace of the panic task only.",
|
|
|
aefe19 |
" (only applicable to crash dumps)",
|
|
|
aefe19 |
" -c cpu display the stack trace of the active task on one or more CPUs,",
|
|
|
aefe19 |
@@ -2004,6 +2006,35 @@ char *help_bt[] = {
|
|
|
aefe19 |
" DS: 002b ESI: bfffc8a0 ES: 002b EDI: 00000000 ",
|
|
|
aefe19 |
" SS: 002b ESP: bfffc82c EBP: bfffd224 ",
|
|
|
aefe19 |
" CS: 0023 EIP: 400d032e ERR: 0000008e EFLAGS: 00000246 ",
|
|
|
aefe19 |
+" ",
|
|
|
aefe19 |
+" Display the stack trace of the active task(s) when the kernel panicked,",
|
|
|
aefe19 |
+" and filter out the stack of the idle tasks:",
|
|
|
aefe19 |
+" ",
|
|
|
aefe19 |
+" %s> bt -a -n idle",
|
|
|
aefe19 |
+" ...",
|
|
|
aefe19 |
+" PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: \"swapper/11\"",
|
|
|
aefe19 |
+" ",
|
|
|
aefe19 |
+" PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: \"swapper/12\"",
|
|
|
aefe19 |
+" ",
|
|
|
aefe19 |
+" PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: \"bash\"",
|
|
|
aefe19 |
+" #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407",
|
|
|
aefe19 |
+" #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d",
|
|
|
aefe19 |
+" #2 [ffffc9000d35bdf0] panic at ffffffff81081930",
|
|
|
aefe19 |
+" #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1",
|
|
|
aefe19 |
+" #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175",
|
|
|
aefe19 |
+" #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b",
|
|
|
aefe19 |
+" #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86",
|
|
|
aefe19 |
+" #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5",
|
|
|
aefe19 |
+" #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579",
|
|
|
aefe19 |
+" #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259",
|
|
|
aefe19 |
+" RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246",
|
|
|
aefe19 |
+" RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274",
|
|
|
aefe19 |
+" RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001",
|
|
|
aefe19 |
+" RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80",
|
|
|
aefe19 |
+" R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760",
|
|
|
aefe19 |
+" R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002",
|
|
|
aefe19 |
+" ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b",
|
|
|
aefe19 |
+" ...",
|
|
|
aefe19 |
"\n Display the stack trace of the active task on CPU 0 and 1:\n",
|
|
|
aefe19 |
" %s> bt -c 0,1",
|
|
|
aefe19 |
" PID: 0 TASK: ffffffff81a8d020 CPU: 0 COMMAND: \"swapper\"",
|
|
|
aefe19 |
diff --git a/kernel.c b/kernel.c
|
|
|
aefe19 |
index d0921cf567d9..411e9da1e54f 100644
|
|
|
aefe19 |
--- a/kernel.c
|
|
|
aefe19 |
+++ b/kernel.c
|
|
|
aefe19 |
@@ -2503,7 +2503,7 @@ cmd_bt(void)
|
|
|
aefe19 |
if (kt->flags & USE_OPT_BT)
|
|
|
aefe19 |
bt->flags |= BT_OPT_BACK_TRACE;
|
|
|
aefe19 |
|
|
|
aefe19 |
- while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:Ovp")) != EOF) {
|
|
|
aefe19 |
+ while ((c = getopt(argcnt, args, "D:fFI:S:c:n:aAloreEgstTdxR:Ovp")) != EOF) {
|
|
|
aefe19 |
switch (c)
|
|
|
aefe19 |
{
|
|
|
aefe19 |
case 'f':
|
|
|
aefe19 |
@@ -2672,6 +2672,13 @@ cmd_bt(void)
|
|
|
aefe19 |
active++;
|
|
|
aefe19 |
break;
|
|
|
aefe19 |
|
|
|
aefe19 |
+ case 'n':
|
|
|
aefe19 |
+ if (machine_type("X86_64") && STREQ(optarg, "idle"))
|
|
|
aefe19 |
+ bt->flags |= BT_SKIP_IDLE;
|
|
|
aefe19 |
+ else
|
|
|
aefe19 |
+ option_not_supported(c);
|
|
|
aefe19 |
+ break;
|
|
|
aefe19 |
+
|
|
|
aefe19 |
case 'r':
|
|
|
aefe19 |
bt->flags |= BT_RAW;
|
|
|
aefe19 |
break;
|
|
|
aefe19 |
@@ -3092,6 +3099,10 @@ back_trace(struct bt_info *bt)
|
|
|
aefe19 |
} else
|
|
|
aefe19 |
machdep->get_stack_frame(bt, &eip, &esp);
|
|
|
aefe19 |
|
|
|
aefe19 |
+ /* skip idle task stack */
|
|
|
aefe19 |
+ if (bt->flags & BT_SKIP_IDLE)
|
|
|
aefe19 |
+ return;
|
|
|
aefe19 |
+
|
|
|
aefe19 |
if (bt->flags & BT_KSTACKP) {
|
|
|
aefe19 |
bt->stkptr = esp;
|
|
|
aefe19 |
return;
|
|
|
aefe19 |
diff --git a/x86_64.c b/x86_64.c
|
|
|
aefe19 |
index ecaefd2f46a8..cfafbcc4dabe 100644
|
|
|
aefe19 |
--- a/x86_64.c
|
|
|
aefe19 |
+++ b/x86_64.c
|
|
|
aefe19 |
@@ -4918,6 +4918,9 @@ x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
|
|
|
aefe19 |
if (bt->flags & BT_DUMPFILE_SEARCH)
|
|
|
aefe19 |
return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
|
|
|
aefe19 |
|
|
|
aefe19 |
+ if (bt->flags & BT_SKIP_IDLE)
|
|
|
aefe19 |
+ bt->flags &= ~BT_SKIP_IDLE;
|
|
|
aefe19 |
+
|
|
|
aefe19 |
if (pcp)
|
|
|
aefe19 |
*pcp = x86_64_get_pc(bt);
|
|
|
aefe19 |
if (spp)
|
|
|
aefe19 |
@@ -4960,6 +4963,9 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *rip, ulong *rsp)
|
|
|
aefe19 |
estack = -1;
|
|
|
aefe19 |
panic = FALSE;
|
|
|
aefe19 |
|
|
|
aefe19 |
+ if (bt_in->flags & BT_SKIP_IDLE)
|
|
|
aefe19 |
+ bt_in->flags &= ~BT_SKIP_IDLE;
|
|
|
aefe19 |
+
|
|
|
aefe19 |
panic_task = tt->panic_task == bt->task ? TRUE : FALSE;
|
|
|
aefe19 |
|
|
|
aefe19 |
if (panic_task && bt->machdep) {
|
|
|
aefe19 |
@@ -5098,6 +5104,8 @@ next_sysrq:
|
|
|
aefe19 |
if (!panic_task && STREQ(sym, "crash_nmi_callback")) {
|
|
|
aefe19 |
*rip = *up;
|
|
|
aefe19 |
*rsp = bt->stackbase + ((char *)(up) - bt->stackbuf);
|
|
|
aefe19 |
+ if ((bt->flags & BT_SKIP_IDLE) && is_idle_thread(bt->task))
|
|
|
aefe19 |
+ bt_in->flags |= BT_SKIP_IDLE;
|
|
|
aefe19 |
return;
|
|
|
aefe19 |
}
|
|
|
aefe19 |
|
|
|
aefe19 |
--
|
|
|
aefe19 |
2.30.2
|
|
|
aefe19 |
|