commit da9bd35afc2269529b029dd22815e04362e89e5b Author: Dave Anderson Date: Wed Oct 11 11:17:30 2017 -0400 Fix for the "runq" command on Linux 4.14 and later kernels that contain commit cd9e61ed1eebbcd5dfad59475d41ec58d9b64b6a, titled "rbtree: cache leftmost node internally". Without the patch, the command fails with the error message "runq: invalid structure member offset: cfs_rq_rb_leftmost". (anderson@redhat.com) diff --git a/task.c b/task.c index 88706bf..2b12af0 100644 --- a/task.c +++ b/task.c @@ -8765,10 +8765,15 @@ cfs_rq_offset_init(void) MEMBER_OFFSET_INIT(sched_rt_entity_my_q, "sched_rt_entity", "my_q"); MEMBER_OFFSET_INIT(sched_entity_on_rq, "sched_entity", "on_rq"); - MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost"); - MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running"); MEMBER_OFFSET_INIT(cfs_rq_tasks_timeline, "cfs_rq", "tasks_timeline"); + MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost"); + if (INVALID_MEMBER(cfs_rq_rb_leftmost) && + VALID_MEMBER(cfs_rq_tasks_timeline) && + MEMBER_EXISTS("rb_root_cached", "rb_leftmost")) + ASSIGN_OFFSET(cfs_rq_rb_leftmost) = OFFSET(cfs_rq_tasks_timeline) + + MEMBER_OFFSET("rb_root_cached", "rb_leftmost"); + MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running"); MEMBER_OFFSET_INIT(cfs_rq_curr, "cfs_rq", "curr"); MEMBER_OFFSET_INIT(rt_rq_active, "rt_rq", "active"); MEMBER_OFFSET_INIT(task_struct_run_list, "task_struct", commit 9e5255af26233e7ef051ebdd8bdccbd15d0d9256 Author: Dave Anderson Date: Wed Oct 11 16:11:34 2017 -0400 Fix to prevent a useless message during session initialization. Without the patch, if the highest possible node bit in the node_states[N_ONLINE] multi-word bitmask is set, then a message such as "crash: next_online_node: 256 is too large!" will be displayed. (anderson@redhat.com) diff --git a/memory.c b/memory.c index 8efe0b2..9c9a40d 100644 --- a/memory.c +++ b/memory.c @@ -17200,10 +17200,8 @@ next_online_node(int first) int i, j, node; ulong mask, *maskptr; - if ((first/BITS_PER_LONG) >= vt->node_online_map_len) { - error(INFO, "next_online_node: %d is too large!\n", first); + if ((first/BITS_PER_LONG) >= vt->node_online_map_len) return -1; - } maskptr = (ulong *)vt->node_online_map; for (i = node = 0; i < vt->node_online_map_len; i++, maskptr++) { commit 2b93c036edf2a5cc21a06a14f377cd9b365f858a Author: Dave Anderson Date: Tue Oct 17 15:40:17 2017 -0400 Additional fixes for the ARM64 "bt" command for Linux 4.14 kernels. The patch corrects the contents of in-kernel exception frame register dumps, and properly transitions the backtrace from the IRQ stack to the process stack.
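The layout change driving this fix: as the MEMBER_EXISTS("pt_regs", "stackframe") tests in the patch below suggest, the Linux 4.14 arm64 pt_regs gained an embedded 16-byte stackframe record (the saved fp/lr pair) at its tail, which the kernel's own unwinder links through. A minimal sketch of the resulting arithmetic, using illustrative names rather than crash internals:

    /*
     * Hedged sketch, not crash source: when a saved frame pointer points
     * at the stackframe record embedded at the tail of a v4.14 pt_regs,
     * the enclosing in-kernel exception frame begins 16 bytes less than
     * sizeof(pt_regs) below it -- the same computation the patch makes
     * with "frame->sp = frame->fp - SIZE(pt_regs) + 16".
     */
    static unsigned long
    exception_frame_base(unsigned long fp, unsigned long pt_regs_size)
    {
        return fp - pt_regs_size + 16;  /* 16 = sizeof(stackframe record) */
    }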
(takahiro.akashi@linaro.org) diff --git a/arm64.c b/arm64.c index 20c5d34..c75669b 100644 --- a/arm64.c +++ b/arm64.c @@ -72,6 +72,7 @@ static void arm64_cmd_mach(void); static void arm64_display_machine_stats(void); static int arm64_get_smp_cpus(void); static void arm64_clear_machdep_cache(void); +static int arm64_on_process_stack(struct bt_info *, ulong); static int arm64_in_alternate_stack(int, ulong); static int arm64_on_irq_stack(int, ulong); static void arm64_set_irq_stack(struct bt_info *); @@ -1333,34 +1334,64 @@ arm64_irq_stack_init(void) int i; struct syment *sp; struct gnu_request request, *req; - req = &request; struct machine_specific *ms = machdep->machspec; + ulong p; + req = &request; - if (!symbol_exists("irq_stack") || - !(sp = per_cpu_symbol_search("irq_stack")) || - !get_symbol_type("irq_stack", NULL, req) || - (req->typecode != TYPE_CODE_ARRAY) || - (req->target_typecode != TYPE_CODE_INT)) - return; - - if (CRASHDEBUG(1)) { - fprintf(fp, "irq_stack: \n"); - fprintf(fp, " type: %s\n", - (req->typecode == TYPE_CODE_ARRAY) ? "TYPE_CODE_ARRAY" : "other"); - fprintf(fp, " target_typecode: %s\n", - req->target_typecode == TYPE_CODE_INT ? "TYPE_CODE_INT" : "other"); - fprintf(fp, " target_length: %ld\n", req->target_length); - fprintf(fp, " length: %ld\n", req->length); - } - - ms->irq_stack_size = req->length; - if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong))))) - error(FATAL, "cannot malloc irq_stack addresses\n"); + if (symbol_exists("irq_stack") && + (sp = per_cpu_symbol_search("irq_stack")) && + get_symbol_type("irq_stack", NULL, req)) { + /* before v4.14 or CONFIG_VMAP_STACK disabled */ + if (CRASHDEBUG(1)) { + fprintf(fp, "irq_stack: \n"); + fprintf(fp, " type: %s\n", + (req->typecode == TYPE_CODE_ARRAY) ? + "TYPE_CODE_ARRAY" : "other"); + fprintf(fp, " target_typecode: %s\n", + req->target_typecode == TYPE_CODE_INT ? + "TYPE_CODE_INT" : "other"); + fprintf(fp, " target_length: %ld\n", + req->target_length); + fprintf(fp, " length: %ld\n", req->length); + } + + if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong))))) + error(FATAL, "cannot malloc irq_stack addresses\n"); + ms->irq_stack_size = req->length; + machdep->flags |= IRQ_STACKS; - for (i = 0; i < kt->cpus; i++) - ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value; + for (i = 0; i < kt->cpus; i++) + ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value; + } else if (symbol_exists("irq_stack_ptr") && + (sp = per_cpu_symbol_search("irq_stack_ptr")) && + get_symbol_type("irq_stack_ptr", NULL, req)) { + /* v4.14 and later with CONFIG_VMAP_STACK enabled */ + if (CRASHDEBUG(1)) { + fprintf(fp, "irq_stack_ptr: \n"); + fprintf(fp, " type: %x, %s\n", + (int)req->typecode, + (req->typecode == TYPE_CODE_PTR) ? + "TYPE_CODE_PTR" : "other"); + fprintf(fp, " target_typecode: %x, %s\n", + (int)req->target_typecode, + req->target_typecode == TYPE_CODE_INT ? 
+ "TYPE_CODE_INT" : "other"); + fprintf(fp, " target_length: %ld\n", + req->target_length); + fprintf(fp, " length: %ld\n", req->length); + } + + if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong))))) + error(FATAL, "cannot malloc irq_stack addresses\n"); + ms->irq_stack_size = 16384; + machdep->flags |= IRQ_STACKS; - machdep->flags |= IRQ_STACKS; + for (i = 0; i < kt->cpus; i++) { + p = kt->__per_cpu_offset[i] + sp->value; + readmem(p, KVADDR, &(ms->irq_stacks[i]), sizeof(ulong), + "IRQ stack pointer", RETURN_ON_ERROR); + } + } } /* @@ -1750,11 +1781,20 @@ arm64_display_full_frame(struct bt_info *bt, ulong sp) if (bt->frameptr == sp) return; - if (!INSTACK(sp, bt) || !INSTACK(bt->frameptr, bt)) { - if (sp == 0) - sp = bt->stacktop - USER_EFRAME_OFFSET; - else - return; + if (INSTACK(bt->frameptr, bt)) { + if (INSTACK(sp, bt)) { + ; /* normal case */ + } else { + if (sp == 0) + /* interrupt in user mode */ + sp = bt->stacktop - USER_EFRAME_OFFSET; + else + /* interrupt in kernel mode */ + sp = bt->stacktop; + } + } else { + /* IRQ exception frame */ + return; } words = (sp - bt->frameptr) / sizeof(ulong); @@ -1860,6 +1900,9 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame) if ((frame->fp == 0) && (frame->pc == 0)) return FALSE; + if (!(machdep->flags & IRQ_STACKS)) + return TRUE; + /* * The kernel's manner of determining the end of the IRQ stack: * @@ -1872,7 +1915,25 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame) * irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); * orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer on process stack) */ - if (machdep->flags & IRQ_STACKS) { + if (machdep->flags & UNW_4_14) { + if ((bt->flags & BT_IRQSTACK) && + !arm64_on_irq_stack(bt->tc->processor, frame->fp)) { + if (arm64_on_process_stack(bt, frame->fp)) { + arm64_set_process_stack(bt); + + frame->sp = frame->fp - SIZE(pt_regs) + 16; + /* for switch_stack */ + /* fp still points to irq stack */ + bt->bptr = fp; + /* for display_full_frame */ + /* sp points to process stack */ + bt->frameptr = frame->sp; + } else { + /* irq -> user */ + return FALSE; + } + } + } else { /* !UNW_4_14 */ ms = machdep->machspec; irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - 16; @@ -1896,7 +1957,7 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame) return FALSE; } } - } + } /* UNW_4_14 */ return TRUE; } @@ -2086,10 +2147,17 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame, * We are on process stack. 
Just add a faked frame */ - if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) - frame->sp = ext_frame.fp - - sizeof(struct arm64_pt_regs); - else { + if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) { + if (MEMBER_EXISTS("pt_regs", "stackframe")) { + frame->sp = ext_frame.fp + - sizeof(struct arm64_pt_regs) - 16; + frame->fp = ext_frame.fp; + } else { + frame->sp = ext_frame.fp + - sizeof(struct arm64_pt_regs); + frame->fp = frame->sp; + } + } else { /* * FIXME: very exceptional case * We are already back on process stack, but @@ -2109,10 +2177,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame, * Really ugly */ frame->sp = frame->fp + 0x20; + frame->fp = frame->sp; fprintf(ofp, " (Next exception frame might be wrong)\n"); } - frame->fp = frame->sp; } else { /* We are on IRQ stack */ @@ -2122,9 +2190,15 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame, if (ext_frame.fp != irq_stack_ptr) { /* (2) Just add a faked frame */ - frame->sp = ext_frame.fp - - sizeof(struct arm64_pt_regs); - frame->fp = frame->sp; + if (MEMBER_EXISTS("pt_regs", "stackframe")) { + frame->sp = ext_frame.fp + - sizeof(struct arm64_pt_regs); + frame->fp = ext_frame.fp; + } else { + frame->sp = ext_frame.fp + - sizeof(struct arm64_pt_regs) - 16; + frame->fp = frame->sp; + } } else { /* * (3) @@ -2303,12 +2377,17 @@ arm64_back_trace_cmd(struct bt_info *bt) if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) { if (!(bt->flags & BT_IRQSTACK) || - (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) - exception_frame = stackframe.fp - SIZE(pt_regs); + (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) { + if (MEMBER_EXISTS("pt_regs", "stackframe")) + /* v4.14 or later */ + exception_frame = stackframe.fp - SIZE(pt_regs) + 16; + else + exception_frame = stackframe.fp - SIZE(pt_regs); + } } if ((bt->flags & BT_IRQSTACK) && - !arm64_on_irq_stack(bt->tc->processor, stackframe.sp)) { + !arm64_on_irq_stack(bt->tc->processor, stackframe.fp)) { bt->flags &= ~BT_IRQSTACK; if (arm64_switch_stack(bt, &stackframe, ofp) == USER_MODE) break; @@ -2424,6 +2503,8 @@ user_space: * otherwise show an exception frame. * Since exception entry code doesn't have a real * stackframe, we fake a dummy frame here. + * Note: Since we have a real stack frame in pt_regs, + * We no longer need a dummy frame on v4.14 or later. */ if (!arm64_in_exp_entry(stackframe.pc)) continue; @@ -2669,7 +2750,9 @@ arm64_switch_stack(struct bt_info *bt, struct arm64_stackframe *frame, FILE *ofp if (frame->fp == 0) return USER_MODE; - arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); + if (!(machdep->flags & UNW_4_14)) + arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); + return KERNEL_MODE; } @@ -3363,6 +3446,20 @@ arm64_clear_machdep_cache(void) { } static int +arm64_on_process_stack(struct bt_info *bt, ulong stkptr) +{ + ulong stackbase, stacktop; + + stackbase = GET_STACKBASE(bt->task); + stacktop = GET_STACKTOP(bt->task); + + if ((stkptr >= stackbase) && (stkptr < stacktop)) + return TRUE; + + return FALSE; +} + +static int arm64_on_irq_stack(int cpu, ulong stkptr) { return arm64_in_alternate_stack(cpu, stkptr); commit 30950ba8885fb39a1ed7b071cdb225e3ec38e7b3 Author: Dave Anderson Date: Tue Oct 17 16:20:19 2017 -0400 Implemented a new "search -T" option, which is identical to the "search -t" option, except that the search is restricted to the kernel stacks of active tasks. 
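Functionally, -T reuses the -t machinery and only narrows the task list up front. A self-contained sketch of the selection logic; struct task_stub, its fields, and search_kernel_stacks() are stand-ins for crash's task_context internals, not its real API:

    #include <stdio.h>

    struct task_stub { unsigned long stackbase, stacktop; int active; };

    /* Sketch: walk every task exactly as -t does, but with the -T flag
     * set, skip any task that is not active on a cpu. */
    static void
    search_kernel_stacks(struct task_stub *tasks, int ntasks, int Tflag)
    {
        int i;

        for (i = 0; i < ntasks; i++) {
            if (Tflag && !tasks[i].active)
                continue;   /* -T: restrict to active tasks */
            printf("searching stack range %lx-%lx\n",
                tasks[i].stackbase, tasks[i].stacktop);
        }
    }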
(atomlin@redhat.com) diff --git a/help.c b/help.c index 2d80202..a9aab37 100644 --- a/help.c +++ b/help.c @@ -2862,7 +2862,7 @@ NULL char *help_search[] = { "search", "search memory", -"[-s start] [ -[kKV] | -u | -p | -t ] [-e end | -l length] [-m mask]\n" +"[-s start] [ -[kKV] | -u | -p | -t | -T ] [-e end | -l length] [-m mask]\n" " [-x count] -[cwh] [value | (expression) | symbol | string] ...", " This command searches for a given value within a range of user virtual, kernel", " virtual, or physical memory space. If no end nor length value is entered, ", @@ -2893,6 +2893,7 @@ char *help_search[] = { " -t Search only the kernel stack pages of every task. If one or more", " matches are found in a task's kernel stack, precede the output", " with a task-identifying header.", +" -T Same as -t, except only the active task(s) are considered.", " -e end Stop the search at this hexadecimal user or kernel virtual", " address, kernel symbol, or physical address. The end address", " must be appropriate for the memory type specified.", diff --git a/memory.c b/memory.c index 9c9a40d..fb534e8 100644 --- a/memory.c +++ b/memory.c @@ -13882,7 +13882,7 @@ cmd_search(void) ulong value, mask, len; ulong uvaddr_start, uvaddr_end; ulong kvaddr_start, kvaddr_end, range_end; - int sflag, Kflag, Vflag, pflag, tflag; + int sflag, Kflag, Vflag, pflag, Tflag, tflag; struct searchinfo searchinfo; struct syment *sp; struct node_table *nt; @@ -13896,7 +13896,7 @@ cmd_search(void) context = max = 0; start = end = 0; - value = mask = sflag = pflag = Kflag = Vflag = memtype = len = tflag = 0; + value = mask = sflag = pflag = Kflag = Vflag = memtype = len = Tflag = tflag = 0; kvaddr_start = kvaddr_end = 0; uvaddr_start = UNINITIALIZED; uvaddr_end = COMMON_VADDR_SPACE() ? (ulong)(-1) : machdep->kvbase; @@ -13933,7 +13933,7 @@ cmd_search(void) searchinfo.mode = SEARCH_ULONG; /* default search */ - while ((c = getopt(argcnt, args, "tl:ukKVps:e:v:m:hwcx:")) != EOF) { + while ((c = getopt(argcnt, args, "Ttl:ukKVps:e:v:m:hwcx:")) != EOF) { switch(c) { case 'u': @@ -14038,12 +14038,19 @@ cmd_search(void) context = dtoi(optarg, FAULT_ON_ERROR, NULL); break; + case 'T': case 't': if (XEN_HYPER_MODE()) error(FATAL, - "-t option is not applicable to the " - "Xen hypervisor\n"); - tflag++; + "-%c option is not applicable to the " + "Xen hypervisor\n", c); + if (c == 'T') + Tflag++; + else if (c == 't') + tflag++; + if (tflag && Tflag) + error(FATAL, + "-t and -T options are mutually exclusive\n"); break; default: @@ -14052,10 +14059,11 @@ cmd_search(void) } } - if (tflag && (memtype || start || end || len)) + if ((tflag || Tflag) && (memtype || start || end || len)) error(FATAL, - "-t option cannot be used with other " - "memory-selection options\n"); + "-%c option cannot be used with other " + "memory-selection options\n", + tflag ? 't' : 'T'); if (XEN_HYPER_MODE()) { memtype = KVADDR; @@ -14328,10 +14336,12 @@ cmd_search(void) break; } - if (tflag) { + if (tflag || Tflag) { searchinfo.tasks_found = 0; tc = FIRST_CONTEXT(); for (i = 0; i < RUNNING_TASKS(); i++, tc++) { + if (Tflag && !is_task_active(tc->task)) + continue; searchinfo.vaddr_start = GET_STACKBASE(tc->task); searchinfo.vaddr_end = GET_STACKTOP(tc->task); searchinfo.task_context = tc; commit 090bf28907782549ba980c588979372061764aa7 Author: Dave Anderson Date: Fri Oct 20 14:23:36 2017 -0400 Removal of the ARM64 "bt -o" option for Linux 4.14 and later kernels, along with several cleanups/readability improvements. 
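Much of the cleanup consists of computing the two pt_regs-relative exception frame offsets once, at stackframe-init time, rather than open-coding SIZE(pt_regs) plus-or-minus 16 at each call site. A condensed sketch of the relationship the patch installs, with plain variables standing in for the crash macros:

    /* Sketch only: with the v4.14 pt_regs, which embeds a 16-byte
     * stackframe record at its tail, the kernel-mode exception frame
     * sits 16 bytes closer to the frame pointer than the user-mode one. */
    static void
    set_eframe_offsets(int pt_regs_has_stackframe, unsigned long pt_regs_size,
                       unsigned long *user_off, unsigned long *kern_off)
    {
        if (pt_regs_has_stackframe) {       /* v4.14 or later */
            *user_off = pt_regs_size;
            *kern_off = pt_regs_size - 16;
        } else {
            *user_off = pt_regs_size + 16;
            *kern_off = pt_regs_size;
        }
    }

Both values then feed the backtrace code as USER_EFRAME_OFFSET and KERN_EFRAME_OFFSET in the diff below.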
(takahiro.akashi@linaro.org) diff --git a/arm64.c b/arm64.c index c75669b..7904f65 100644 --- a/arm64.c +++ b/arm64.c @@ -612,6 +612,7 @@ arm64_dump_machdep_table(ulong arg) fprintf(fp, " exp_entry2_end: %lx\n", ms->exp_entry2_end); fprintf(fp, " panic_task_regs: %lx\n", (ulong)ms->panic_task_regs); fprintf(fp, " user_eframe_offset: %ld\n", ms->user_eframe_offset); + fprintf(fp, " kern_eframe_offset: %ld\n", ms->kern_eframe_offset); fprintf(fp, " PTE_PROT_NONE: %lx\n", ms->PTE_PROT_NONE); fprintf(fp, " PTE_FILE: "); if (ms->PTE_FILE) @@ -1383,7 +1384,7 @@ arm64_irq_stack_init(void) if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong))))) error(FATAL, "cannot malloc irq_stack addresses\n"); - ms->irq_stack_size = 16384; + ms->irq_stack_size = ARM64_IRQ_STACK_SIZE; machdep->flags |= IRQ_STACKS; for (i = 0; i < kt->cpus; i++) { @@ -1410,10 +1411,13 @@ arm64_stackframe_init(void) MEMBER_OFFSET_INIT(elf_prstatus_pr_pid, "elf_prstatus", "pr_pid"); MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus", "pr_reg"); - if (MEMBER_EXISTS("pt_regs", "stackframe")) + if (MEMBER_EXISTS("pt_regs", "stackframe")) { machdep->machspec->user_eframe_offset = SIZE(pt_regs); - else + machdep->machspec->kern_eframe_offset = SIZE(pt_regs) - 16; + } else { machdep->machspec->user_eframe_offset = SIZE(pt_regs) + 16; + machdep->machspec->kern_eframe_offset = SIZE(pt_regs); + } machdep->machspec->__exception_text_start = symbol_value("__exception_text_start"); @@ -1503,6 +1507,7 @@ arm64_stackframe_init(void) #define USER_MODE (2) #define USER_EFRAME_OFFSET (machdep->machspec->user_eframe_offset) +#define KERN_EFRAME_OFFSET (machdep->machspec->kern_eframe_offset) /* * PSR bits @@ -1793,7 +1798,7 @@ arm64_display_full_frame(struct bt_info *bt, ulong sp) sp = bt->stacktop; } } else { - /* IRQ exception frame */ + /* This is a transition case from irq to process stack. */ return; } @@ -1903,61 +1908,73 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame) if (!(machdep->flags & IRQ_STACKS)) return TRUE; - /* - * The kernel's manner of determining the end of the IRQ stack: - * - * #define THREAD_SIZE 16384 - * #define THREAD_START_SP (THREAD_SIZE - 16) - * #define IRQ_STACK_START_SP THREAD_START_SP - * #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) - * #define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) - * - * irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); - * orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer on process stack) - */ + if (!(machdep->flags & IRQ_STACKS)) + return TRUE; + if (machdep->flags & UNW_4_14) { if ((bt->flags & BT_IRQSTACK) && !arm64_on_irq_stack(bt->tc->processor, frame->fp)) { if (arm64_on_process_stack(bt, frame->fp)) { arm64_set_process_stack(bt); - frame->sp = frame->fp - SIZE(pt_regs) + 16; - /* for switch_stack */ - /* fp still points to irq stack */ + frame->sp = frame->fp - KERN_EFRAME_OFFSET; + /* + * for switch_stack + * fp still points to irq stack + */ bt->bptr = fp; - /* for display_full_frame */ - /* sp points to process stack */ - bt->frameptr = frame->sp; + /* + * for display_full_frame + * sp points to process stack + * + * If we want to see pt_regs, + * comment out the below. 
+ * bt->frameptr = frame->sp; + */ } else { /* irq -> user */ return FALSE; } } - } else { /* !UNW_4_14 */ - ms = machdep->machspec; - irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - 16; - - if (frame->sp == irq_stack_ptr) { - orig_sp = GET_STACK_ULONG(irq_stack_ptr - 8); - arm64_set_process_stack(bt); - if (INSTACK(orig_sp, bt) && (INSTACK(frame->fp, bt) || (frame->fp == 0))) { - ptregs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(orig_sp))]; - frame->sp = orig_sp; - frame->pc = ptregs->pc; - bt->bptr = fp; - if (CRASHDEBUG(1)) - error(INFO, - "arm64_unwind_frame: switch stacks: fp: %lx sp: %lx pc: %lx\n", - frame->fp, frame->sp, frame->pc); - } else { - error(WARNING, - "arm64_unwind_frame: on IRQ stack: oriq_sp: %lx%s fp: %lx%s\n", - orig_sp, INSTACK(orig_sp, bt) ? "" : " (?)", - frame->fp, INSTACK(frame->fp, bt) ? "" : " (?)"); - return FALSE; - } + + return TRUE; + } + + /* + * The kernel's manner of determining the end of the IRQ stack: + * + * #define THREAD_SIZE 16384 + * #define THREAD_START_SP (THREAD_SIZE - 16) + * #define IRQ_STACK_START_SP THREAD_START_SP + * #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) + * #define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) + * + * irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); + * orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer on process stack) + */ + ms = machdep->machspec; + irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - 16; + + if (frame->sp == irq_stack_ptr) { + orig_sp = GET_STACK_ULONG(irq_stack_ptr - 8); + arm64_set_process_stack(bt); + if (INSTACK(orig_sp, bt) && (INSTACK(frame->fp, bt) || (frame->fp == 0))) { + ptregs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(orig_sp))]; + frame->sp = orig_sp; + frame->pc = ptregs->pc; + bt->bptr = fp; + if (CRASHDEBUG(1)) + error(INFO, + "arm64_unwind_frame: switch stacks: fp: %lx sp: %lx pc: %lx\n", + frame->fp, frame->sp, frame->pc); + } else { + error(WARNING, + "arm64_unwind_frame: on IRQ stack: oriq_sp: %lx%s fp: %lx%s\n", + orig_sp, INSTACK(orig_sp, bt) ? "" : " (?)", + frame->fp, INSTACK(frame->fp, bt) ? "" : " (?)"); + return FALSE; } - } /* UNW_4_14 */ + } return TRUE; } @@ -2147,17 +2164,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame, * We are on process stack. 
Just add a faked frame */ - if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) { - if (MEMBER_EXISTS("pt_regs", "stackframe")) { - frame->sp = ext_frame.fp - - sizeof(struct arm64_pt_regs) - 16; - frame->fp = ext_frame.fp; - } else { - frame->sp = ext_frame.fp - - sizeof(struct arm64_pt_regs); - frame->fp = frame->sp; - } - } else { + if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) + frame->sp = ext_frame.fp + - sizeof(struct arm64_pt_regs); + else { /* * FIXME: very exceptional case * We are already back on process stack, but @@ -2177,10 +2187,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame, * Really ugly */ frame->sp = frame->fp + 0x20; - frame->fp = frame->sp; fprintf(ofp, " (Next exception frame might be wrong)\n"); } + frame->fp = frame->sp; } else { /* We are on IRQ stack */ @@ -2190,15 +2200,9 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame, if (ext_frame.fp != irq_stack_ptr) { /* (2) Just add a faked frame */ - if (MEMBER_EXISTS("pt_regs", "stackframe")) { - frame->sp = ext_frame.fp - - sizeof(struct arm64_pt_regs); - frame->fp = ext_frame.fp; - } else { - frame->sp = ext_frame.fp - - sizeof(struct arm64_pt_regs) - 16; - frame->fp = frame->sp; - } + frame->sp = ext_frame.fp + - sizeof(struct arm64_pt_regs); + frame->fp = frame->sp; } else { /* * (3) @@ -2285,6 +2289,11 @@ arm64_back_trace_cmd(struct bt_info *bt) FILE *ofp; if (bt->flags & BT_OPT_BACK_TRACE) { + if (machdep->flags & UNW_4_14) { + option_not_supported('o'); + return; + } + arm64_back_trace_cmd_v2(bt); return; } @@ -2346,7 +2355,7 @@ arm64_back_trace_cmd(struct bt_info *bt) goto complete_user; if (DUMPFILE() && is_task_active(bt->task)) { - exception_frame = stackframe.fp - SIZE(pt_regs); + exception_frame = stackframe.fp - KERN_EFRAME_OFFSET; if (arm64_is_kernel_exception_frame(bt, exception_frame)) arm64_print_exception_frame(bt, exception_frame, KERNEL_MODE, ofp); @@ -2377,13 +2386,8 @@ arm64_back_trace_cmd(struct bt_info *bt) if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) { if (!(bt->flags & BT_IRQSTACK) || - (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) { - if (MEMBER_EXISTS("pt_regs", "stackframe")) - /* v4.14 or later */ - exception_frame = stackframe.fp - SIZE(pt_regs) + 16; - else - exception_frame = stackframe.fp - SIZE(pt_regs); - } + (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) + exception_frame = stackframe.fp - KERN_EFRAME_OFFSET; } if ((bt->flags & BT_IRQSTACK) && @@ -2503,8 +2507,6 @@ user_space: * otherwise show an exception frame. * Since exception entry code doesn't have a real * stackframe, we fake a dummy frame here. - * Note: Since we have a real stack frame in pt_regs, - * We no longer need a dummy frame on v4.14 or later. 
*/ if (!arm64_in_exp_entry(stackframe.pc)) continue; diff --git a/defs.h b/defs.h index 7768895..a694a66 100644 --- a/defs.h +++ b/defs.h @@ -3038,6 +3038,7 @@ typedef signed int s32; #define ARM64_VMEMMAP_END (ARM64_VMEMMAP_VADDR + GIGABYTES(8UL) - 1) #define ARM64_STACK_SIZE (16384) +#define ARM64_IRQ_STACK_SIZE ARM64_STACK_SIZE #define _SECTION_SIZE_BITS 30 #define _MAX_PHYSMEM_BITS 40 @@ -3117,6 +3118,8 @@ struct machine_specific { ulong kimage_text; ulong kimage_end; ulong user_eframe_offset; + /* for v4.14 or later */ + ulong kern_eframe_offset; }; struct arm64_stackframe { diff --git a/help.c b/help.c index a9aab37..f9c5792 100644 --- a/help.c +++ b/help.c @@ -1799,7 +1799,8 @@ char *help_bt[] = { " It does so by verifying the thread_info.task pointer, ensuring that", " the thread_info.cpu is a valid cpu number, and checking the end of ", " the stack for the STACK_END_MAGIC value.", -" -o arm64: use optional backtrace method.", +" -o arm64: use optional backtrace method; not supported on Linux 4.14 or", +" later kernels.", " x86: use old backtrace method, permissible only on kernels that were", " compiled without the -fomit-frame_pointer.", " x86_64: use old backtrace method, which dumps potentially stale", diff --git a/task.c b/task.c index 2b12af0..362822c 100644 --- a/task.c +++ b/task.c @@ -6750,6 +6750,8 @@ panic_search(void) fd->keyword_array[0] = FOREACH_BT; if (machine_type("S390X")) fd->flags |= FOREACH_o_FLAG; + else if (machine_type("ARM64")) + fd->flags |= FOREACH_t_FLAG; else fd->flags |= (FOREACH_t_FLAG|FOREACH_o_FLAG); commit 45b74b89530d611b3fa95a1041e158fbb865fa84 Author: Dave Anderson Date: Mon Oct 23 11:15:39 2017 -0400 Fix for support of KASLR enabled kernels captured by the SADUMP dumpfile facility. SADUMP dumpfile headers do not contain phys_base or VMCOREINFO notes, so without this patch, the crash session fails during initialization with the message "crash: seek error: kernel virtual address:
type: "page_offset_base". This patch calculates the phys_base value and the KASLR offset using the IDTR and CR3 registers from the dumpfile header. (indou.takao@jp.fujitsu.com) diff --git a/defs.h b/defs.h index a694a66..76e5512 100644 --- a/defs.h +++ b/defs.h @@ -2591,6 +2591,9 @@ struct symbol_table_data { ulong last_section_end; ulong _stext_vmlinux; struct downsized downsized; + ulong divide_error_vmlinux; + ulong idt_table_vmlinux; + ulong saved_command_line_vmlinux; }; /* flags for st */ @@ -6312,6 +6315,7 @@ void sadump_set_zero_excluded(void); void sadump_unset_zero_excluded(void); struct sadump_data; struct sadump_data *get_sadump_data(void); +int sadump_calc_kaslr_offset(ulong *); /* * qemu.c diff --git a/sadump.c b/sadump.c index a96ba9c..2ccfa82 100644 --- a/sadump.c +++ b/sadump.c @@ -1558,12 +1558,17 @@ sadump_display_regs(int cpu, FILE *ofp) */ int sadump_phys_base(ulong *phys_base) { - if (SADUMP_VALID()) { + if (SADUMP_VALID() && !sd->phys_base) { if (CRASHDEBUG(1)) error(NOTE, "sadump: does not save phys_base.\n"); return FALSE; } + if (sd->phys_base) { + *phys_base = sd->phys_base; + return TRUE; + } + return FALSE; } @@ -1649,3 +1654,461 @@ get_sadump_data(void) { return sd; } + +#ifdef X86_64 +static int +get_sadump_smram_cpu_state_any(struct sadump_smram_cpu_state *smram) +{ + ulong offset; + struct sadump_header *sh = sd->dump_header; + int apicid; + struct sadump_smram_cpu_state scs, zero; + + offset = sd->sub_hdr_offset + sizeof(uint32_t) + + sd->dump_header->nr_cpus * sizeof(struct sadump_apic_state); + + memset(&zero, 0, sizeof(zero)); + + for (apicid = 0; apicid < sh->nr_cpus; ++apicid) { + if (!read_device(&scs, sizeof(scs), &offset)) { + error(INFO, "sadump: cannot read sub header " + "cpu_state\n"); + return FALSE; + } + if (memcmp(&scs, &zero, sizeof(scs)) != 0) { + *smram = scs; + return TRUE; + } + } + + return FALSE; +} + +/* + * Get address of vector0 interrupt handler (Devide Error) from Interrupt + * Descriptor Table. + */ +static ulong +get_vec0_addr(ulong idtr) +{ + struct gate_struct64 { + uint16_t offset_low; + uint16_t segment; + uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + uint16_t offset_middle; + uint32_t offset_high; + uint32_t zero1; + } __attribute__((packed)) gate; + + readmem(idtr, PHYSADDR, &gate, sizeof(gate), "idt_table", FAULT_ON_ERROR); + + return ((ulong)gate.offset_high << 32) + + ((ulong)gate.offset_middle << 16) + + gate.offset_low; +} + +/* + * Parse a string of [size[KMG] ]offset[KMG] + * Import from Linux kernel(lib/cmdline.c) + */ +static ulong memparse(char *ptr, char **retptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} + +/* + * Find "elfcorehdr=" in the boot parameter of kernel and return the address + * of elfcorehdr. + */ +static ulong +get_elfcorehdr(ulong cr3, ulong kaslr_offset) +{ + char cmdline[BUFSIZE], *ptr; + ulong cmdline_vaddr; + ulong cmdline_paddr; + ulong buf_vaddr, buf_paddr; + char *end; + ulong elfcorehdr_addr = 0, elfcorehdr_size = 0; + int verbose = CRASHDEBUG(1)? 
1: 0; + + cmdline_vaddr = st->saved_command_line_vmlinux + kaslr_offset; + if (!kvtop(NULL, cmdline_vaddr, &cmdline_paddr, verbose)) + return 0; + + if (CRASHDEBUG(1)) { + fprintf(fp, "cmdline vaddr=%lx\n", cmdline_vaddr); + fprintf(fp, "cmdline paddr=%lx\n", cmdline_paddr); + } + + if (!readmem(cmdline_paddr, PHYSADDR, &buf_vaddr, sizeof(ulong), + "saved_command_line", RETURN_ON_ERROR)) + return 0; + + if (!kvtop(NULL, buf_vaddr, &buf_paddr, verbose)) + return 0; + + if (CRASHDEBUG(1)) { + fprintf(fp, "cmdline buffer vaddr=%lx\n", buf_vaddr); + fprintf(fp, "cmdline buffer paddr=%lx\n", buf_paddr); + } + + memset(cmdline, 0, BUFSIZE); + if (!readmem(buf_paddr, PHYSADDR, cmdline, BUFSIZE, + "saved_command_line", RETURN_ON_ERROR)) + return 0; + + ptr = strstr(cmdline, "elfcorehdr="); + if (!ptr) + return 0; + + if (CRASHDEBUG(1)) + fprintf(fp, "2nd kernel detected\n"); + + ptr += strlen("elfcorehdr="); + elfcorehdr_addr = memparse(ptr, &end); + if (*end == '@') { + elfcorehdr_size = elfcorehdr_addr; + elfcorehdr_addr = memparse(end + 1, &end); + } + + if (CRASHDEBUG(1)) { + fprintf(fp, "elfcorehdr_addr=%lx\n", elfcorehdr_addr); + fprintf(fp, "elfcorehdr_size=%lx\n", elfcorehdr_size); + } + + return elfcorehdr_addr; +} + + /* + * Get vmcoreinfo from elfcorehdr. + * Some codes are imported from Linux kernel(fs/proc/vmcore.c) + */ +static int +get_vmcoreinfo(ulong elfcorehdr, ulong *addr, int *len) +{ + unsigned char e_ident[EI_NIDENT]; + Elf64_Ehdr ehdr; + Elf64_Phdr phdr; + Elf64_Nhdr nhdr; + ulong ptr; + ulong nhdr_offset = 0; + int i; + + if (!readmem(elfcorehdr, PHYSADDR, e_ident, EI_NIDENT, + "EI_NIDENT", RETURN_ON_ERROR)) + return FALSE; + + if (e_ident[EI_CLASS] != ELFCLASS64) { + error(INFO, "Only ELFCLASS64 is supportd\n"); + return FALSE; + } + + if (!readmem(elfcorehdr, PHYSADDR, &ehdr, sizeof(ehdr), + "Elf64_Ehdr", RETURN_ON_ERROR)) + return FALSE; + + /* Sanity Check */ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || + ehdr.e_ident[EI_CLASS] != ELFCLASS64 || + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || + ehdr.e_ehsize != sizeof(Elf64_Ehdr) || + ehdr.e_phentsize != sizeof(Elf64_Phdr) || + ehdr.e_phnum == 0) { + error(INFO, "Invalid elf header\n"); + return FALSE; + } + + ptr = elfcorehdr + ehdr.e_phoff; + for (i = 0; i < ehdr.e_phnum; i++) { + ulong offset; + char name[16]; + + if (!readmem(ptr, PHYSADDR, &phdr, sizeof(phdr), + "Elf64_Phdr", RETURN_ON_ERROR)) + return FALSE; + + ptr += sizeof(phdr); + if (phdr.p_type != PT_NOTE) + continue; + + offset = phdr.p_offset; + if (!readmem(offset, PHYSADDR, &nhdr, sizeof(nhdr), + "Elf64_Nhdr", RETURN_ON_ERROR)) + return FALSE; + + offset += DIV_ROUND_UP(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))* + sizeof(Elf64_Word); + memset(name, 0, sizeof(name)); + if (!readmem(offset, PHYSADDR, name, sizeof(name), + "Elf64_Nhdr name", RETURN_ON_ERROR)) + return FALSE; + + if(!strcmp(name, "VMCOREINFO")) { + nhdr_offset = offset; + break; + } + } + + if (!nhdr_offset) + return FALSE; + + *addr = nhdr_offset + + DIV_ROUND_UP(nhdr.n_namesz, sizeof(Elf64_Word))* + sizeof(Elf64_Word); + *len = nhdr.n_descsz; + + if (CRASHDEBUG(1)) { + fprintf(fp, "vmcoreinfo addr=%lx\n", *addr); + fprintf(fp, "vmcoreinfo len=%d\n", *len); + } + + return TRUE; +} + +/* + * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel. + * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo. + * + * 1. Get command line and try to retrieve "elfcorehdr=" boot parameter + * 2. 
If "elfcorehdr=" is not found in command line, we are in 1st kernel. + * There is nothing to do. + * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo + * using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo. + */ +static int +get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong orig_kaslr_offset, + ulong *kaslr_offset, ulong *phys_base) +{ + ulong elfcorehdr_addr = 0; + ulong vmcoreinfo_addr; + int vmcoreinfo_len; + char *buf, *pos; + int ret = FALSE; + + /* Find "elfcorehdr=" in the kernel boot parameter */ + elfcorehdr_addr = get_elfcorehdr(cr3, orig_kaslr_offset); + if (!elfcorehdr_addr) + return FALSE; + + /* Get vmcoreinfo from the address of "elfcorehdr=" */ + if (!get_vmcoreinfo(elfcorehdr_addr, &vmcoreinfo_addr, &vmcoreinfo_len)) + return FALSE; + + if (!vmcoreinfo_len) + return FALSE; + + if (CRASHDEBUG(1)) + fprintf(fp, "Find vmcoreinfo in kdump memory\n"); + + buf = GETBUF(vmcoreinfo_len); + if (!readmem(vmcoreinfo_addr, PHYSADDR, buf, vmcoreinfo_len, + "vmcoreinfo", RETURN_ON_ERROR)) + goto quit; + + /* Get phys_base form vmcoreinfo */ + pos = strstr(buf, "NUMBER(phys_base)="); + if (!pos) + goto quit; + *phys_base = strtoull(pos + strlen("NUMBER(phys_base)="), NULL, 0); + + /* Get kaslr_offset form vmcoreinfo */ + pos = strstr(buf, "KERNELOFFSET="); + if (!pos) + goto quit; + *kaslr_offset = strtoull(pos + strlen("KERNELOFFSET="), NULL, 16); + + ret = TRUE; + +quit: + FREEBUF(buf); + return ret; +} + +/* + * Calculate kaslr_offset and phys_base + * + * kaslr_offset: + * The difference between original address in System.map or vmlinux and + * actual address placed randomly by kaslr feature. To be more accurate, + * kaslr_offset = actual address - original address + * + * phys_base: + * Physical address where the kerenel is placed. In other words, it's a + * physical address of __START_KERNEL_map. This is also decided randomly by + * kaslr. + * + * kaslr offset and phys_base are calculated as follows: + * + * kaslr_offset: + * 1) Get IDTR and CR3 value from the dump header. + * 2) Get a virtual address of IDT from IDTR value + * --- (A) + * 3) Translate (A) to physical address using CR3, which points a top of + * page table. + * --- (B) + * 4) Get an address of vector0 (Devide Error) interrupt handler from + * IDT, which are pointed by (B). + * --- (C) + * 5) Get an address of symbol "divide_error" form vmlinux + * --- (D) + * + * Now we have two addresses: + * (C)-> Actual address of "divide_error" + * (D)-> Original address of "divide_error" in the vmlinux + * + * kaslr_offset can be calculated by the difference between these two + * value. + * + * phys_base; + * 1) Get IDT virtual address from vmlinux + * --- (E) + * + * So phys_base can be calculated using relationship of directly mapped + * address. + * + * phys_base = + * Physical address(B) - + * (Virtual address(E) + kaslr_offset - __START_KERNEL_map) + * + * Note that the address (A) cannot be used instead of (E) because (A) is + * not direct map address, it's a fixed map address. + * + * This solution works in most every case, but does not work in the + * following case. + * + * 1) If the dump is captured on early stage of kernel boot, IDTR points + * early IDT table(early_idts) instead of normal IDT(idt_table). + * 2) If the dump is captured whle kdump is working, IDTR points + * IDT table of 2nd kernel, not 1st kernel. + * + * Current implementation does not support the case 1), need + * enhancement in the future. For the case 2), get kaslr_offset and + * phys_base as follows. 
+ * + * 1) Get kaslr_offset and phys_base using the above solution. + * 2) Get kernel boot parameter from "saved_command_line" + * 3) If "elfcorehdr=" is not included in boot parameter, we are in the + * first kernel, nothing to do any more. + * 4) If "elfcorehdr=" is included in boot parameter, we are in the 2nd + * kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and + * get kaslr_offset and phys_base from vmcoreinfo. + */ +int +sadump_calc_kaslr_offset(ulong *kaslr_offset) +{ + ulong phys_base = 0; + struct sadump_smram_cpu_state scs; + uint64_t idtr = 0, cr3 = 0, idtr_paddr; + ulong divide_error_vmcore; + ulong kaslr_offset_kdump, phys_base_kdump; + int ret = FALSE; + int verbose = CRASHDEBUG(1)? 1: 0; + + if (!machine_type("X86_64")) + return FALSE; + + memset(&scs, 0, sizeof(scs)); + get_sadump_smram_cpu_state_any(&scs); + cr3 = scs.Cr3; + idtr = ((uint64_t)scs.IdtUpper)<<32 | (uint64_t)scs.IdtLower; + + /* + * Set up for kvtop. + * + * calc_kaslr_offset() is called before machdep_init(PRE_GDB), so some + * variables are not initialized yet. Set up them here to call kvtop(). + * + * TODO: XEN and 5-level is not supported + */ + vt->kernel_pgd[0] = cr3; + machdep->machspec->last_pml4_read = vt->kernel_pgd[0]; + machdep->machspec->physical_mask_shift = __PHYSICAL_MASK_SHIFT_2_6; + machdep->machspec->pgdir_shift = PGDIR_SHIFT; + if (!readmem(cr3, PHYSADDR, machdep->machspec->pml4, PAGESIZE(), + "cr3", RETURN_ON_ERROR)) + goto quit; + + /* Convert virtual address of IDT table to physical address */ + if (!kvtop(NULL, idtr, &idtr_paddr, verbose)) + goto quit; + + /* Now we can calculate kaslr_offset and phys_base */ + divide_error_vmcore = get_vec0_addr(idtr_paddr); + *kaslr_offset = divide_error_vmcore - st->divide_error_vmlinux; + phys_base = idtr_paddr - + (st->idt_table_vmlinux + *kaslr_offset - __START_KERNEL_map); + + if (CRASHDEBUG(1)) { + fprintf(fp, "calc_kaslr_offset: idtr=%lx\n", idtr); + fprintf(fp, "calc_kaslr_offset: cr3=%lx\n", cr3); + fprintf(fp, "calc_kaslr_offset: idtr(phys)=%lx\n", idtr_paddr); + fprintf(fp, "calc_kaslr_offset: divide_error(vmlinux): %lx\n", + st->divide_error_vmlinux); + fprintf(fp, "calc_kaslr_offset: divide_error(vmcore): %lx\n", + divide_error_vmcore); + } + + /* + * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd + * kernel. 
If we are in 2nd kernel, get kaslr_offset/phys_base + * from vmcoreinfo + */ + if (get_kaslr_offset_from_vmcoreinfo( + cr3, *kaslr_offset, &kaslr_offset_kdump, &phys_base_kdump)) { + *kaslr_offset = kaslr_offset_kdump; + phys_base = phys_base_kdump; + } + + if (CRASHDEBUG(1)) { + fprintf(fp, "calc_kaslr_offset: kaslr_offset=%lx\n", + *kaslr_offset); + fprintf(fp, "calc_kaslr_offset: phys_base=%lx\n", phys_base); + } + + sd->phys_base = phys_base; + ret = TRUE; +quit: + vt->kernel_pgd[0] = 0; + machdep->machspec->last_pml4_read = 0; + return ret; +} +#else +int +sadump_calc_kaslr_offset(ulong *kaslr_offset) +{ + return FALSE; +} +#endif /* X86_64 */ diff --git a/sadump.h b/sadump.h index 7f8e384..681f5e4 100644 --- a/sadump.h +++ b/sadump.h @@ -219,6 +219,7 @@ struct sadump_data { ulonglong backup_offset; uint64_t max_mapnr; + ulong phys_base; }; struct sadump_data *sadump_get_sadump_data(void); diff --git a/symbols.c b/symbols.c index 02cb34e..b2f2796 100644 --- a/symbols.c +++ b/symbols.c @@ -624,6 +624,9 @@ kaslr_init(void) st->_stext_vmlinux = UNINITIALIZED; } } + + if (SADUMP_DUMPFILE()) + kt->flags2 |= KASLR_CHECK; } /* @@ -637,6 +640,19 @@ derive_kaslr_offset(bfd *abfd, int dynamic, bfd_byte *start, bfd_byte *end, unsigned long relocate; ulong _stext_relocated; + if (SADUMP_DUMPFILE()) { + ulong kaslr_offset = 0; + + sadump_calc_kaslr_offset(&kaslr_offset); + + if (kaslr_offset) { + kt->relocate = kaslr_offset * -1; + kt->flags |= RELOC_SET; + } + + return; + } + if (ACTIVE()) { _stext_relocated = symbol_value_from_proc_kallsyms("_stext"); if (_stext_relocated == BADVAL) @@ -3052,6 +3068,16 @@ dump_symbol_table(void) else fprintf(fp, "\n"); + if (SADUMP_DUMPFILE()) { + fprintf(fp, "divide_error_vmlinux: %lx\n", st->divide_error_vmlinux); + fprintf(fp, " idt_table_vmlinux: %lx\n", st->idt_table_vmlinux); + fprintf(fp, "saved_command_line_vmlinux: %lx\n", st->saved_command_line_vmlinux); + } else { + fprintf(fp, "divide_error_vmlinux: (unused)\n"); + fprintf(fp, " idt_table_vmlinux: (unused)\n"); + fprintf(fp, "saved_command_line_vmlinux: (unused)\n"); + } + fprintf(fp, " symval_hash[%d]: %lx\n", SYMVAL_HASH, (ulong)&st->symval_hash[0]); @@ -12246,6 +12272,24 @@ numeric_forward(const void *P_x, const void *P_y) } } + if (SADUMP_DUMPFILE()) { + /* Need for kaslr_offset and phys_base */ + if (STREQ(x->name, "divide_error")) + st->divide_error_vmlinux = valueof(x); + else if (STREQ(y->name, "divide_error")) + st->divide_error_vmlinux = valueof(y); + + if (STREQ(x->name, "idt_table")) + st->idt_table_vmlinux = valueof(x); + else if (STREQ(y->name, "idt_table")) + st->idt_table_vmlinux = valueof(y); + + if (STREQ(x->name, "saved_command_line")) + st->saved_command_line_vmlinux = valueof(x); + else if (STREQ(y->name, "saved_command_line")) + st->saved_command_line_vmlinux = valueof(y); + } + xs = bfd_get_section(x); ys = bfd_get_section(y); diff --git a/x86_64.c b/x86_64.c index 6e60dda..2f9e6db 100644 --- a/x86_64.c +++ b/x86_64.c @@ -194,6 +194,9 @@ x86_64_init(int when) machdep->machspec->kernel_image_size = dtol(string, QUIET, NULL); free(string); } + if (SADUMP_DUMPFILE()) + /* Need for calculation of kaslr_offset and phys_base */ + machdep->kvtop = x86_64_kvtop; break; case PRE_GDB: @@ -2019,6 +2022,22 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo ulong pte; physaddr_t physpage; + if (SADUMP_DUMPFILE() && !(machdep->flags & KSYMS_START)) { + /* + * In the case of sadump, to calculate kaslr_offset and + * phys_base, kvtop is called during 
symtab_init(). In this + * stage phys_base is not initialized yet and x86_64_VTOP() + * does not work. Jump to the code of pagetable translation. + */ + FILL_PML4(); + pml4 = ((ulong *)machdep->machspec->pml4) + pml4_index(kvaddr); + if (verbose) { + fprintf(fp, "PML4 DIRECTORY: %lx\n", vt->kernel_pgd[0]); + fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4); + } + goto start_vtop_with_pagetable; + } + if (!IS_KVADDR(kvaddr)) return FALSE; @@ -2065,6 +2084,8 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4); } } + +start_vtop_with_pagetable: if (!(*pml4) & _PAGE_PRESENT) goto no_kpage; pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK; commit 4550bf32a5ec1d9b7b6d5099aaee6e8e363a7827 Author: Dave Anderson Date: Wed Oct 25 11:04:53 2017 -0400 Implemented a new "ps -y policy" option to filter the task display by scheduling policy. Applicable to both standalone ps invocation as well as via foreach. (oleksandr@redhat.com) diff --git a/defs.h b/defs.h index 76e5512..4b4e331 100644 --- a/defs.h +++ b/defs.h @@ -1139,6 +1139,7 @@ extern struct machdep_table *machdep; #define FOREACH_a_FLAG (0x4000000) #define FOREACH_G_FLAG (0x8000000) #define FOREACH_F_FLAG2 (0x10000000) +#define FOREACH_y_FLAG (0x20000000) #define FOREACH_PS_EXCLUSIVE \ (FOREACH_g_FLAG|FOREACH_a_FLAG|FOREACH_t_FLAG|FOREACH_c_FLAG|FOREACH_p_FLAG|FOREACH_l_FLAG|FOREACH_r_FLAG|FOREACH_m_FLAG) @@ -1162,6 +1163,7 @@ struct foreach_data { int comms; int args; int regexs; + int policy; }; struct reference { @@ -1992,6 +1994,7 @@ struct offset_table { /* stash of commonly-used offsets */ long mod_arch_specific_num_orcs; long mod_arch_specific_orc_unwind_ip; long mod_arch_specific_orc_unwind; + long task_struct_policy; }; struct size_table { /* stash of commonly-used sizes */ @@ -2141,6 +2144,7 @@ struct size_table { /* stash of commonly-used sizes */ long sk_buff_head_qlen; long sk_buff_len; long orc_entry; + long task_struct_policy; }; struct array_table { @@ -4576,6 +4580,13 @@ enum type_code { */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define SCHED_NORMAL 0 +#define SCHED_FIFO 1 +#define SCHED_RR 2 +#define SCHED_BATCH 3 +#define SCHED_ISO 4 +#define SCHED_IDLE 5 +#define SCHED_DEADLINE 6 extern long _ZOMBIE_; #define IS_ZOMBIE(task) (task_state(task) & _ZOMBIE_) @@ -4603,6 +4614,7 @@ extern long _ZOMBIE_; #define PS_NO_HEADER (0x10000) #define PS_MSECS (0x20000) #define PS_SUMMARY (0x40000) +#define PS_POLICY (0x80000) #define PS_EXCLUSIVE (PS_TGID_LIST|PS_ARGV_ENVP|PS_TIMES|PS_CHILD_LIST|PS_PPID_LIST|PS_LAST_RUN|PS_RLIMIT|PS_MSECS|PS_SUMMARY) @@ -4620,6 +4632,7 @@ struct psinfo { } regex_data[MAX_PS_ARGS]; int regexs; ulong *cpus; + int policy; }; #define IS_A_NUMBER(X) (decimal(X, 0) || hexadecimal(X, 0)) @@ -4823,7 +4836,7 @@ char *strip_ending_char(char *, char); char *strip_beginning_char(char *, char); char *strip_comma(char *); char *strip_hex(char *); -char *upper_case(char *, char *); +char *upper_case(const char *, char *); char *first_nonspace(char *); char *first_space(char *); char *replace_string(char *, char *, char); diff --git a/help.c b/help.c index f9c5792..efa55e0 100644 --- a/help.c +++ b/help.c @@ -844,7 +844,7 @@ char *help_foreach[] = { " net run the \"net\" command (optional flags: -s -S -R -d -x)", " set run the \"set\" command", " ps run the \"ps\" command (optional flags: -G -s -p -c -t -l -a", -" -g -r)", +" -g -r -y)", " sig run the \"sig\" command (optional flag: -g)", 
" vtop run the \"vtop\" command (optional flags: -c -u -k)\n", " flag Pass this optional flag to the command selected.", @@ -1250,7 +1250,7 @@ NULL char *help_ps[] = { "ps", "display process status information", -"[-k|-u|-G] [-s] [-p|-c|-t|-[l|m][-C cpu]|-a|-g|-r|-S]\n [pid | task | command] ...", +"[-k|-u|-G|-y policy] [-s] [-p|-c|-t|-[l|m][-C cpu]|-a|-g|-r|-S]\n [pid | task | command] ...", " This command displays process status for selected, or all, processes" , " in the system. If no arguments are entered, the process data is", " is displayed for all processes. Specific processes may be selected", @@ -1267,6 +1267,16 @@ char *help_ps[] = { " -k restrict the output to only kernel threads.", " -u restrict the output to only user tasks.", " -G display only the thread group leader in a thread group.", +" -y policy restrict the output to tasks having a specified scheduling policy", +" expressed by its integer value or by its (case-insensitive) name;", +" multiple policies may be entered in a comma-separated list:", +" 0 or NORMAL", +" 1 or FIFO", +" 2 or RR", +" 3 or BATCH", +" 4 or ISO", +" 5 or IDLE", +" 6 or DEADLINE", " ", " The process identifier types may be mixed. For each task, the following", " items are displayed:", diff --git a/symbols.c b/symbols.c index b2f2796..f7599e8 100644 --- a/symbols.c +++ b/symbols.c @@ -8584,6 +8584,8 @@ dump_offset_table(char *spec, ulong makestruct) OFFSET(task_struct_prio)); fprintf(fp, " task_struct_on_rq: %ld\n", OFFSET(task_struct_on_rq)); + fprintf(fp, " task_struct_policy: %ld\n", + OFFSET(task_struct_policy)); fprintf(fp, " thread_info_task: %ld\n", OFFSET(thread_info_task)); @@ -10211,6 +10213,7 @@ dump_offset_table(char *spec, ulong makestruct) fprintf(fp, " pt_regs: %ld\n", SIZE(pt_regs)); fprintf(fp, " task_struct: %ld\n", SIZE(task_struct)); fprintf(fp, " task_struct_flags: %ld\n", SIZE(task_struct_flags)); + fprintf(fp, " task_struct_policy: %ld\n", SIZE(task_struct_policy)); fprintf(fp, " thread_info: %ld\n", SIZE(thread_info)); fprintf(fp, " softirq_state: %ld\n", SIZE(softirq_state)); diff --git a/task.c b/task.c index 362822c..5754159 100644 --- a/task.c +++ b/task.c @@ -109,6 +109,24 @@ static void show_ps_summary(ulong); static void irqstacks_init(void); static void parse_task_thread(int argcnt, char *arglist[], struct task_context *); static void stack_overflow_check_init(void); +static int has_sched_policy(ulong, ulong); +static ulong task_policy(ulong); +static ulong sched_policy_bit_from_str(const char *); +static ulong make_sched_policy(const char *); + +static struct sched_policy_info { + ulong value; + char *name; +} sched_policy_info[] = { + { SCHED_NORMAL, "NORMAL" }, + { SCHED_FIFO, "FIFO" }, + { SCHED_RR, "RR" }, + { SCHED_BATCH, "BATCH" }, + { SCHED_ISO, "ISO" }, + { SCHED_IDLE, "IDLE" }, + { SCHED_DEADLINE, "DEADLINE" }, + { ULONG_MAX, NULL } +}; /* * Figure out how much space will be required to hold the task context @@ -273,6 +291,8 @@ task_init(void) MEMBER_OFFSET_INIT(task_struct_next_run, "task_struct", "next_run"); MEMBER_OFFSET_INIT(task_struct_flags, "task_struct", "flags"); MEMBER_SIZE_INIT(task_struct_flags, "task_struct", "flags"); + MEMBER_OFFSET_INIT(task_struct_policy, "task_struct", "policy"); + MEMBER_SIZE_INIT(task_struct_policy, "task_struct", "policy"); MEMBER_OFFSET_INIT(task_struct_pidhash_next, "task_struct", "pidhash_next"); MEMBER_OFFSET_INIT(task_struct_pgrp, "task_struct", "pgrp"); @@ -2974,7 +2994,7 @@ cmd_ps(void) cpuspec = NULL; flag = 0; - while ((c = getopt(argcnt, args, 
"SgstcpkuGlmarC:")) != EOF) { + while ((c = getopt(argcnt, args, "SgstcpkuGlmarC:y:")) != EOF) { switch(c) { case 'k': @@ -3075,6 +3095,11 @@ cmd_ps(void) make_cpumask(cpuspec, psinfo.cpus, FAULT_ON_ERROR, NULL); break; + case 'y': + flag |= PS_POLICY; + psinfo.policy = make_sched_policy(optarg); + break; + default: argerrs++; break; @@ -3218,6 +3243,8 @@ show_ps_data(ulong flag, struct task_context *tc, struct psinfo *psi) return; if ((flag & PS_KERNEL) && !is_kernel_thread(tc->task)) return; + if ((flag & PS_POLICY) && !has_sched_policy(tc->task, psi->policy)) + return; if (flag & PS_GROUP) { if (flag & (PS_LAST_RUN|PS_MSECS)) error(FATAL, "-G not supported with -%c option\n", @@ -3336,7 +3363,7 @@ show_ps(ulong flag, struct psinfo *psi) tc = FIRST_CONTEXT(); for (i = 0; i < RUNNING_TASKS(); i++, tc++) - show_ps_data(flag, tc, NULL); + show_ps_data(flag, tc, psi); return; } @@ -3391,7 +3418,7 @@ show_ps(ulong flag, struct psinfo *psi) if (flag & PS_TIMES) show_task_times(tc, flag); else - show_ps_data(flag, tc, NULL); + show_ps_data(flag, tc, psi); } } } @@ -3546,7 +3573,7 @@ show_milliseconds(struct task_context *tc, struct psinfo *psi) sprintf(format, "[%c%dll%c] ", '%', c, pc->output_radix == 10 ? 'u' : 'x'); - if (psi) { + if (psi && psi->cpus) { for (c = others = 0; c < kt->cpus; c++) { if (!NUM_IN_BITMAP(psi->cpus, c)) continue; @@ -5366,6 +5393,27 @@ task_flags(ulong task) } /* + * Return task's policy as bitmask bit. + */ +static ulong +task_policy(ulong task) +{ + ulong policy = 0; + + fill_task_struct(task); + + if (!tt->last_task_read) + return policy; + + if (SIZE(task_struct_policy) == sizeof(unsigned int)) + policy = 1 << UINT(tt->task_struct + OFFSET(task_struct_policy)); + else + policy = 1 << ULONG(tt->task_struct + OFFSET(task_struct_policy)); + + return policy; +} + +/* * Return a task's tgid. */ ulong @@ -5797,7 +5845,7 @@ cmd_foreach(void) BZERO(&foreach_data, sizeof(struct foreach_data)); fd = &foreach_data; - while ((c = getopt(argcnt, args, "R:vomlgersStTpukcfFxhdaG")) != EOF) { + while ((c = getopt(argcnt, args, "R:vomlgersStTpukcfFxhdaGy:")) != EOF) { switch(c) { case 'R': @@ -5892,6 +5940,11 @@ cmd_foreach(void) fd->flags |= FOREACH_G_FLAG; break; + case 'y': + fd->flags |= FOREACH_y_FLAG; + fd->policy = make_sched_policy(optarg); + break; + default: argerrs++; break; @@ -6554,6 +6607,10 @@ foreach(struct foreach_data *fd) cmdflags |= PS_GROUP; if (fd->flags & FOREACH_s_FLAG) cmdflags |= PS_KSTACKP; + if (fd->flags & FOREACH_y_FLAG) { + cmdflags |= PS_POLICY; + psinfo.policy = fd->policy; + } /* * mutually exclusive flags */ @@ -7389,6 +7446,82 @@ is_kernel_thread(ulong task) } /* + * Checks if task policy corresponds to given mask. + */ +static int +has_sched_policy(ulong task, ulong policy) +{ + return !!(task_policy(task) & policy); +} + +/* + * Converts sched policy name into mask bit. 
+ */ +static ulong +sched_policy_bit_from_str(const char *policy_str) +{ + struct sched_policy_info *info = NULL; + ulong policy = 0; + int found = 0; + char *upper = NULL; + /* + * Once kernel gets more than 10 scheduling policies, + * sizes of these arrays should be adjusted + */ + char digit[2] = { 0, 0 }; + char hex[4] = { 0, 0, 0, 0 }; + + upper = GETBUF(strlen(policy_str) + 1); + upper_case(policy_str, upper); + + for (info = sched_policy_info; info->name; info++) { + snprintf(digit, sizeof digit, "%lu", info->value); + /* + * Not using %#lX format here since "0X" prefix + * is not prepended if 0 value is given + */ + snprintf(hex, sizeof hex, "0X%lX", info->value); + if (strncmp(upper, info->name, strlen(info->name)) == 0 || + strncmp(upper, digit, sizeof digit) == 0 || + strncmp(upper, hex, sizeof hex) == 0) { + policy = 1 << info->value; + found = 1; + break; + } + } + + FREEBUF(upper); + + if (!found) + error(FATAL, + "%s: invalid scheduling policy\n", policy_str); + + return policy; +} + +/* + * Converts sched policy string set into bitmask. + */ +static ulong +make_sched_policy(const char *policy_str) +{ + ulong policy = 0; + char *iter = NULL; + char *orig = NULL; + char *cur = NULL; + + iter = STRDUPBUF(policy_str); + orig = iter; + + while ((cur = strsep(&iter, ","))) + policy |= sched_policy_bit_from_str(cur); + + FREEBUF(orig); + + return policy; +} + +/* + * Gather an arry of pointers to the per-cpu idle tasks. The tasklist * argument must be at least the size of ulong[NR_CPUS]. There may be * junk in everything after the first entry on a single CPU box, so the diff --git a/tools.c b/tools.c index 886d7fb..186b703 100644 --- a/tools.c +++ b/tools.c @@ -423,9 +423,10 @@ strip_hex(char *line) * Turn a string into upper-case. */ char * -upper_case(char *s, char *buf) +upper_case(const char *s, char *buf) { - char *p1, *p2; + const char *p1; + char *p2; p1 = s; p2 = buf; commit d3909692e9f64e4a1ac440afa81e9efd6e9ea0b4 Author: Dave Anderson Date: Thu Oct 26 16:54:28 2017 -0400 Fix for the "kmem -[sS]" options on Linux 4.14 and later kernels that contain commit 2482ddec670fb83717d129012bc558777cb159f7, titled "mm: add SLUB free list pointer obfuscation". Without the patch, there will be numerous error messages of the type "kmem: slab:
invalid freepointer: ". (anderson@redhat.com) diff --git a/defs.h b/defs.h index 4b4e331..967fce0 100644 --- a/defs.h +++ b/defs.h @@ -1995,6 +1995,7 @@ struct offset_table { /* stash of commonly-used offsets */ long mod_arch_specific_orc_unwind_ip; long mod_arch_specific_orc_unwind; long task_struct_policy; + long kmem_cache_random; }; struct size_table { /* stash of commonly-used sizes */ diff --git a/memory.c b/memory.c index fb534e8..9926199 100644 --- a/memory.c +++ b/memory.c @@ -75,7 +75,7 @@ struct meminfo { /* general purpose memory information structure */ ulong container; int *freelist; int freelist_index_size; - + ulong random; }; /* @@ -293,6 +293,7 @@ static void dump_per_cpu_offsets(void); static void dump_page_flags(ulonglong); static ulong kmem_cache_nodelists(ulong); static void dump_hstates(void); +static ulong freelist_ptr(struct meminfo *, ulong, ulong); /* * Memory display modes specific to this file. @@ -726,6 +727,7 @@ vm_init(void) MEMBER_OFFSET_INIT(kmem_cache_red_left_pad, "kmem_cache", "red_left_pad"); MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name"); MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags"); + MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random"); MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist"); MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page"); MEMBER_OFFSET_INIT(kmem_cache_cpu_node, "kmem_cache_cpu", "node"); @@ -18000,6 +18002,9 @@ dump_kmem_cache_slub(struct meminfo *si) si->slabsize = (PAGESIZE() << order); si->inuse = si->num_slabs = 0; si->slab_offset = offset; + si->random = VALID_MEMBER(kmem_cache_random) ? + ULONG(si->cache_buf + OFFSET(kmem_cache_random)) : 0; + if (!get_kmem_cache_slub_data(GET_SLUB_SLABS, si) || !get_kmem_cache_slub_data(GET_SLUB_OBJECTS, si)) si->flags |= SLAB_GATHER_FAILURE; @@ -18587,6 +18592,15 @@ count_free_objects(struct meminfo *si, ulong freelist) return c; } +static ulong +freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr) +{ + if (si->random) + /* CONFIG_SLAB_FREELIST_HARDENED */ + return (ptr ^ si->random ^ ptr_addr); + else + return ptr; +} static ulong get_freepointer(struct meminfo *si, void *object) @@ -18601,7 +18615,7 @@ get_freepointer(struct meminfo *si, void *object) return BADADDR; } - return nextfree; + return (freelist_ptr(si, nextfree, vaddr)); } static void diff --git a/symbols.c b/symbols.c index f7599e8..8a4c878 100644 --- a/symbols.c +++ b/symbols.c @@ -9378,6 +9378,8 @@ dump_offset_table(char *spec, ulong makestruct) OFFSET(kmem_cache_cpu_cache)); fprintf(fp, " kmem_cache_oo: %ld\n", OFFSET(kmem_cache_oo)); + fprintf(fp, " kmem_cache_random: %ld\n", + OFFSET(kmem_cache_random)); fprintf(fp, " kmem_cache_node_nr_partial: %ld\n", OFFSET(kmem_cache_node_nr_partial)); commit e81db08bc69fb1a7a7e48f892c2038d992a71f6d Author: Dave Anderson Date: Fri Oct 27 14:10:43 2017 -0400 Fix for the validation of the bits located in the least significant bits of mem_section.section_mem_map pointers. Without the patch, the validation functions always returned valid, due to a coding error found by clang. However, it was never really a problem because it is extremely unlikely that an existing mem_section would ever be invalid.
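The coding error reduces to a single operator: logical "&&" where bitwise "&" was intended. Since the flag constants are nonzero, "x && FLAG" degenerates to "x != 0", which is true for every populated entry. A self-contained illustration; the flag value mirrors the kernel's SECTION_MARKED_PRESENT definition, and the sample pointer is arbitrary:

    #include <assert.h>

    #define SECTION_MARKED_PRESENT (1UL << 0)

    int main(void)
    {
        /* arbitrary section_mem_map-style value with the low bit clear */
        unsigned long map = 0xffff880000000000UL & ~SECTION_MARKED_PRESENT;

        assert(map && SECTION_MARKED_PRESENT);        /* old test: always "valid" */
        assert((map & SECTION_MARKED_PRESENT) == 0);  /* fixed test: not present */
        return 0;
    }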
commit e81db08bc69fb1a7a7e48f892c2038d992a71f6d
Author: Dave Anderson
Date:   Fri Oct 27 14:10:43 2017 -0400

    Fix for the validation of the flag bits located in the least
    significant bits of mem_section.section_mem_map pointers.  Without
    the patch, the validation functions always returned valid, due to
    a coding error found by clang.  However, it was never really a
    problem because it is extremely unlikely that an existing
    mem_section would ever be invalid.
    (oleksandr@redhat.com, anderson@redhat.com)

diff --git a/memory.c b/memory.c
index 9926199..60594a4 100644
--- a/memory.c
+++ b/memory.c
@@ -17003,8 +17003,8 @@ valid_section(ulong addr)
 
 	if ((mem_section = read_mem_section(addr)))
 		return (ULONG(mem_section +
-			OFFSET(mem_section_section_mem_map)) &&
-			SECTION_MARKED_PRESENT);
+			OFFSET(mem_section_section_mem_map))
+			& SECTION_MARKED_PRESENT);
 	return 0;
 }
 
@@ -17012,11 +17012,17 @@ int
 section_has_mem_map(ulong addr)
 {
 	char *mem_section;
+	ulong kernel_version_bit;
+
+	if (THIS_KERNEL_VERSION >= LINUX(2,6,24))
+		kernel_version_bit = SECTION_HAS_MEM_MAP;
+	else
+		kernel_version_bit = SECTION_MARKED_PRESENT;
 
 	if ((mem_section = read_mem_section(addr)))
 		return (ULONG(mem_section +
 			OFFSET(mem_section_section_mem_map))
-			&& SECTION_HAS_MEM_MAP);
+			& kernel_version_bit);
 	return 0;
 }

commit 0f40db8fbac538ea448bbb2beb44912e4c43a54a
Author: Dave Anderson
Date:   Mon Oct 30 14:20:41 2017 -0400

    Fix for the x86_64 kernel virtual address to physical address
    translation mechanism.  Without the patch, when verifying that the
    PAGE_PRESENT bit is set in the top-level page table, it would always
    test positively, and the translation would continue parsing the
    remainder of the page tables.  This would virtually never be a
    problem in practice because if the top-level page table entry
    existed, its PAGE_PRESENT bit would be set.
    (oleksandr@redhat.com, anderson@redhat.com)

diff --git a/x86_64.c b/x86_64.c
index 2f9e6db..7d01140 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -2086,7 +2086,7 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
 	}
 
 start_vtop_with_pagetable:
-	if (!(*pml4) & _PAGE_PRESENT)
+	if (!(*pml4 & _PAGE_PRESENT))
 		goto no_kpage;
 	pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK;
 	FILL_PGD(pgd_paddr, PHYSADDR, PAGESIZE());
@@ -2187,7 +2187,7 @@ x86_64_kvtop_xen_wpt(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, i
 		fprintf(fp, "PML4 DIRECTORY: %lx\n", vt->kernel_pgd[0]);
 		fprintf(fp, "PAGE DIRECTORY: %lx [machine]\n", *pml4);
 	}
-	if (!(*pml4) & _PAGE_PRESENT)
+	if (!(*pml4 & _PAGE_PRESENT))
 		goto no_kpage;
 	pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK;
 	pgd_paddr = xen_m2p(pgd_paddr);
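This commit and the mem_section fix above correct the same class of clang-reported bug: a logical operation where a bitwise test was intended. A standalone sketch of the precedence trap, using a made-up page table entry whose PRESENT bit is clear:

    #include <stdio.h>

    #define _PAGE_PRESENT 0x1UL

    int
    main(void)
    {
            unsigned long pml4e = 0x12345000UL;  /* made-up entry, bit 0 (PRESENT) clear */

            /*
             * The buggy "!(*pml4) & _PAGE_PRESENT" parses as the line below:
             * '!' is applied first, so any nonzero entry yields 0 and the
             * not-present branch is never taken.
             */
            printf("buggy test : %lu\n", (!pml4e) & _PAGE_PRESENT);  /* 0: looks present */

            /* The fixed form masks the bit first, then negates the result. */
            printf("fixed test : %d\n", !(pml4e & _PAGE_PRESENT));   /* 1: correctly not present */
            return 0;
    }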
commit 9339874f3764fe99a408aec1a814b19c77f5dfe1
Author: Dave Anderson
Date:   Mon Oct 30 14:33:15 2017 -0400

    Removed a check for a negative block_size value in the SADUMP
    header parsing function; block_size is an unsigned value, and
    therefore always non-negative.  (oleksandr@redhat.com)

diff --git a/sadump.c b/sadump.c
index 2ccfa82..6b912d4 100644
--- a/sadump.c
+++ b/sadump.c
@@ -157,9 +157,6 @@ read_dump_header(char *file)
 	}
 
 restart:
-	if (block_size < 0)
-		return FALSE;
-
 	if (!read_device(sph, block_size, &offset)) {
 		error(INFO, "sadump: cannot read partition header\n");
 		goto err;

commit b2d1bba766118fddf43235f0bed483dff32ac6e0
Author: Dave Anderson
Date:   Mon Oct 30 14:46:32 2017 -0400

    Removed a check for an impossible negative value when calculating
    the beginning address to which the context value specified by the
    "search -x <count>" option is applied.  (oleksandr@redhat.com)

diff --git a/memory.c b/memory.c
index 60594a4..ebd671a 100644
--- a/memory.c
+++ b/memory.c
@@ -14467,7 +14467,7 @@ display_with_pre_and_post(void *bufptr, ulonglong addr, struct searchinfo *si)
 	}
 
 	amount = ctx * t;
-	addr_d = addr - amount < 0 ? 0 : addr - amount;
+	addr_d = addr - amount;
 
 	display_memory(addr_d, ctx, flag, memtype, NULL);
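Both removals above follow from unsigned arithmetic: block_size and the search address are unsigned quantities, and an unsigned expression can never compare less than zero; the subtraction simply wraps. A standalone sketch with made-up values:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned long long addr = 0x100, amount = 0x200;  /* made-up, amount > addr */

            /*
             * The difference wraps modulo 2^64 instead of going negative,
             * so a guard like "addr - amount < 0" is dead code: the
             * comparison below is always false for unsigned operands.
             */
            printf("addr - amount       = %#llx\n", addr - amount);
            printf("(addr - amount) < 0 = %d\n", (addr - amount) < 0);
            return 0;
    }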
commit e2efacdd9b7b229747a78c743b2acc6d15280a8a
Author: Dave Anderson
Date:   Mon Oct 30 16:49:48 2017 -0400

    Implemented a new "timer -C <cpu-specifier>" option that restricts
    the timer or hrtimer output to the timer queue data associated with
    one or more cpus.  For multiple cpus, the cpu-specifier uses the
    standard comma- or dash-separated list format.
    (oleksandr@redhat.com)

diff --git a/help.c b/help.c
index efa55e0..f7f61a1 100644
--- a/help.c
+++ b/help.c
@@ -2387,7 +2387,7 @@ NULL
 char *help_timer[] = {
 "timer",
 "timer queue data",
-"[-r]",
+"[-r][-C cpu]",
 "  This command displays the timer queue entries, both old- and new-style,",
 "  in chronological order.  In the case of the old-style timers, the",
 "  timer_table array index is shown; in the case of the new-style timers, ",
@@ -2397,6 +2397,8 @@ char *help_timer[] = {
 "  chronological order.  In the case of the old-style hrtimers, the",
 "  expiration time is a single value; in the new-style hrtimers, the",
 "  expiration time is a range.",
+"    -C cpu  Restrict the output to one or more CPUs, where multiple cpu[s] can",
+"            be specified, for example, as \"1,3,5\", \"1-3\", or \"1,3,5-7,10\".",
 "\nEXAMPLES",
 "  %s> timer",
 "    JIFFIES",
diff --git a/kernel.c b/kernel.c
index 8e95573..4638495 100644
--- a/kernel.c
+++ b/kernel.c
@@ -38,18 +38,18 @@ static void display_bh_1(void);
 static void display_bh_2(void);
 static void display_bh_3(void);
 static void display_bh_4(void);
-static void dump_hrtimer_data(void);
+static void dump_hrtimer_data(const ulong *cpus);
 static void dump_hrtimer_clock_base(const void *, const int);
 static void dump_hrtimer_base(const void *, const int);
 static void dump_active_timers(const void *, ulonglong);
 static int get_expires_len(const int, const ulong *, const int);
 static void print_timer(const void *);
 static ulonglong ktime_to_ns(const void *);
-static void dump_timer_data(void);
-static void dump_timer_data_tvec_bases_v1(void);
-static void dump_timer_data_tvec_bases_v2(void);
-static void dump_timer_data_tvec_bases_v3(void);
-static void dump_timer_data_timer_bases(void);
+static void dump_timer_data(const ulong *cpus);
+static void dump_timer_data_tvec_bases_v1(const ulong *cpus);
+static void dump_timer_data_tvec_bases_v2(const ulong *cpus);
+static void dump_timer_data_tvec_bases_v3(const ulong *cpus);
+static void dump_timer_data_timer_bases(const ulong *cpus);
 struct tv_range;
 static void init_tv_ranges(struct tv_range *, int, int, int);
 static int do_timer_list(ulong,int, ulong *, void *,ulong *,struct tv_range *);
@@ -7353,16 +7353,24 @@ cmd_timer(void)
 {
 	int c;
 	int rflag;
+	char *cpuspec;
+	ulong *cpus = NULL;
 
 	rflag = 0;
 
-	while ((c = getopt(argcnt, args, "r")) != EOF) {
+	while ((c = getopt(argcnt, args, "rC:")) != EOF) {
		switch(c)
		{
		case 'r':
			rflag = 1;
			break;
 
+		case 'C':
+			cpuspec = optarg;
+			cpus = get_cpumask_buf();
+			make_cpumask(cpuspec, cpus, FAULT_ON_ERROR, NULL);
+			break;
+
		default:
			argerrs++;
			break;
@@ -7373,15 +7381,18 @@ cmd_timer(void)
 		cmd_usage(pc->curcmd, SYNOPSIS);
 
 	if (rflag)
-		dump_hrtimer_data();
+		dump_hrtimer_data(cpus);
 	else
-		dump_timer_data();
+		dump_timer_data(cpus);
+
+	if (cpus)
+		FREEBUF(cpus);
 }
 
 static void
-dump_hrtimer_data(void)
+dump_hrtimer_data(const ulong *cpus)
 {
-	int i, j;
+	int i, j, k = 0;
 	int hrtimer_max_clock_bases, max_hrtimer_bases;
 	struct syment * hrtimer_bases;
@@ -7405,7 +7416,10 @@ dump_hrtimer_data(void)
 	hrtimer_bases = per_cpu_symbol_search("hrtimer_bases");
 
 	for (i = 0; i < kt->cpus; i++) {
-		if (i)
+		if (cpus && !NUM_IN_BITMAP(cpus, i))
+			continue;
+
+		if (k++)
 			fprintf(fp, "\n");
 
 		if (hide_offline_cpu(i)) {
@@ -7752,7 +7766,7 @@ struct tv_range {
 #define TVN (6)
 
 static void
-dump_timer_data(void)
+dump_timer_data(const ulong *cpus)
 {
 	int i;
 	ulong timer_active;
@@ -7773,16 +7787,16 @@ dump_timer_data(void)
 	struct tv_range tv[TVN];
 
 	if (kt->flags2 & TIMER_BASES) {
-		dump_timer_data_timer_bases();
+		dump_timer_data_timer_bases(cpus);
 		return;
 	} else if (kt->flags2 & TVEC_BASES_V3) {
-		dump_timer_data_tvec_bases_v3();
+		dump_timer_data_tvec_bases_v3(cpus);
 		return;
 	} else if (kt->flags & TVEC_BASES_V2) {
-		dump_timer_data_tvec_bases_v2();
+		dump_timer_data_tvec_bases_v2(cpus);
 		return;
 	} else if (kt->flags & TVEC_BASES_V1) {
-		dump_timer_data_tvec_bases_v1();
+		dump_timer_data_tvec_bases_v1(cpus);
 		return;
 	}
 
@@ -7924,7 +7938,7 @@ dump_timer_data(void)
  */
 
 static void
-dump_timer_data_tvec_bases_v1(void)
+dump_timer_data_tvec_bases_v1(const ulong *cpus)
 {
 	int i, cpu, tdx, flen;
 	struct timer_data *td;
@@ -7947,6 +7961,11 @@ dump_timer_data_tvec_bases_v1(void)
 	cpu = 0;
 
 next_cpu:
+	if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+		if (++cpu < kt->cpus)
+			goto next_cpu;
+		return;
+	}
 	count = 0;
 	td = (struct timer_data *)NULL;
 
@@ -8039,7 +8058,7 @@ next_cpu:
  */
 
 static void
-dump_timer_data_tvec_bases_v2(void)
+dump_timer_data_tvec_bases_v2(const ulong *cpus)
 {
 	int i, cpu, tdx, flen;
 	struct timer_data *td;
@@ -8073,6 +8092,11 @@ dump_timer_data_tvec_bases_v2(void)
 	cpu = 0;
 
 next_cpu:
+	if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+		if (++cpu < kt->cpus)
+			goto next_cpu;
+		return;
+	}
 	/*
 	 * hide data of offline cpu and goto next cpu
 	 */
@@ -8185,7 +8209,7 @@ next_cpu:
  * Linux 4.2 timers use new tvec_root, tvec and timer_list structures
  */
 static void
-dump_timer_data_tvec_bases_v3(void)
+dump_timer_data_tvec_bases_v3(const ulong *cpus)
 {
 	int i, cpu, tdx, flen;
 	struct timer_data *td;
@@ -8216,6 +8240,11 @@ dump_timer_data_tvec_bases_v3(void)
 	cpu = 0;
 
 next_cpu:
+	if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+		if (++cpu < kt->cpus)
+			goto next_cpu;
+		return;
+	}
 	/*
 	 * hide data of offline cpu and goto next cpu
 	 */
@@ -8758,9 +8787,9 @@ do_timer_list_v4(struct timer_bases_data *data)
  * Linux 4.8 timers use new timer_bases[][]
  */
 static void
-dump_timer_data_timer_bases(void)
+dump_timer_data_timer_bases(const ulong *cpus)
 {
-	int i, cpu, flen, base, nr_bases, found, display;
+	int i, cpu, flen, base, nr_bases, found, display, j = 0;
 	struct syment *sp;
 	ulong timer_base, jiffies, function;
 	struct timer_bases_data data;
@@ -8785,6 +8814,11 @@ dump_timer_data_timer_bases(void)
 		RJUST|LONG_DEC,MKSTR(jiffies)));
 
 next_cpu:
+	if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+		if (++cpu < kt->cpus)
+			goto next_cpu;
+		goto done;
+	}
 	/*
 	 * hide data of offline cpu and goto next cpu
 	 */
@@ -8803,7 +8837,7 @@ next_cpu:
 	else
 		timer_base = sp->value;
 
-	if (cpu)
+	if (j++)
 		fprintf(fp, "\n");
 
 next_base: