Alexander van Heukelum | 6fcbede | 2008-09-30 13:12:15 +0200 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs |
| 4 | */ |
| 5 | #include <linux/kallsyms.h> |
| 6 | #include <linux/kprobes.h> |
| 7 | #include <linux/uaccess.h> |
| 8 | #include <linux/utsname.h> |
| 9 | #include <linux/hardirq.h> |
| 10 | #include <linux/kdebug.h> |
| 11 | #include <linux/module.h> |
| 12 | #include <linux/ptrace.h> |
| 13 | #include <linux/kexec.h> |
| 14 | #include <linux/bug.h> |
| 15 | #include <linux/nmi.h> |
| 16 | |
| 17 | #include <asm/stacktrace.h> |
| 18 | |
/*
 * Not read or written by any function visible in this file.
 * NOTE(review): presumably consumed by the NMI handling code - confirm.
 */
int panic_on_unrecovered_nmi;
/*
 * Upper bound on the number of raw stack words printed by
 * show_stack_log_lvl(); overridden by the "kstack" early parameter.
 */
int kstack_depth_to_print = 12;
/*
 * Number of code bytes show_registers() dumps around the faulting ip;
 * overridden by "code_bytes=" and capped at 8192 by code_bytes_setup().
 */
static unsigned int code_bytes = 64;
/* Incremented and printed by __die() for every report it emits. */
static int die_counter;
| 23 | |
/*
 * Print one trace entry: the raw address in "[<...>]" followed by its
 * %pS rendering.  Entries that are not flagged reliable get a "? "
 * prefix in front of the symbol.
 */
void printk_address(unsigned long address, int reliable)
{
	const char *marker = "? ";

	if (reliable)
		marker = "";

	printk(" [<%016lx>] %s%pS\n", address, marker, (void *)address);
}
| 29 | |
/*
 * Determine whether 'stack' lies inside one of the exception stacks
 * recorded in the per-cpu orig_ist table of 'cpu'.
 *
 * @cpu:   CPU whose orig_ist entries are consulted.
 * @stack: address to classify.
 * @usedp: bitmask of exception stacks already reported; the bit of the
 *         matching stack is set here.
 * @idp:   on a match, receives a pointer to a short name for the stack.
 *
 * Returns the end address of the matching stack (for an oversized debug
 * stack: the end of the EXCEPTION_STKSZ-sized chunk containing 'stack'),
 * or NULL when 'stack' is in no exception stack or the matching one was
 * already marked in *usedp.
 */
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
					unsigned *usedp, char **idp)
{
	/*
	 * Names indexed like orig_ist.ist[].  When the debug stack is larger
	 * than a regular exception stack, the extra "#DB[?]" entries name its
	 * additional chunks; the '?' is patched with a digit further down.
	 */
	static char ids[][8] = {
		[DEBUG_STACK - 1] = "#DB",
		[NMI_STACK - 1] = "NMI",
		[DOUBLEFAULT_STACK - 1] = "#DF",
		[STACKFAULT_STACK - 1] = "#SS",
		[MCE_STACK - 1] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
		[N_EXCEPTION_STACKS ...
		 N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
#endif
	};
	unsigned k;

	/*
	 * Iterate over all exception stacks, and figure out whether
	 * 'stack' is in one of them:
	 */
	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
		unsigned long end = per_cpu(orig_ist, cpu).ist[k];
		/*
		 * Is 'stack' at or above the end of this exception stack?
		 * If yes then skip to the next one.
		 */
		if (stack >= end)
			continue;
		/*
		 * Is 'stack' at or above the start address of this exception
		 * stack?  If yes then we found the right one.
		 */
		if (stack >= end - EXCEPTION_STKSZ) {
			/*
			 * Make sure we only iterate through an exception
			 * stack once. If it comes up for the second time
			 * then there's something wrong going on - just
			 * break out and return NULL:
			 */
			if (*usedp & (1U << k))
				break;
			*usedp |= 1U << k;
			*idp = ids[k];
			return (unsigned long *)end;
		}
		/*
		 * If this is a debug stack, and if it has a larger size than
		 * the usual exception stacks, then 'stack' might still
		 * be within the lower portion of the debug stack:
		 */
#if DEBUG_STKSZ > EXCEPTION_STKSZ
		if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
			unsigned j = N_EXCEPTION_STACKS - 1;

			/*
			 * Black magic. A large debug stack is composed of
			 * multiple exception stack entries, which we
			 * iterate through now. Don't look:
			 *
			 * Each pass moves 'end' down by one chunk, advances j
			 * to that chunk's ids[]/usedp slot and overwrites the
			 * '?' of "#DB[?]" (offset 4) with the chunk's digit.
			 */
			do {
				++j;
				end -= EXCEPTION_STKSZ;
				ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
			} while (stack < end - EXCEPTION_STKSZ);
			/* Same visit-once rule as for the regular stacks. */
			if (*usedp & (1U << j))
				break;
			*usedp |= 1U << j;
			*idp = ids[j];
			return (unsigned long *)end;
		}
#endif
	}
	return NULL;
}
| 104 | |
| 105 | /* |
| 106 | * x86-64 can have up to three kernel stacks: |
| 107 | * process stack |
| 108 | * interrupt stack |
| 109 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
| 110 | */ |
| 111 | |
| 112 | static inline int valid_stack_ptr(struct thread_info *tinfo, |
| 113 | void *p, unsigned int size, void *end) |
| 114 | { |
| 115 | void *t = tinfo; |
| 116 | if (end) { |
| 117 | if (p < end && p >= (end-THREAD_SIZE)) |
| 118 | return 1; |
| 119 | else |
| 120 | return 0; |
| 121 | } |
| 122 | return p > t && p < t + THREAD_SIZE - size; |
| 123 | } |
| 124 | |
/*
 * The form of the top of the frame on the stack.  print_context_stack()
 * casts the frame pointer value to this layout and follows next_frame
 * to step from one frame to the next.
 */
struct stack_frame {
	struct stack_frame *next_frame;	/* followed by print_context_stack() */
	unsigned long return_address;	/* not accessed by name in the code visible here */
};
| 130 | |
| 131 | static inline unsigned long |
| 132 | print_context_stack(struct thread_info *tinfo, |
| 133 | unsigned long *stack, unsigned long bp, |
| 134 | const struct stacktrace_ops *ops, void *data, |
| 135 | unsigned long *end) |
| 136 | { |
| 137 | struct stack_frame *frame = (struct stack_frame *)bp; |
| 138 | |
| 139 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { |
| 140 | unsigned long addr; |
| 141 | |
| 142 | addr = *stack; |
| 143 | if (__kernel_text_address(addr)) { |
| 144 | if ((unsigned long) stack == bp + 8) { |
| 145 | ops->address(data, addr, 1); |
| 146 | frame = frame->next_frame; |
| 147 | bp = (unsigned long) frame; |
| 148 | } else { |
| 149 | ops->address(data, addr, bp == 0); |
| 150 | } |
| 151 | } |
| 152 | stack++; |
| 153 | } |
| 154 | return bp; |
| 155 | } |
| 156 | |
| 157 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
| 158 | unsigned long *stack, unsigned long bp, |
| 159 | const struct stacktrace_ops *ops, void *data) |
| 160 | { |
| 161 | const unsigned cpu = get_cpu(); |
| 162 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
| 163 | unsigned used = 0; |
| 164 | struct thread_info *tinfo; |
| 165 | |
| 166 | if (!task) |
| 167 | task = current; |
| 168 | |
| 169 | if (!stack) { |
| 170 | unsigned long dummy; |
| 171 | stack = &dummy; |
| 172 | if (task && task != current) |
| 173 | stack = (unsigned long *)task->thread.sp; |
| 174 | } |
| 175 | |
| 176 | #ifdef CONFIG_FRAME_POINTER |
| 177 | if (!bp) { |
| 178 | if (task == current) { |
| 179 | /* Grab bp right from our regs */ |
| 180 | asm("movq %%rbp, %0" : "=r" (bp) : ); |
| 181 | } else { |
| 182 | /* bp is the last reg pushed by switch_to */ |
| 183 | bp = *(unsigned long *) task->thread.sp; |
| 184 | } |
| 185 | } |
| 186 | #endif |
| 187 | |
| 188 | /* |
| 189 | * Print function call entries in all stacks, starting at the |
| 190 | * current stack address. If the stacks consist of nested |
| 191 | * exceptions |
| 192 | */ |
| 193 | tinfo = task_thread_info(task); |
| 194 | for (;;) { |
| 195 | char *id; |
| 196 | unsigned long *estack_end; |
| 197 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
| 198 | &used, &id); |
| 199 | |
| 200 | if (estack_end) { |
| 201 | if (ops->stack(data, id) < 0) |
| 202 | break; |
| 203 | |
| 204 | bp = print_context_stack(tinfo, stack, bp, ops, |
| 205 | data, estack_end); |
| 206 | ops->stack(data, "<EOE>"); |
| 207 | /* |
| 208 | * We link to the next stack via the |
| 209 | * second-to-last pointer (index -2 to end) in the |
| 210 | * exception stack: |
| 211 | */ |
| 212 | stack = (unsigned long *) estack_end[-2]; |
| 213 | continue; |
| 214 | } |
| 215 | if (irqstack_end) { |
| 216 | unsigned long *irqstack; |
| 217 | irqstack = irqstack_end - |
| 218 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); |
| 219 | |
| 220 | if (stack >= irqstack && stack < irqstack_end) { |
| 221 | if (ops->stack(data, "IRQ") < 0) |
| 222 | break; |
| 223 | bp = print_context_stack(tinfo, stack, bp, |
| 224 | ops, data, irqstack_end); |
| 225 | /* |
| 226 | * We link to the next stack (which would be |
| 227 | * the process stack normally) the last |
| 228 | * pointer (index -1 to end) in the IRQ stack: |
| 229 | */ |
| 230 | stack = (unsigned long *) (irqstack_end[-1]); |
| 231 | irqstack_end = NULL; |
| 232 | ops->stack(data, "EOI"); |
| 233 | continue; |
| 234 | } |
| 235 | } |
| 236 | break; |
| 237 | } |
| 238 | |
| 239 | /* |
| 240 | * This handles the process stack: |
| 241 | */ |
| 242 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); |
| 243 | put_cpu(); |
| 244 | } |
| 245 | EXPORT_SYMBOL(dump_trace); |
| 246 | |
/*
 * ->warning_symbol callback of print_trace_ops: hands 'msg' and
 * 'symbol' to print_symbol() and then terminates the line.
 * 'data' is ignored.
 */
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	print_symbol(msg, symbol);
	printk("\n");
}
| 253 | |
/*
 * ->warning callback of print_trace_ops: prints 'msg' on a line of its
 * own.  'data' is ignored.
 */
static void print_trace_warning(void *data, char *msg)
{
	printk("%s\n", msg);
}
| 258 | |
/*
 * ->stack callback of print_trace_ops: prints the stack name wrapped in
 * angle brackets.  Always returns 0, so dump_trace() never stops its
 * walk because of this callback.  'data' is ignored.
 */
static int print_trace_stack(void *data, char *name)
{
	printk(" <%s> ", name);
	return 0;
}
| 264 | |
/*
 * ->address callback of print_trace_ops: prints one trace entry via
 * printk_address().  touch_nmi_watchdog() is called before each entry.
 * 'data' is ignored.
 */
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
	touch_nmi_watchdog();
	printk_address(addr, reliable);
}
| 270 | |
/*
 * Callback set that makes dump_trace() print its findings with printk();
 * used by show_trace_log_lvl().
 */
static const struct stacktrace_ops print_trace_ops = {
	.warning = print_trace_warning,
	.warning_symbol = print_trace_warning_symbol,
	.stack = print_trace_stack,
	.address = print_trace_address,
};
| 277 | |
/*
 * Print a "Call Trace:" header followed by the trace produced by
 * dump_trace() with the printk-based print_trace_ops callbacks.
 *
 * 'log_lvl' is passed to dump_trace() as the callback data pointer; the
 * callbacks defined in this file do not look at it.
 */
static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp, char *log_lvl)
{
	printk("Call Trace:\n");
	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
}
| 285 | |
/*
 * Print a call trace for 'task' starting at 'stack'/'bp'; thin wrapper
 * around show_trace_log_lvl() with an empty log-level string.
 */
void show_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp)
{
	show_trace_log_lvl(task, regs, stack, bp, "");
}
| 291 | |
| 292 | static void |
| 293 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
| 294 | unsigned long *sp, unsigned long bp, char *log_lvl) |
| 295 | { |
| 296 | unsigned long *stack; |
| 297 | int i; |
| 298 | const int cpu = smp_processor_id(); |
| 299 | unsigned long *irqstack_end = |
| 300 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); |
| 301 | unsigned long *irqstack = |
| 302 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); |
| 303 | |
| 304 | /* |
| 305 | * debugging aid: "show_stack(NULL, NULL);" prints the |
| 306 | * back trace for this cpu. |
| 307 | */ |
| 308 | |
| 309 | if (sp == NULL) { |
| 310 | if (task) |
| 311 | sp = (unsigned long *)task->thread.sp; |
| 312 | else |
| 313 | sp = (unsigned long *)&sp; |
| 314 | } |
| 315 | |
| 316 | stack = sp; |
| 317 | for (i = 0; i < kstack_depth_to_print; i++) { |
| 318 | if (stack >= irqstack && stack <= irqstack_end) { |
| 319 | if (stack == irqstack_end) { |
| 320 | stack = (unsigned long *) (irqstack_end[-1]); |
| 321 | printk(" <EOI> "); |
| 322 | } |
| 323 | } else { |
| 324 | if (((long) stack & (THREAD_SIZE-1)) == 0) |
| 325 | break; |
| 326 | } |
| 327 | if (i && ((i % 4) == 0)) |
| 328 | printk("\n"); |
| 329 | printk(" %016lx", *stack++); |
| 330 | touch_nmi_watchdog(); |
| 331 | } |
| 332 | printk("\n"); |
| 333 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
| 334 | } |
| 335 | |
/*
 * Dump the raw stack words and the call trace of 'task' starting at
 * 'sp'.  Thin wrapper around show_stack_log_lvl() with no register set,
 * a zero frame pointer and an empty log-level string; NULL arguments are
 * resolved there.
 */
void show_stack(struct task_struct *task, unsigned long *sp)
{
	show_stack_log_lvl(task, NULL, sp, 0, "");
}
| 340 | |
| 341 | /* |
| 342 | * The architecture-independent dump_stack generator |
| 343 | */ |
| 344 | void dump_stack(void) |
| 345 | { |
| 346 | unsigned long bp = 0; |
| 347 | unsigned long stack; |
| 348 | |
| 349 | #ifdef CONFIG_FRAME_POINTER |
| 350 | if (!bp) |
| 351 | asm("movq %%rbp, %0" : "=r" (bp) : ); |
| 352 | #endif |
| 353 | |
| 354 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
| 355 | current->pid, current->comm, print_tainted(), |
| 356 | init_utsname()->release, |
| 357 | (int)strcspn(init_utsname()->version, " "), |
| 358 | init_utsname()->version); |
| 359 | show_trace(NULL, NULL, &stack, bp); |
| 360 | } |
| 361 | EXPORT_SYMBOL(dump_stack); |
| 362 | |
| 363 | void show_registers(struct pt_regs *regs) |
| 364 | { |
| 365 | int i; |
| 366 | unsigned long sp; |
| 367 | const int cpu = smp_processor_id(); |
| 368 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
| 369 | |
| 370 | sp = regs->sp; |
| 371 | printk("CPU %d ", cpu); |
| 372 | __show_regs(regs, 1); |
| 373 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
| 374 | cur->comm, cur->pid, task_thread_info(cur), cur); |
| 375 | |
| 376 | /* |
| 377 | * When in-kernel, we also print out the stack and code at the |
| 378 | * time of the fault.. |
| 379 | */ |
| 380 | if (!user_mode(regs)) { |
| 381 | unsigned int code_prologue = code_bytes * 43 / 64; |
| 382 | unsigned int code_len = code_bytes; |
| 383 | unsigned char c; |
| 384 | u8 *ip; |
| 385 | |
| 386 | printk("Stack: "); |
| 387 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
| 388 | regs->bp, ""); |
| 389 | |
| 390 | printk(KERN_EMERG "Code: "); |
| 391 | |
| 392 | ip = (u8 *)regs->ip - code_prologue; |
| 393 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { |
| 394 | /* try starting at RIP */ |
| 395 | ip = (u8 *)regs->ip; |
| 396 | code_len = code_len - code_prologue + 1; |
| 397 | } |
| 398 | for (i = 0; i < code_len; i++, ip++) { |
| 399 | if (ip < (u8 *)PAGE_OFFSET || |
| 400 | probe_kernel_address(ip, c)) { |
| 401 | printk(" Bad RIP value."); |
| 402 | break; |
| 403 | } |
| 404 | if (ip == (u8 *)regs->ip) |
| 405 | printk("<%02x> ", c); |
| 406 | else |
| 407 | printk("%02x ", c); |
| 408 | } |
| 409 | } |
| 410 | printk("\n"); |
| 411 | } |
| 412 | |
| 413 | int is_valid_bugaddr(unsigned long ip) |
| 414 | { |
| 415 | unsigned short ud2; |
| 416 | |
| 417 | if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) |
| 418 | return 0; |
| 419 | |
| 420 | return ud2 == 0x0b0f; |
| 421 | } |
| 422 | |
/* Taken in oops_begin() and released in oops_end() around an oops report. */
static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
/* CPU recorded by oops_begin() as the lock holder; reset to -1 by oops_end(). */
static int die_owner = -1;
/* Incremented by oops_begin(), decremented by oops_end(); the lock is dropped at zero. */
static unsigned int die_nest_count;
| 426 | |
| 427 | unsigned __kprobes long oops_begin(void) |
| 428 | { |
| 429 | int cpu; |
| 430 | unsigned long flags; |
| 431 | |
| 432 | oops_enter(); |
| 433 | |
| 434 | /* racy, but better than risking deadlock. */ |
| 435 | raw_local_irq_save(flags); |
| 436 | cpu = smp_processor_id(); |
| 437 | if (!__raw_spin_trylock(&die_lock)) { |
| 438 | if (cpu == die_owner) |
| 439 | /* nested oops. should stop eventually */; |
| 440 | else |
| 441 | __raw_spin_lock(&die_lock); |
| 442 | } |
| 443 | die_nest_count++; |
| 444 | die_owner = cpu; |
| 445 | console_verbose(); |
| 446 | bust_spinlocks(1); |
| 447 | return flags; |
| 448 | } |
| 449 | |
| 450 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) |
| 451 | { |
| 452 | die_owner = -1; |
| 453 | bust_spinlocks(0); |
| 454 | die_nest_count--; |
| 455 | if (!die_nest_count) |
| 456 | /* Nest count reaches zero, release the lock. */ |
| 457 | __raw_spin_unlock(&die_lock); |
| 458 | raw_local_irq_restore(flags); |
| 459 | if (!regs) { |
| 460 | oops_exit(); |
| 461 | return; |
| 462 | } |
| 463 | if (in_interrupt()) |
| 464 | panic("Fatal exception in interrupt"); |
| 465 | if (panic_on_oops) |
| 466 | panic("Fatal exception"); |
| 467 | oops_exit(); |
| 468 | do_exit(signr); |
| 469 | } |
| 470 | |
/*
 * Print the body of an oops report.
 *
 * @str:  headline text.
 * @regs: register set at the time of the fault.
 * @err:  error code; its low 16 bits are printed.
 *
 * Returns 1 when a DIE_OOPS notifier answered NOTIFY_STOP (nothing but
 * the headline is printed in that case), 0 after a full report.
 */
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
	/* Headline: text, error code, running oops number, config tags. */
	printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
	printk("PREEMPT ");
#endif
#ifdef CONFIG_SMP
	printk("SMP ");
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
	printk("DEBUG_PAGEALLOC");
#endif
	printk("\n");
	/* Let the die notifier chain see the event; it may suppress the rest. */
	if (notify_die(DIE_OOPS, str, regs, err,
			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
		return 1;

	show_registers(regs);
	add_taint(TAINT_DIE);
	/* Executive summary in case the oops scrolled away */
	printk(KERN_ALERT "RIP ");
	printk_address(regs->ip, 1);
	printk(" RSP <%016lx>\n", regs->sp);
	if (kexec_should_crash(current))
		crash_kexec(regs);
	return 0;
}
| 498 | |
| 499 | void die(const char *str, struct pt_regs *regs, long err) |
| 500 | { |
| 501 | unsigned long flags = oops_begin(); |
| 502 | |
| 503 | if (!user_mode(regs)) |
| 504 | report_bug(regs->ip, regs); |
| 505 | |
| 506 | if (__die(str, regs, err)) |
| 507 | regs = NULL; |
| 508 | oops_end(flags, regs, SIGSEGV); |
| 509 | } |
| 510 | |
/*
 * Report a fatal condition detected from NMI context.
 *
 * @str:      message printed at the top of the report.
 * @regs:     register set at the time of the NMI.
 * @do_panic: non-zero forces a panic after the report (panic_on_oops
 *            has the same effect).
 *
 * Returns only when a DIE_NMIWATCHDOG notifier answers NOTIFY_STOP;
 * otherwise the function ends in panic() or do_exit(SIGBUS).
 */
notrace __kprobes void
die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
	unsigned long flags;

	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
		return;

	flags = oops_begin();
	/*
	 * We are in trouble anyway, lets at least try
	 * to get a message out.
	 */
	printk(KERN_EMERG "%s", str);
	printk(" on CPU%d, ip %08lx, registers:\n",
		smp_processor_id(), regs->ip);
	show_registers(regs);
	if (kexec_should_crash(current))
		crash_kexec(regs);
	if (do_panic || panic_on_oops)
		panic("Non maskable interrupt");
	/* NULL regs: oops_end() only does its bookkeeping and returns here. */
	oops_end(flags, NULL, SIGBUS);
	nmi_exit();
	local_irq_enable();
	do_exit(SIGBUS);
}
| 537 | |
| 538 | static int __init oops_setup(char *s) |
| 539 | { |
| 540 | if (!s) |
| 541 | return -EINVAL; |
| 542 | if (!strcmp(s, "panic")) |
| 543 | panic_on_oops = 1; |
| 544 | return 0; |
| 545 | } |
| 546 | early_param("oops", oops_setup); |
| 547 | |
| 548 | static int __init kstack_setup(char *s) |
| 549 | { |
| 550 | if (!s) |
| 551 | return -EINVAL; |
| 552 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); |
| 553 | return 0; |
| 554 | } |
| 555 | early_param("kstack", kstack_setup); |
| 556 | |
| 557 | static int __init code_bytes_setup(char *s) |
| 558 | { |
| 559 | code_bytes = simple_strtoul(s, NULL, 0); |
| 560 | if (code_bytes > 8192) |
| 561 | code_bytes = 8192; |
| 562 | |
| 563 | return 1; |
| 564 | } |
| 565 | __setup("code_bytes=", code_bytes_setup); |