blob: 31e077bb0caad729df75f65b4bcb7a1e3cb9b9da [file] [log] [blame]
Eric W. Biederman5033cba2005-06-25 14:57:56 -07001/*
2 * Architecture specific (i386) functions for kexec based crash dumps.
3 *
4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
5 *
6 * Copyright (C) IBM Corporation, 2004. All rights reserved.
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/smp.h>
14#include <linux/irq.h>
15#include <linux/reboot.h>
16#include <linux/kexec.h>
17#include <linux/irq.h>
18#include <linux/delay.h>
19#include <linux/elf.h>
20#include <linux/elfcore.h>
21
22#include <asm/processor.h>
23#include <asm/hardirq.h>
24#include <asm/nmi.h>
25#include <asm/hw_irq.h>
Eric W. Biederman63d30292005-06-25 14:58:00 -070026#include <asm/apic.h>
Eric W. Biedermanc4ac4262005-06-25 14:57:58 -070027#include <mach_ipi.h>
Eric W. Biederman5033cba2005-06-25 14:57:56 -070028
Eric W. Biederman5033cba2005-06-25 14:57:56 -070029
/* One ELF-note buffer per possible CPU, indexed by CPU number.
 * crash_save_this_cpu() writes the saved register state into the
 * slot that belongs to the CPU it is given.
 */
note_buf_t crash_notes[NR_CPUS];
31
Eric W. Biederman2c818b42005-06-25 14:57:59 -070032static u32 *append_elf_note(u32 *buf,
33 char *name, unsigned type, void *data, size_t data_len)
34{
35 struct elf_note note;
36 note.n_namesz = strlen(name) + 1;
37 note.n_descsz = data_len;
38 note.n_type = type;
39 memcpy(buf, &note, sizeof(note));
40 buf += (sizeof(note) +3)/4;
41 memcpy(buf, name, note.n_namesz);
42 buf += (note.n_namesz + 3)/4;
43 memcpy(buf, data, note.n_descsz);
44 buf += (note.n_descsz + 3)/4;
45 return buf;
46}
47
48static void final_note(u32 *buf)
49{
50 struct elf_note note;
51 note.n_namesz = 0;
52 note.n_descsz = 0;
53 note.n_type = 0;
54 memcpy(buf, &note, sizeof(note));
55}
56
57
58static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
59{
60 struct elf_prstatus prstatus;
61 u32 *buf;
62 if ((cpu < 0) || (cpu >= NR_CPUS)) {
63 return;
64 }
65 /* Using ELF notes here is opportunistic.
66 * I need a well defined structure format
67 * for the data I pass, and I need tags
68 * on the data to indicate what information I have
69 * squirrelled away. ELF notes happen to provide
70 * all of that that no need to invent something new.
71 */
72 buf = &crash_notes[cpu][0];
73 memset(&prstatus, 0, sizeof(prstatus));
74 prstatus.pr_pid = current->pid;
75 elf_core_copy_regs(&prstatus.pr_reg, regs);
76 buf = append_elf_note(buf, "CORE", NT_PRSTATUS,
77 &prstatus, sizeof(prstatus));
78
79 final_note(buf);
80}
81
82static void crash_get_current_regs(struct pt_regs *regs)
83{
84 __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
85 __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
86 __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
87 __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
88 __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
89 __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
90 __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
91 __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
92 __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
93 __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
94 __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
95 __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
96 __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
97
98 regs->eip = (unsigned long)current_text_addr();
99}
100
101static void crash_save_self(void)
102{
103 struct pt_regs regs;
104 int cpu;
105 cpu = smp_processor_id();
106 crash_get_current_regs(&regs);
107 crash_save_this_cpu(&regs, cpu);
108}
109
Eric W. Biedermanc4ac4262005-06-25 14:57:58 -0700110#ifdef CONFIG_SMP
111static atomic_t waiting_for_crash_ipi;
112
113static int crash_nmi_callback(struct pt_regs *regs, int cpu)
114{
Vivek Goyal4d554762005-06-25 14:58:13 -0700115 struct pt_regs fixed_regs;
Eric W. Biedermanc4ac4262005-06-25 14:57:58 -0700116 local_irq_disable();
Vivek Goyal4d554762005-06-25 14:58:13 -0700117
118 /* CPU does not save ss and esp on stack if execution is already
119 * running in kernel mode at the time of NMI occurrence. This code
120 * fixes it.
121 */
122 if (!user_mode(regs)) {
123 memcpy(&fixed_regs, regs, sizeof(*regs));
124 fixed_regs.esp = (unsigned long)&(regs->esp);
125 __asm__ __volatile__("xorl %eax, %eax;");
126 __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(fixed_regs.xss));
127 regs = &fixed_regs;
128 }
Eric W. Biederman2c818b42005-06-25 14:57:59 -0700129 crash_save_this_cpu(regs, cpu);
Eric W. Biederman63d30292005-06-25 14:58:00 -0700130 disable_local_APIC();
Eric W. Biedermanc4ac4262005-06-25 14:57:58 -0700131 atomic_dec(&waiting_for_crash_ipi);
132 /* Assume hlt works */
133 __asm__("hlt");
134 for(;;);
135 return 1;
136}
137
138/*
139 * By using the NMI code instead of a vector we just sneak thru the
140 * word generator coming out with just what we want. AND it does
141 * not matter if clustered_apic_mode is set or not.
142 */
143static void smp_send_nmi_allbutself(void)
144{
145 send_IPI_allbutself(APIC_DM_NMI);
146}
147
148static void nmi_shootdown_cpus(void)
149{
150 unsigned long msecs;
151 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
152
153 /* Would it be better to replace the trap vector here? */
154 set_nmi_callback(crash_nmi_callback);
155 /* Ensure the new callback function is set before sending
156 * out the NMI
157 */
158 wmb();
159
160 smp_send_nmi_allbutself();
161
162 msecs = 1000; /* Wait at most a second for the other cpus to stop */
163 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
164 mdelay(1);
165 msecs--;
166 }
167
168 /* Leave the nmi callback set */
Eric W. Biederman63d30292005-06-25 14:58:00 -0700169 disable_local_APIC();
Eric W. Biedermanc4ac4262005-06-25 14:57:58 -0700170}
171#else
/* Built when CONFIG_SMP is not set: there are no other CPUs to shoot
 * down, so this is deliberately empty.
 */
static void nmi_shootdown_cpus(void)
{
}
176#endif
177
/*
 * Bring the machine into a state from which a kexec'd kernel can run.
 *
 * This is only called after the system has panicked or is otherwise
 * in a critical state, so it does the minimum that is needed: stop
 * the other CPUs of an SMP system, shut down the local APIC (and the
 * IO-APIC when configured), then save this CPU's registers.
 */
void machine_crash_shutdown(void)
{
	/* The kernel is broken so disable interrupts */
	local_irq_disable();

	nmi_shootdown_cpus();
	lapic_shutdown();
#ifdef CONFIG_X86_IO_APIC
	disable_IO_APIC();
#endif
	crash_save_self();
}