blob: e59cda134166f6f03548b0b115ad1546071c0aea [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86_64/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Pavel Machek and
12 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
13 */
14
Andrew Mortonbb81a092006-12-07 02:14:01 +010015#include <linux/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/delay.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/interrupt.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/module.h>
20#include <linux/sysdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/sysctl.h>
Andi Kleeneddb6fb2006-02-03 21:50:41 +010022#include <linux/kprobes.h>
Andrew Mortonbb81a092006-12-07 02:14:01 +010023#include <linux/cpumask.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024
25#include <asm/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026#include <asm/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <asm/proto.h>
28#include <asm/kdebug.h>
Andi Kleen553f2652006-04-07 19:49:57 +020029#include <asm/mce.h>
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +020030#include <asm/intel_arch_perfmon.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
/* NOTE(review): presumably exposed as sysctl knobs — the sysctl table is
 * not visible in this chunk; verify against kernel/sysctl.c. */
int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;

/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evtsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/ event selection may be reserved for
 *   different subsystems this reservation system just tries to coordinate
 *   things a little
 */
static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);

/* CPUs whose NMI handler should dump a backtrace (set by a trigger,
 * cleared by nmi_watchdog_tick() once printed) */
static cpumask_t backtrace_mask = CPU_MASK_NONE;

/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 * NOTE(review): 66 bits does not fit in a single 'unsigned' owner word
 * above — presumably per-cpu placement makes the overflow benign; verify.
 */
#define NMI_MAX_COUNTER_BITS 66
51
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
int panic_on_timeout;				/* set by "nmi_watchdog=panic,..." */

unsigned int nmi_watchdog = NMI_DEFAULT;	/* mode: NONE/IO_APIC/LOCAL_APIC */
static unsigned int nmi_hz = HZ;		/* watchdog NMI frequency */

/* Per-cpu bookkeeping for the local-APIC watchdog's perf counter. */
struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;			/* overflow bit to test in the perfctr */
	unsigned int cccr_msr;		/* P4 only; 0 when unused */
	unsigned int perfctr_msr;	/* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;	/* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
Andi Kleen75152112005-05-16 21:53:34 -070075
Don Zickus828f0af2006-09-26 10:52:26 +020076/* converts an msr to an appropriate reservation bit */
77static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
78{
79 /* returns the bit offset of the performance counter register */
80 switch (boot_cpu_data.x86_vendor) {
81 case X86_VENDOR_AMD:
82 return (msr - MSR_K7_PERFCTR0);
83 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +020084 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
85 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
86 else
87 return (msr - MSR_P4_BPU_PERFCTR0);
Don Zickus828f0af2006-09-26 10:52:26 +020088 }
89 return 0;
90}
91
92/* converts an msr to an appropriate reservation bit */
93static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
94{
95 /* returns the bit offset of the event selection register */
96 switch (boot_cpu_data.x86_vendor) {
97 case X86_VENDOR_AMD:
98 return (msr - MSR_K7_EVNTSEL0);
99 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200100 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
101 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
102 else
103 return (msr - MSR_P4_BSU_ESCR0);
Don Zickus828f0af2006-09-26 10:52:26 +0200104 }
105 return 0;
106}
107
108/* checks for a bit availability (hack for oprofile) */
109int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
110{
111 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
112
113 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
114}
115
116/* checks the an msr for availability */
117int avail_to_resrv_perfctr_nmi(unsigned int msr)
118{
119 unsigned int counter;
120
121 counter = nmi_perfctr_msr_to_bit(msr);
122 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
123
124 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
125}
126
127int reserve_perfctr_nmi(unsigned int msr)
128{
129 unsigned int counter;
130
131 counter = nmi_perfctr_msr_to_bit(msr);
132 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
133
134 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
135 return 1;
136 return 0;
137}
138
139void release_perfctr_nmi(unsigned int msr)
140{
141 unsigned int counter;
142
143 counter = nmi_perfctr_msr_to_bit(msr);
144 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
145
146 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
147}
148
149int reserve_evntsel_nmi(unsigned int msr)
150{
151 unsigned int counter;
152
153 counter = nmi_evntsel_msr_to_bit(msr);
154 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
155
156 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)))
157 return 1;
158 return 0;
159}
160
161void release_evntsel_nmi(unsigned int msr)
162{
163 unsigned int counter;
164
165 counter = nmi_evntsel_msr_to_bit(msr);
166 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
167
168 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner));
169}
170
Ashok Raje6982c62005-06-25 14:54:58 -0700171static __cpuinit inline int nmi_known_cpu(void)
Andi Kleen75152112005-05-16 21:53:34 -0700172{
173 switch (boot_cpu_data.x86_vendor) {
174 case X86_VENDOR_AMD:
175 return boot_cpu_data.x86 == 15;
176 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200177 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
178 return 1;
179 else
180 return (boot_cpu_data.x86 == 15);
Andi Kleen75152112005-05-16 21:53:34 -0700181 }
182 return 0;
183}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184
185/* Run after command line and cpu_init init, but before all other checks */
Don Zickuse33e89a2006-09-26 10:52:27 +0200186void nmi_watchdog_default(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187{
188 if (nmi_watchdog != NMI_DEFAULT)
189 return;
Andi Kleen75152112005-05-16 21:53:34 -0700190 if (nmi_known_cpu())
191 nmi_watchdog = NMI_LOCAL_APIC;
192 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 nmi_watchdog = NMI_IO_APIC;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194}
195
/* Set to 1 by check_nmi_watchdog() to release the nmi_cpu_busy() spinners. */
static int endflag __initdata = 0;

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
	while (endflag == 0)
		mb();	/* force a fresh read of endflag each iteration */
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216
Venkatesh Pallipadi16761932007-02-13 13:26:22 +0100217static unsigned int adjust_for_32bit_ctr(unsigned int hz)
218{
219 unsigned int retval = hz;
220
221 /*
222 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
223 * are writable, with higher bits sign extending from bit 31.
224 * So, we can only program the counter with 31 bit values and
225 * 32nd bit should be 1, for 33.. to be 1.
226 * Find the appropriate nmi_hz
227 */
228 if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
229 retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
230 }
231 return retval;
232}
233
/* Boot-time self-test: verify the configured NMI watchdog actually ticks
 * on every enabled cpu, disabling it where it appears stuck, then lower
 * nmi_hz to a sane rate. Returns 0 on success, -1 on failure. */
int __init check_nmi_watchdog (void)
{
	int *counts;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!counts)
		return -1;

	printk(KERN_INFO "testing NMI watchdog ... ");

#ifdef CONFIG_SMP
	/* keep the other cpus busy so their perfctrs keep counting */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif

	/* snapshot per-cpu NMI counts before the delay
	 * NOTE(review): iterates all NR_CPUS slots, not just online cpus —
	 * presumably cpu_pda() is valid for every slot; verify. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		counts[cpu] = cpu_pda(cpu)->__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	for_each_online_cpu(cpu) {
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		/* fewer than ~5 NMIs in 10 ticks: that cpu's watchdog is dead */
		if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			       cpu,
			       counts[cpu],
			       cpu_pda(cpu)->__nmi_count);
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		/* every cpu failed: mark the watchdog as unusable (-1) */
		kfree(counts);
		atomic_set(&nmi_active, -1);
		endflag = 1;	/* release the nmi_cpu_busy() spinners */
		return -1;
	}
	endflag = 1;	/* release the nmi_cpu_busy() spinners */
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		/* 32-bit arch-perfmon counters can't always hold a 1 Hz period */
		if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	}

	kfree(counts);
	return 0;
}
295
296int __init setup_nmi_watchdog(char *str)
297{
298 int nmi;
299
300 if (!strncmp(str,"panic",5)) {
301 panic_on_timeout = 1;
302 str = strchr(str, ',');
303 if (!str)
304 return 1;
305 ++str;
306 }
307
308 get_option(&str, &nmi);
309
Don Zickusf2802e72006-09-26 10:52:26 +0200310 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 return 0;
Don Zickusf2802e72006-09-26 10:52:26 +0200312
Andi Kleen75152112005-05-16 21:53:34 -0700313 nmi_watchdog = nmi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314 return 1;
315}
316
317__setup("nmi_watchdog=", setup_nmi_watchdog);
318
/* Tear down the local-APIC watchdog on every cpu; nmi_active must reach 0. */
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;	/* already off (0) or never set up (<0) */

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}
330
/* (Re-)arm the local-APIC watchdog on every cpu, if the CPU is supported. */
static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}
346
/* Stop the IO-APIC (timer-based) watchdog: mask IRQ 0 and tear down
 * the per-cpu state everywhere; nmi_active must reach 0. */
void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;	/* already off (0) or never set up (<0) */

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}
359
/* Restart the IO-APIC (timer-based) watchdog: rebuild per-cpu state,
 * then unmask IRQ 0 so timer NMIs flow again. */
void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();	/* avoid a spurious lockup report */
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}
370
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

/* Suspend hook: remember whether the watchdog was running, then stop it.
 * only CPU0 goes here, other CPUs should be offline */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

/* Resume hook: restart the watchdog iff it was active before suspend.
 * only CPU0 goes here, other CPUs should be offline */
static int lapic_nmi_resume(struct sys_device *dev)
{
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();	/* avoid a spurious lockup report */
	}
	return 0;
}

/* sysdev class tying the watchdog into the suspend/resume path */
static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume = lapic_nmi_resume,
	.suspend = lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id = 0,
	.cls = &nmi_sysclass,
};

/* Register the PM hooks, but only when the local-APIC watchdog is in
 * use and was successfully set up (nmi_active >= 0). */
static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if ( atomic_read(&nmi_active) < 0 )
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */
427
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

/* AMD K7/K8 event-select register bits and the event we count */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)	/* raise APIC interrupt on overflow */
#define K7_EVNTSEL_OS		(1 << 17)	/* count in kernel mode */
#define K7_EVNTSEL_USR		(1 << 16)	/* count in user mode */
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
442
/* Program AMD K7/K8 perfctr 0 to overflow (and raise an NMI) after
 * roughly cpu_khz*1000/nmi_hz cycles. Returns 1 on success, 0 on
 * failure with all reservations released. */
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	/* Simulator may not support it */
	if (checking_wrmsrl(evntsel_msr, 0UL))
		goto fail2;
	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer: negative count so the counter overflows after
	 * cpu_khz*1000/nmi_hz increments; enable only after LVTPC is set
	 * to deliver NMIs */
	wrmsr(evntsel_msr, evntsel, 0);
	wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL<<63;	/* counter sign bit = overflow pending */
	return 1;
fail2:
	release_evntsel_nmi(evntsel_msr);
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
486
Don Zickusf2802e72006-09-26 10:52:26 +0200487static void stop_k7_watchdog(void)
488{
489 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
490
491 wrmsr(wd->evntsel_msr, 0, 0);
492
493 release_evntsel_nmi(wd->evntsel_msr);
494 release_perfctr_nmi(wd->perfctr_msr);
495}
496
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

/* Pentium 4 ESCR (event select) and CCCR (counter configuration) bits */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)	/* perfmon hardware present */
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)	/* count in kernel mode */
#define P4_ESCR_USR		(1<<2)	/* count in user mode */
#define P4_CCCR_OVF_PMI0	(1<<26)	/* PMI to logical cpu 0 on overflow */
#define P4_CCCR_OVF_PMI1	(1<<27)	/* PMI to logical cpu 1 on overflow */
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)	/* sticky overflow flag */
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
Andi Kleen75152112005-05-16 21:53:34 -0700516
/* Program a Pentium 4 IQ counter as a cycle counter that raises an NMI
 * after cpu_khz*1000/nmi_hz cycles. Each hyperthread gets its own
 * counter/CCCR pair. Returns 1 on success, 0 on failure. */
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;	/* no perfmon hardware advertised */

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	/* complemented max threshold turns the counter into a clock
	 * (see comment above these defines) */
	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	/* negative count, enable only after LVTPC delivers NMIs */
	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL<<39;	/* P4 IQ counters are 40 bits wide */
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
592
Don Zickusf2802e72006-09-26 10:52:26 +0200593static void stop_p4_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594{
Don Zickusf2802e72006-09-26 10:52:26 +0200595 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Andi Kleen75152112005-05-16 21:53:34 -0700596
Don Zickusf2802e72006-09-26 10:52:26 +0200597 wrmsr(wd->cccr_msr, 0, 0);
598 wrmsr(wd->evntsel_msr, 0, 0);
599
600 release_evntsel_nmi(wd->evntsel_msr);
601 release_perfctr_nmi(wd->perfctr_msr);
602}
603
/* Architectural-perfmon event used by the watchdog: unhalted core cycles */
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

/* Program architectural-perfmon counter 0 to raise an NMI roughly
 * nmi_hz times a second. Returns 1 on success, 0 on failure. */
static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);

	/* only 32 counter bits are writable, so the period may not fit;
	 * raise nmi_hz until it does */
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);	/* counter sign bit */
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
663
/* Disable the architectural-perfmon watchdog and release its MSRs.
 * Bails out silently if the hardware never supported the event. */
static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;	/* setup_intel_arch_watchdog() never ran */

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
686
/* Per-cpu watchdog bring-up (called directly or via on_each_cpu()).
 * Dispatches to the vendor-specific setup routine for NMI_LOCAL_APIC;
 * for NMI_IO_APIC only the bookkeeping below is needed. */
void setup_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;	/* this cpu is already armed */

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			/* prefer architectural perfmon when available */
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			if (!setup_p4_watchdog())
				return;
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}
728
/* Per-cpu watchdog tear-down, mirror of setup_apic_nmi_watchdog(). */
void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;	/* this cpu was never armed */

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
				return;
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			stop_p4_watchdog();
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}
762
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check it's local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 */

static DEFINE_PER_CPU(unsigned, last_irq_sum);	/* apic_timer_irqs at last tick */
static DEFINE_PER_CPU(local_t, alert_counter);	/* consecutive stuck ticks */
static DEFINE_PER_CPU(int, nmi_touch);		/* set to defer lockup detection */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
776void touch_nmi_watchdog (void)
777{
Jan Beulich99019e92006-02-16 23:41:55 +0100778 if (nmi_watchdog > 0) {
779 unsigned cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
Jan Beulich99019e92006-02-16 23:41:55 +0100781 /*
782 * Tell other CPUs to reset their alert counters. We cannot
783 * do it ourselves because the alert count increase is not
784 * atomic.
785 */
786 for_each_present_cpu (cpu)
787 per_cpu(nmi_touch, cpu) = 1;
788 }
Ingo Molnar8446f1d2005-09-06 15:16:27 -0700789
790 touch_softlockup_watchdog();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791}
792
/*
 * Per-NMI watchdog tick, called from default_do_nmi() on every NMI.
 *
 * Detects a locked-up CPU by checking whether local APIC timer
 * interrupts advanced since the previous tick; after 5*nmi_hz
 * consecutive ticks without progress it calls die_nmi().  Also
 * re-arms the performance counter that generates the watchdog NMI.
 *
 * Returns 1 if this NMI was consumed (claimed by a notifier or
 * recognised as a watchdog tick), 0 if it is unknown to us.
 */
int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
	int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc=0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
		== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	/* progress indicator: local APIC timer interrupt count on this CPU */
	sum = read_pda(apic_timer_irqs);
	if (__get_cpu_var(nmi_touch)) {
		/* touch_nmi_watchdog() was called; skip the lockup check once */
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}

	/* another CPU asked us (via backtrace_mask) to dump our stack */
	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

#ifdef CONFIG_X86_MCE
	/* Could check oops_in_progress here too, but it's safer
	   not too */
	if (atomic_read(&mce_entry) > 0)
		touched = 1;
#endif
	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
			die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
				panic_on_timeout);
	} else {
		/* progress was made: remember the count and reset the alarm */
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}

	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit){
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				wrmsrl(wd->perfctr_msr,
				       -((u64)cpu_khz * 1000 / nmi_hz));
			} else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/*
				 * ArchPerfmon/Core Duo needs to re-unmask
				 * the apic vector
				 */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* ARCH_PERFMON has 32 bit counter writes */
				wrmsr(wd->perfctr_msr,
				      (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
			} else {
				/* start the cycle over again */
				wrmsrl(wd->perfctr_msr,
				       -((u64)cpu_khz * 1000 / nmi_hz));
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		} else
			printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
	}
done:
	return rc;
}
898
/*
 * Top-level NMI entry point: bump the per-CPU NMI count and dispatch
 * to default_do_nmi(), bracketed by nmi_enter()/nmi_exit().
 */
asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
	nmi_enter();
	add_pda(__nmi_count,1);
	default_do_nmi(regs);
	nmi_exit();
}
906
/*
 * Last-resort callback for NMIs no other handler claimed.  When the
 * unknown_nmi_panic sysctl is set, delegate to
 * unknown_nmi_panic_callback() (which panics); otherwise report the
 * NMI as unhandled by returning 0.
 */
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}
915
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916#ifdef CONFIG_SYSCTL
917
/*
 * Panic on an NMI whose reason byte no handler recognised.
 * die_nmi() with panic flag 1 does not return; the trailing
 * return only satisfies the compiler.
 */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	/* snprintf bounds the write into the stack buffer (sprintf had no limit) */
	snprintf(buf, sizeof(buf), "NMI received for unknown reason %02x\n", reason);
	die_nmi(buf, regs, 1); /* Always panic here */
	return 0;
}
927
Don Zickus407984f2006-09-26 10:52:27 +0200928/*
929 * proc handler for /proc/sys/kernel/nmi
930 */
931int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
932 void __user *buffer, size_t *length, loff_t *ppos)
933{
934 int old_state;
935
936 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
937 old_state = nmi_watchdog_enabled;
938 proc_dointvec(table, write, file, buffer, length, ppos);
939 if (!!old_state == !!nmi_watchdog_enabled)
940 return 0;
941
942 if (atomic_read(&nmi_active) < 0) {
943 printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
Don Zickuse33e89a2006-09-26 10:52:27 +0200944 return -EIO;
Don Zickus407984f2006-09-26 10:52:27 +0200945 }
946
947 /* if nmi_watchdog is not set yet, then set it */
948 nmi_watchdog_default();
949
Don Zickuse33e89a2006-09-26 10:52:27 +0200950 if (nmi_watchdog == NMI_LOCAL_APIC) {
Don Zickus407984f2006-09-26 10:52:27 +0200951 if (nmi_watchdog_enabled)
952 enable_lapic_nmi_watchdog();
953 else
954 disable_lapic_nmi_watchdog();
Don Zickus407984f2006-09-26 10:52:27 +0200955 } else {
Don Zickuse33e89a2006-09-26 10:52:27 +0200956 printk( KERN_WARNING
Don Zickus407984f2006-09-26 10:52:27 +0200957 "NMI watchdog doesn't know what hardware to touch\n");
958 return -EIO;
959 }
960 return 0;
961}
962
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963#endif
964
Andrew Mortonbb81a092006-12-07 02:14:01 +0100965void __trigger_all_cpu_backtrace(void)
966{
967 int i;
968
969 backtrace_mask = cpu_online_map;
970 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
971 for (i = 0; i < 10 * 1000; i++) {
972 if (cpus_empty(backtrace_mask))
973 break;
974 mdelay(1);
975 }
976}
977
/*
 * Exports for modules (e.g. oprofile) that share the performance
 * counters with the NMI watchdog, and for watchdog control users.
 */
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);