blob: 821df34d2b3a3a1b8a71ada0a77b64229e0f0200 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/i386/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
12 * Pavel Machek and
13 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/delay.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/interrupt.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/nmi.h>
20#include <linux/sysdev.h>
21#include <linux/sysctl.h>
Don Zickus3e4ff112006-06-26 13:57:01 +020022#include <linux/percpu.h>
Andi Kleen1de84972006-09-26 10:52:27 +020023#include <linux/dmi.h>
Fernando Luis Vázquez Cao06039752006-09-26 10:52:36 +020024#include <linux/kprobes.h>
Andrew Mortonbb81a092006-12-07 02:14:01 +010025#include <linux/cpumask.h>
Thomas Gleixnerf8b50352007-02-16 01:28:09 -080026#include <linux/kernel_stat.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027
28#include <asm/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/nmi.h>
Don Zickusb7471c62006-09-26 10:52:26 +020030#include <asm/kdebug.h>
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +020031#include <asm/intel_arch_perfmon.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include "mach_traps.h"
34
/* sysctl knobs: panic on unknown NMIs / current watchdog enable state */
Andi Kleen29cbc782006-09-30 01:47:55 +020035int unknown_nmi_panic;
36int nmi_watchdog_enabled;
37
Don Zickus828f0af2006-09-26 10:52:26 +020038/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
39 * evtsel_nmi_owner tracks the ownership of the event selection
40 * - different performance counters/ event selection may be reserved for
41 * different subsystems this reservation system just tries to coordinate
42 * things a little
43 */
/* one reservation bit per counter/evtsel, per CPU; bit index comes from
 * nmi_perfctr_msr_to_bit()/nmi_evntsel_msr_to_bit() below */
44static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
45static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
46
/* CPUs that still owe a backtrace dump; presumably driven by the NMI
 * backtrace trigger — confirm against the rest of the file */
Andrew Mortonbb81a092006-12-07 02:14:01 +010047static cpumask_t backtrace_mask = CPU_MASK_NONE;
48
Don Zickus828f0af2006-09-26 10:52:26 +020049/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
50 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
51 */
52#define NMI_MAX_COUNTER_BITS 66
53
Linus Torvalds1da177e2005-04-16 15:20:36 -070054/* nmi_active:
Don Zickusb7471c62006-09-26 10:52:26 +020055 * >0: the lapic NMI watchdog is active, but can be disabled
56 * <0: the lapic NMI watchdog has not been set up, and cannot
Linus Torvalds1da177e2005-04-16 15:20:36 -070057 * be enabled
Don Zickusb7471c62006-09-26 10:52:26 +020058 * 0: the lapic NMI watchdog is disabled, but can be enabled
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 */
Don Zickusb7471c62006-09-26 10:52:26 +020060atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
Linus Torvalds1da177e2005-04-16 15:20:36 -070061
/* which watchdog flavor is selected (NMI_LOCAL_APIC / NMI_IO_APIC / ...) and
 * how often it is expected to fire; nmi_hz is lowered after the boot test */
Don Zickusb7471c62006-09-26 10:52:26 +020062unsigned int nmi_watchdog = NMI_DEFAULT;
63static unsigned int nmi_hz = HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
/* per-CPU bookkeeping for the lapic watchdog: which MSRs were programmed
 * and which counter bit the NMI handler must test for overflow */
Don Zickusb7471c62006-09-26 10:52:26 +020065struct nmi_watchdog_ctlblk {
66 int enabled;
67 u64 check_bit;
68 unsigned int cccr_msr;
69 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
70 unsigned int evntsel_msr; /* the MSR to select the events to handle */
71};
72static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
Don Zickusb7471c62006-09-26 10:52:26 +020074/* local prototypes */
Don Zickusb7471c62006-09-26 10:52:26 +020075static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
76
77extern void show_registers(struct pt_regs *regs);
/* NOTE(review): unknown_nmi_panic is defined above in this same file;
 * this extern re-declaration is redundant (harmless) */
78extern int unknown_nmi_panic;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079
Don Zickus828f0af2006-09-26 10:52:26 +020080/* converts an msr to an appropriate reservation bit */
81static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
82{
83 /* returns the bit offset of the performance counter register */
84 switch (boot_cpu_data.x86_vendor) {
85 case X86_VENDOR_AMD:
86 return (msr - MSR_K7_PERFCTR0);
87 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +020088 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
89 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
90
Don Zickus828f0af2006-09-26 10:52:26 +020091 switch (boot_cpu_data.x86) {
92 case 6:
93 return (msr - MSR_P6_PERFCTR0);
94 case 15:
95 return (msr - MSR_P4_BPU_PERFCTR0);
96 }
97 }
98 return 0;
99}
100
101/* converts an msr to an appropriate reservation bit */
102static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
103{
104 /* returns the bit offset of the event selection register */
105 switch (boot_cpu_data.x86_vendor) {
106 case X86_VENDOR_AMD:
107 return (msr - MSR_K7_EVNTSEL0);
108 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200109 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
110 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
111
Don Zickus828f0af2006-09-26 10:52:26 +0200112 switch (boot_cpu_data.x86) {
113 case 6:
114 return (msr - MSR_P6_EVNTSEL0);
115 case 15:
116 return (msr - MSR_P4_BSU_ESCR0);
117 }
118 }
119 return 0;
120}
121
122/* checks for a bit availability (hack for oprofile) */
123int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
124{
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126
127 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
128}
129
130/* checks the an msr for availability */
131int avail_to_resrv_perfctr_nmi(unsigned int msr)
132{
133 unsigned int counter;
134
135 counter = nmi_perfctr_msr_to_bit(msr);
136 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
137
138 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
139}
140
141int reserve_perfctr_nmi(unsigned int msr)
142{
143 unsigned int counter;
144
145 counter = nmi_perfctr_msr_to_bit(msr);
146 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
147
148 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
149 return 1;
150 return 0;
151}
152
153void release_perfctr_nmi(unsigned int msr)
154{
155 unsigned int counter;
156
157 counter = nmi_perfctr_msr_to_bit(msr);
158 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
159
160 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
161}
162
163int reserve_evntsel_nmi(unsigned int msr)
164{
165 unsigned int counter;
166
167 counter = nmi_evntsel_msr_to_bit(msr);
168 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
169
170 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
171 return 1;
172 return 0;
173}
174
175void release_evntsel_nmi(unsigned int msr)
176{
177 unsigned int counter;
178
179 counter = nmi_evntsel_msr_to_bit(msr);
180 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
181
182 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
183}
184
Don Zickusb7471c62006-09-26 10:52:26 +0200185static __cpuinit inline int nmi_known_cpu(void)
186{
187 switch (boot_cpu_data.x86_vendor) {
188 case X86_VENDOR_AMD:
Andi Kleen0a4599c2007-02-13 13:26:25 +0100189 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
190 || (boot_cpu_data.x86 == 16));
Don Zickusb7471c62006-09-26 10:52:26 +0200191 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200192 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
193 return 1;
194 else
195 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
Don Zickusb7471c62006-09-26 10:52:26 +0200196 }
197 return 0;
198}
199
/* endflag releases the nmi_cpu_busy() spinners; __initdata is OK here since
 * both the writer (check_nmi_watchdog) and the spinners run only at boot */
Ravikiran G Thirumalai92715e22006-12-09 21:33:35 +0100200static int endflag __initdata = 0;
201
Eric W. Biederman29b70082005-10-30 14:59:40 -0800202#ifdef CONFIG_SMP
203/* The performance counters used by NMI_LOCAL_APIC don't trigger when
204 * the CPU is idle. To make sure the NMI watchdog really ticks on all
205 * CPUs during the test make them busy.
206 */
207static __init void nmi_cpu_busy(void *data)
208{
Ingo Molnar366c7f52006-07-03 00:25:25 -0700209 local_irq_enable_in_hardirq();
Eric W. Biederman29b70082005-10-30 14:59:40 -0800210 /* Intentionally don't use cpu_relax here. This is
211 to make sure that the performance counter really ticks,
212 even if there is a simulator or similar that catches the
213 pause instruction. On a real HT machine this is fine because
214 all other CPUs are busy with "useless" delay loops and don't
215 care if they get somewhat less cycles. */
/* mb() also acts as a compiler barrier, forcing endflag to be re-read
 * on every iteration instead of being cached in a register */
Ravikiran G Thirumalai92715e22006-12-09 21:33:35 +0100216 while (endflag == 0)
217 mb();
Eric W. Biederman29b70082005-10-30 14:59:40 -0800218}
219#endif
220
Venkatesh Pallipadi90ce4bc2007-02-13 13:26:22 +0100221static unsigned int adjust_for_32bit_ctr(unsigned int hz)
222{
223 u64 counter_val;
224 unsigned int retval = hz;
225
226 /*
227 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
228 * are writable, with higher bits sign extending from bit 31.
229 * So, we can only program the counter with 31 bit values and
230 * 32nd bit should be 1, for 33.. to be 1.
231 * Find the appropriate nmi_hz
232 */
233 counter_val = (u64)cpu_khz * 1000;
234 do_div(counter_val, retval);
235 if (counter_val > 0x7fffffffULL) {
236 u64 count = (u64)cpu_khz * 1000;
237 do_div(count, 0x7fffffffUL);
238 retval = count + 1;
239 }
240 return retval;
241}
242
Jack F Vogel67701ae2005-05-01 08:58:48 -0700243static int __init check_nmi_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
Eric W. Biederman29b70082005-10-30 14:59:40 -0800245 unsigned int *prev_nmi_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 int cpu;
247
Andi Kleen1de84972006-09-26 10:52:27 +0200248 /* Enable NMI watchdog for newer systems.
Andi Kleena1bae672006-10-21 18:37:02 +0200249 Probably safe on most older systems too, but let's be careful.
250 IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
251 which hangs the system. Disable watchdog for all thinkpads */
252 if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
253 !dmi_name_in_vendors("ThinkPad"))
Andi Kleen1de84972006-09-26 10:52:27 +0200254 nmi_watchdog = NMI_LOCAL_APIC;
255
Don Zickusb7471c62006-09-26 10:52:26 +0200256 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
257 return 0;
258
259 if (!atomic_read(&nmi_active))
Jack F Vogel67701ae2005-05-01 08:58:48 -0700260 return 0;
261
Eric W. Biederman29b70082005-10-30 14:59:40 -0800262 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
263 if (!prev_nmi_count)
264 return -1;
265
Jack F Vogel67701ae2005-05-01 08:58:48 -0700266 printk(KERN_INFO "Testing NMI watchdog ... ");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
Eric W. Biederman29b70082005-10-30 14:59:40 -0800268 if (nmi_watchdog == NMI_LOCAL_APIC)
269 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
270
KAMEZAWA Hiroyukic8912592006-03-28 01:56:39 -0800271 for_each_possible_cpu(cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
273 local_irq_enable();
274 mdelay((10*1000)/nmi_hz); // wait 10 ticks
275
KAMEZAWA Hiroyukic8912592006-03-28 01:56:39 -0800276 for_each_possible_cpu(cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277#ifdef CONFIG_SMP
278 /* Check cpu_callin_map here because that is set
279 after the timer is started. */
280 if (!cpu_isset(cpu, cpu_callin_map))
281 continue;
282#endif
Don Zickusb7471c62006-09-26 10:52:26 +0200283 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
284 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
Eric W. Biederman29b70082005-10-30 14:59:40 -0800286 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
287 cpu,
288 prev_nmi_count[cpu],
289 nmi_count(cpu));
Don Zickusb7471c62006-09-26 10:52:26 +0200290 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
291 atomic_dec(&nmi_active);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 }
293 }
Don Zickusb7471c62006-09-26 10:52:26 +0200294 if (!atomic_read(&nmi_active)) {
295 kfree(prev_nmi_count);
296 atomic_set(&nmi_active, -1);
297 return -1;
298 }
Eric W. Biederman29b70082005-10-30 14:59:40 -0800299 endflag = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 printk("OK.\n");
301
302 /* now that we know it works we can reduce NMI frequency to
303 something more reasonable; makes a difference in some configs */
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200304 if (nmi_watchdog == NMI_LOCAL_APIC) {
305 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
306
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 nmi_hz = 1;
Venkatesh Pallipadi90ce4bc2007-02-13 13:26:22 +0100308
309 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
310 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
311 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200312 }
313 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Eric W. Biederman29b70082005-10-30 14:59:40 -0800315 kfree(prev_nmi_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 return 0;
317}
Jack F Vogel67701ae2005-05-01 08:58:48 -0700318/* This needs to happen later in boot so counters are working */
319late_initcall(check_nmi_watchdog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320
321static int __init setup_nmi_watchdog(char *str)
322{
323 int nmi;
324
325 get_option(&str, &nmi);
326
Don Zickusb7471c62006-09-26 10:52:26 +0200327 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328 return 0;
Venkatesh Pallipadi58d9ce7d2007-01-22 20:40:34 -0800329
Don Zickusb7471c62006-09-26 10:52:26 +0200330 nmi_watchdog = nmi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 return 1;
332}
333
334__setup("nmi_watchdog=", setup_nmi_watchdog);
335
336static void disable_lapic_nmi_watchdog(void)
337{
Don Zickusb7471c62006-09-26 10:52:26 +0200338 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
339
340 if (atomic_read(&nmi_active) <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342
Don Zickusb7471c62006-09-26 10:52:26 +0200343 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344
Don Zickusb7471c62006-09-26 10:52:26 +0200345 BUG_ON(atomic_read(&nmi_active) != 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346}
347
348static void enable_lapic_nmi_watchdog(void)
349{
Don Zickusb7471c62006-09-26 10:52:26 +0200350 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
351
352 /* are we already enabled */
353 if (atomic_read(&nmi_active) != 0)
354 return;
355
356 /* are we lapic aware */
357 if (nmi_known_cpu() <= 0)
358 return;
359
360 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
361 touch_nmi_watchdog();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362}
363
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364void disable_timer_nmi_watchdog(void)
365{
Don Zickusb7471c62006-09-26 10:52:26 +0200366 BUG_ON(nmi_watchdog != NMI_IO_APIC);
367
368 if (atomic_read(&nmi_active) <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 return;
370
Don Zickusb7471c62006-09-26 10:52:26 +0200371 disable_irq(0);
372 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
373
374 BUG_ON(atomic_read(&nmi_active) != 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375}
376
377void enable_timer_nmi_watchdog(void)
378{
Don Zickusb7471c62006-09-26 10:52:26 +0200379 BUG_ON(nmi_watchdog != NMI_IO_APIC);
380
381 if (atomic_read(&nmi_active) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 touch_nmi_watchdog();
Don Zickusb7471c62006-09-26 10:52:26 +0200383 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
384 enable_irq(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 }
386}
387
Ingo Molnar5d0e6002007-02-13 13:26:24 +0100388static void __acpi_nmi_disable(void *__unused)
389{
390 apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
391}
392
393/*
394 * Disable timer based NMIs on all CPUs:
395 */
396void acpi_nmi_disable(void)
397{
398 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
399 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
400}
401
402static void __acpi_nmi_enable(void *__unused)
403{
404 apic_write_around(APIC_LVT0, APIC_DM_NMI);
405}
406
407/*
408 * Enable timer based NMIs on all CPUs:
409 */
410void acpi_nmi_enable(void)
411{
412 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
413 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
414}
415
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

/*
 * Suspend hook: remember whether the lapic watchdog was running, then
 * stop it.  Only CPU0 reaches this point; the other CPUs are already
 * offline during suspend.
 */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

/*
 * Resume hook: restart the watchdog on CPU0 iff it was running before
 * suspend (other CPUs re-enable it as they come back online).
 */
static int lapic_nmi_resume(struct sys_device *dev)
{
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}


static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume = lapic_nmi_resume,
	.suspend = lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id = 0,
	.cls = &nmi_sysclass,
};

/*
 * Register the suspend/resume hooks.  Skipping registration (not an
 * error) when the lapic watchdog is unused or failed to set up.
 * Should really be a BUG_ON, but that doesn't work in an init call. -dcz
 */
static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif /* CONFIG_PM */
473
474/*
475 * Activate the NMI watchdog via the local APIC.
476 * Original code written by Keith Owens.
477 */
478
Don Zickusb7471c62006-09-26 10:52:26 +0200479static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
Jan Beulich7fbb4f62005-06-23 00:08:23 -0700480{
481 u64 count = (u64)cpu_khz * 1000;
482
483 do_div(count, nmi_hz);
484 if(descr)
485 Dprintk("setting %s to -0x%08Lx\n", descr, count);
Don Zickusb7471c62006-09-26 10:52:26 +0200486 wrmsrl(perfctr_msr, 0 - count);
Jan Beulich7fbb4f62005-06-23 00:08:23 -0700487}
488
Venkatesh Pallipadi90ce4bc2007-02-13 13:26:22 +0100489static void write_watchdog_counter32(unsigned int perfctr_msr,
490 const char *descr)
491{
492 u64 count = (u64)cpu_khz * 1000;
493
494 do_div(count, nmi_hz);
495 if(descr)
496 Dprintk("setting %s to -0x%08Lx\n", descr, count);
497 wrmsr(perfctr_msr, (u32)(-count), 0);
498}
499
Don Zickusb7471c62006-09-26 10:52:26 +0200500/* Note that these events don't tick when the CPU idles. This means
501 the frequency varies with CPU load. */
502
/* AMD K7 PerfEvtSel bits: counter enable, APIC interrupt on overflow,
 * count in kernel (OS) and user modes; event 0x76 = cycles not halted */
503#define K7_EVNTSEL_ENABLE (1 << 22)
504#define K7_EVNTSEL_INT (1 << 20)
505#define K7_EVNTSEL_OS (1 << 17)
506#define K7_EVNTSEL_USR (1 << 16)
507#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
508#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
509
Don Zickus828f0af2006-09-26 10:52:26 +0200510static int setup_k7_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511{
Don Zickusb7471c62006-09-26 10:52:26 +0200512 unsigned int perfctr_msr, evntsel_msr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 unsigned int evntsel;
Don Zickusb7471c62006-09-26 10:52:26 +0200514 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515
Don Zickusb7471c62006-09-26 10:52:26 +0200516 perfctr_msr = MSR_K7_PERFCTR0;
517 evntsel_msr = MSR_K7_EVNTSEL0;
518 if (!reserve_perfctr_nmi(perfctr_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200519 goto fail;
520
Don Zickusb7471c62006-09-26 10:52:26 +0200521 if (!reserve_evntsel_nmi(evntsel_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200522 goto fail1;
523
Don Zickusb7471c62006-09-26 10:52:26 +0200524 wrmsrl(perfctr_msr, 0UL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
526 evntsel = K7_EVNTSEL_INT
527 | K7_EVNTSEL_OS
528 | K7_EVNTSEL_USR
529 | K7_NMI_EVENT;
530
Don Zickusb7471c62006-09-26 10:52:26 +0200531 /* setup the timer */
532 wrmsr(evntsel_msr, evntsel, 0);
533 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 apic_write(APIC_LVTPC, APIC_DM_NMI);
535 evntsel |= K7_EVNTSEL_ENABLE;
Don Zickusb7471c62006-09-26 10:52:26 +0200536 wrmsr(evntsel_msr, evntsel, 0);
537
538 wd->perfctr_msr = perfctr_msr;
539 wd->evntsel_msr = evntsel_msr;
540 wd->cccr_msr = 0; //unused
541 wd->check_bit = 1ULL<<63;
Don Zickus828f0af2006-09-26 10:52:26 +0200542 return 1;
543fail1:
Don Zickusb7471c62006-09-26 10:52:26 +0200544 release_perfctr_nmi(perfctr_msr);
Don Zickus828f0af2006-09-26 10:52:26 +0200545fail:
546 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547}
548
Don Zickusb7471c62006-09-26 10:52:26 +0200549static void stop_k7_watchdog(void)
550{
551 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
552
553 wrmsr(wd->evntsel_msr, 0, 0);
554
555 release_evntsel_nmi(wd->evntsel_msr);
556 release_perfctr_nmi(wd->perfctr_msr);
557}
558
/* Intel P6 PerfEvtSel0 bits: global enable, APIC interrupt on overflow,
 * count in kernel (OS) and user modes; event 0x79 = clocks not halted */
559#define P6_EVNTSEL0_ENABLE (1 << 22)
560#define P6_EVNTSEL_INT (1 << 20)
561#define P6_EVNTSEL_OS (1 << 17)
562#define P6_EVNTSEL_USR (1 << 16)
563#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
564#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
565
Don Zickus828f0af2006-09-26 10:52:26 +0200566static int setup_p6_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567{
Don Zickusb7471c62006-09-26 10:52:26 +0200568 unsigned int perfctr_msr, evntsel_msr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 unsigned int evntsel;
Don Zickusb7471c62006-09-26 10:52:26 +0200570 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
Don Zickusb7471c62006-09-26 10:52:26 +0200572 perfctr_msr = MSR_P6_PERFCTR0;
573 evntsel_msr = MSR_P6_EVNTSEL0;
574 if (!reserve_perfctr_nmi(perfctr_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200575 goto fail;
576
Don Zickusb7471c62006-09-26 10:52:26 +0200577 if (!reserve_evntsel_nmi(evntsel_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200578 goto fail1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579
Don Zickusb7471c62006-09-26 10:52:26 +0200580 wrmsrl(perfctr_msr, 0UL);
581
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 evntsel = P6_EVNTSEL_INT
583 | P6_EVNTSEL_OS
584 | P6_EVNTSEL_USR
585 | P6_NMI_EVENT;
586
Don Zickusb7471c62006-09-26 10:52:26 +0200587 /* setup the timer */
588 wrmsr(evntsel_msr, evntsel, 0);
Venkatesh Pallipadi90ce4bc2007-02-13 13:26:22 +0100589 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
590 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 apic_write(APIC_LVTPC, APIC_DM_NMI);
592 evntsel |= P6_EVNTSEL0_ENABLE;
Don Zickusb7471c62006-09-26 10:52:26 +0200593 wrmsr(evntsel_msr, evntsel, 0);
594
595 wd->perfctr_msr = perfctr_msr;
596 wd->evntsel_msr = evntsel_msr;
597 wd->cccr_msr = 0; //unused
598 wd->check_bit = 1ULL<<39;
Don Zickus828f0af2006-09-26 10:52:26 +0200599 return 1;
600fail1:
Don Zickusb7471c62006-09-26 10:52:26 +0200601 release_perfctr_nmi(perfctr_msr);
Don Zickus828f0af2006-09-26 10:52:26 +0200602fail:
603 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604}
605
Don Zickusb7471c62006-09-26 10:52:26 +0200606static void stop_p6_watchdog(void)
607{
608 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
609
610 wrmsr(wd->evntsel_msr, 0, 0);
611
612 release_evntsel_nmi(wd->evntsel_msr);
613 release_perfctr_nmi(wd->perfctr_msr);
614}
615
616/* Note that these events don't tick when the CPU idles. This means
617 the frequency varies with CPU load. */
618
/* Pentium 4 ESCR/CCCR bit layout used to drive IQ_COUNTER0/1 as a clock */
619#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
620#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
621#define P4_ESCR_OS (1<<3)
622#define P4_ESCR_USR (1<<2)
623#define P4_CCCR_OVF_PMI0 (1<<26)
624#define P4_CCCR_OVF_PMI1 (1<<27)
625#define P4_CCCR_THRESHOLD(N) ((N)<<20)
626#define P4_CCCR_COMPLEMENT (1<<19)
627#define P4_CCCR_COMPARE (1<<18)
628#define P4_CCCR_REQUIRED (3<<16)
629#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
630#define P4_CCCR_ENABLE (1<<12)
/* NOTE(review): (1<<31) left-shifts into the sign bit of int —
 * works in practice here but is formally implementation-sensitive;
 * 1U<<31 would be cleaner if this mask is ever touched again */
631#define P4_CCCR_OVF (1<<31)
632/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
633 CRU_ESCR0 (with any non-null event selector) through a complemented
634 max threshold. [IA32-Vol3, Section 14.9.9] */
635
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636static int setup_p4_watchdog(void)
637{
Don Zickusb7471c62006-09-26 10:52:26 +0200638 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
639 unsigned int evntsel, cccr_val;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 unsigned int misc_enable, dummy;
Don Zickusb7471c62006-09-26 10:52:26 +0200641 unsigned int ht_num;
642 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643
Don Zickusb7471c62006-09-26 10:52:26 +0200644 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
646 return 0;
647
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648#ifdef CONFIG_SMP
Don Zickusb7471c62006-09-26 10:52:26 +0200649 /* detect which hyperthread we are on */
650 if (smp_num_siblings == 2) {
651 unsigned int ebx, apicid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
Don Zickusb7471c62006-09-26 10:52:26 +0200653 ebx = cpuid_ebx(1);
654 apicid = (ebx >> 24) & 0xff;
655 ht_num = apicid & 1;
656 } else
657#endif
658 ht_num = 0;
659
660 /* performance counters are shared resources
661 * assign each hyperthread its own set
662 * (re-use the ESCR0 register, seems safe
663 * and keeps the cccr_val the same)
664 */
665 if (!ht_num) {
666 /* logical cpu 0 */
667 perfctr_msr = MSR_P4_IQ_PERFCTR0;
668 evntsel_msr = MSR_P4_CRU_ESCR0;
669 cccr_msr = MSR_P4_IQ_CCCR0;
670 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
671 } else {
672 /* logical cpu 1 */
673 perfctr_msr = MSR_P4_IQ_PERFCTR1;
674 evntsel_msr = MSR_P4_CRU_ESCR0;
675 cccr_msr = MSR_P4_IQ_CCCR1;
676 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
677 }
678
679 if (!reserve_perfctr_nmi(perfctr_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200680 goto fail;
681
Don Zickusb7471c62006-09-26 10:52:26 +0200682 if (!reserve_evntsel_nmi(evntsel_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200683 goto fail1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
Don Zickusb7471c62006-09-26 10:52:26 +0200685 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
686 | P4_ESCR_OS
687 | P4_ESCR_USR;
688
689 cccr_val |= P4_CCCR_THRESHOLD(15)
690 | P4_CCCR_COMPLEMENT
691 | P4_CCCR_COMPARE
692 | P4_CCCR_REQUIRED;
693
694 wrmsr(evntsel_msr, evntsel, 0);
695 wrmsr(cccr_msr, cccr_val, 0);
696 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 apic_write(APIC_LVTPC, APIC_DM_NMI);
Don Zickusb7471c62006-09-26 10:52:26 +0200698 cccr_val |= P4_CCCR_ENABLE;
699 wrmsr(cccr_msr, cccr_val, 0);
700 wd->perfctr_msr = perfctr_msr;
701 wd->evntsel_msr = evntsel_msr;
702 wd->cccr_msr = cccr_msr;
703 wd->check_bit = 1ULL<<39;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 return 1;
Don Zickus828f0af2006-09-26 10:52:26 +0200705fail1:
Don Zickusb7471c62006-09-26 10:52:26 +0200706 release_perfctr_nmi(perfctr_msr);
Don Zickus828f0af2006-09-26 10:52:26 +0200707fail:
708 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709}
710
Don Zickusb7471c62006-09-26 10:52:26 +0200711static void stop_p4_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712{
Don Zickusb7471c62006-09-26 10:52:26 +0200713 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
Don Zickusb7471c62006-09-26 10:52:26 +0200715 wrmsr(wd->cccr_msr, 0, 0);
716 wrmsr(wd->evntsel_msr, 0, 0);
717
718 release_evntsel_nmi(wd->evntsel_msr);
719 release_perfctr_nmi(wd->perfctr_msr);
720}
721
/* architectural perfmon watchdog event: unhalted core cycles */
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200722#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
723#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
724
725static int setup_intel_arch_watchdog(void)
726{
727 unsigned int ebx;
728 union cpuid10_eax eax;
729 unsigned int unused;
730 unsigned int perfctr_msr, evntsel_msr;
731 unsigned int evntsel;
732 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
733
734 /*
735 * Check whether the Architectural PerfMon supports
736 * Unhalted Core Cycles Event or not.
737 * NOTE: Corresponding bit = 0 in ebx indicates event present.
738 */
739 cpuid(10, &(eax.full), &ebx, &unused, &unused);
740 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
741 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
742 goto fail;
743
744 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
745 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
746
747 if (!reserve_perfctr_nmi(perfctr_msr))
748 goto fail;
749
750 if (!reserve_evntsel_nmi(evntsel_msr))
751 goto fail1;
752
753 wrmsrl(perfctr_msr, 0UL);
754
755 evntsel = ARCH_PERFMON_EVENTSEL_INT
756 | ARCH_PERFMON_EVENTSEL_OS
757 | ARCH_PERFMON_EVENTSEL_USR
758 | ARCH_PERFMON_NMI_EVENT_SEL
759 | ARCH_PERFMON_NMI_EVENT_UMASK;
760
761 /* setup the timer */
762 wrmsr(evntsel_msr, evntsel, 0);
Venkatesh Pallipadi90ce4bc2007-02-13 13:26:22 +0100763 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
764 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200765 apic_write(APIC_LVTPC, APIC_DM_NMI);
766 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
767 wrmsr(evntsel_msr, evntsel, 0);
768
769 wd->perfctr_msr = perfctr_msr;
770 wd->evntsel_msr = evntsel_msr;
771 wd->cccr_msr = 0; //unused
772 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
773 return 1;
774fail1:
775 release_perfctr_nmi(perfctr_msr);
776fail:
777 return 0;
778}
779
780static void stop_intel_arch_watchdog(void)
781{
782 unsigned int ebx;
783 union cpuid10_eax eax;
784 unsigned int unused;
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786
787 /*
788 * Check whether the Architectural PerfMon supports
789 * Unhalted Core Cycles Event or not.
790 * NOTE: Corresponding bit = 0 in ebx indicates event present.
791 */
792 cpuid(10, &(eax.full), &ebx, &unused, &unused);
793 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
794 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
795 return;
796
797 wrmsr(wd->evntsel_msr, 0, 0);
798 release_evntsel_nmi(wd->evntsel_msr);
799 release_perfctr_nmi(wd->perfctr_msr);
800}
801
Don Zickusb7471c62006-09-26 10:52:26 +0200802void setup_apic_nmi_watchdog (void *unused)
803{
Shaohua Li4038f902006-09-26 10:52:27 +0200804 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
805
Don Zickusb7471c62006-09-26 10:52:26 +0200806 /* only support LOCAL and IO APICs for now */
807 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
808 (nmi_watchdog != NMI_IO_APIC))
809 return;
810
Shaohua Li4038f902006-09-26 10:52:27 +0200811 if (wd->enabled == 1)
812 return;
813
814 /* cheap hack to support suspend/resume */
815 /* if cpu0 is not active neither should the other cpus */
816 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
817 return;
818
Don Zickusb7471c62006-09-26 10:52:26 +0200819 if (nmi_watchdog == NMI_LOCAL_APIC) {
820 switch (boot_cpu_data.x86_vendor) {
821 case X86_VENDOR_AMD:
Andi Kleen0a4599c2007-02-13 13:26:25 +0100822 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
823 boot_cpu_data.x86 != 16)
Don Zickusb7471c62006-09-26 10:52:26 +0200824 return;
825 if (!setup_k7_watchdog())
Don Zickus828f0af2006-09-26 10:52:26 +0200826 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 break;
Don Zickusb7471c62006-09-26 10:52:26 +0200828 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200829 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
830 if (!setup_intel_arch_watchdog())
831 return;
832 break;
833 }
Don Zickusb7471c62006-09-26 10:52:26 +0200834 switch (boot_cpu_data.x86) {
835 case 6:
836 if (boot_cpu_data.x86_model > 0xd)
837 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838
Don Zickusb7471c62006-09-26 10:52:26 +0200839 if (!setup_p6_watchdog())
840 return;
841 break;
842 case 15:
843 if (boot_cpu_data.x86_model > 0x4)
844 return;
845
846 if (!setup_p4_watchdog())
847 return;
848 break;
849 default:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 return;
Don Zickusb7471c62006-09-26 10:52:26 +0200851 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 break;
853 default:
854 return;
855 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 }
Shaohua Li4038f902006-09-26 10:52:27 +0200857 wd->enabled = 1;
Don Zickusb7471c62006-09-26 10:52:26 +0200858 atomic_inc(&nmi_active);
859}
860
Shaohua Li4038f902006-09-26 10:52:27 +0200861void stop_apic_nmi_watchdog(void *unused)
Don Zickusb7471c62006-09-26 10:52:26 +0200862{
Shaohua Li4038f902006-09-26 10:52:27 +0200863 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
864
Don Zickusb7471c62006-09-26 10:52:26 +0200865 /* only support LOCAL and IO APICs for now */
866 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
867 (nmi_watchdog != NMI_IO_APIC))
868 return;
869
Shaohua Li4038f902006-09-26 10:52:27 +0200870 if (wd->enabled == 0)
871 return;
872
Don Zickusb7471c62006-09-26 10:52:26 +0200873 if (nmi_watchdog == NMI_LOCAL_APIC) {
874 switch (boot_cpu_data.x86_vendor) {
875 case X86_VENDOR_AMD:
876 stop_k7_watchdog();
877 break;
878 case X86_VENDOR_INTEL:
Venkatesh Pallipadi248dcb22006-09-26 10:52:27 +0200879 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
880 stop_intel_arch_watchdog();
881 break;
882 }
Don Zickusb7471c62006-09-26 10:52:26 +0200883 switch (boot_cpu_data.x86) {
884 case 6:
885 if (boot_cpu_data.x86_model > 0xd)
886 break;
887 stop_p6_watchdog();
888 break;
889 case 15:
890 if (boot_cpu_data.x86_model > 0x4)
891 break;
892 stop_p4_watchdog();
893 break;
894 }
895 break;
896 default:
897 return;
898 }
899 }
Shaohua Li4038f902006-09-26 10:52:27 +0200900 wd->enabled = 0;
Don Zickusb7471c62006-09-26 10:52:26 +0200901 atomic_dec(&nmi_active);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902}
903
904/*
905 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
907 * changing then that CPU has some problem.
908 *
909 * as these watchdog NMI IRQs are generated on every CPU, we only
910 * have to check the current processor.
911 *
912 * since NMIs don't listen to _any_ locks, we have to be extremely
913 * careful not to rely on unsafe variables. The printk might lock
914 * up though, so we have to break up any console locks first ...
915 * [when there will be more tty-related locks, break them up
916 * here too!]
917 */
918
/*
 * Per-CPU lockup-detection state, indexed by CPU number:
 * last_irq_sums holds the timer-interrupt count seen on the previous
 * watchdog tick; alert_counter counts consecutive ticks with no progress.
 */
static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];
922
923void touch_nmi_watchdog (void)
924{
Jan Beulichc6ea3962006-12-07 02:14:09 +0100925 if (nmi_watchdog > 0) {
926 unsigned cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927
Jan Beulichc6ea3962006-12-07 02:14:09 +0100928 /*
929 * Just reset the alert counters, (other CPUs might be
930 * spinning on locks we hold):
931 */
932 for_each_present_cpu (cpu)
933 alert_counter[cpu] = 0;
934 }
Ingo Molnar8446f1d2005-09-06 15:16:27 -0700935
936 /*
937 * Tickle the softlockup detector too:
938 */
939 touch_softlockup_watchdog();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940}
Michal Schmidt1e862402006-07-30 03:03:29 -0700941EXPORT_SYMBOL(touch_nmi_watchdog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942
943extern void die_nmi(struct pt_regs *, const char *msg);
944
/*
 * Per-CPU watchdog tick, run in NMI context.  Two jobs:
 *  1) lockup detection: if this CPU's timer interrupt count has not
 *     advanced for ~5 seconds worth of ticks, call die_nmi();
 *  2) decide whether THIS NMI came from the watchdog's performance
 *     counter, and if so re-arm the counter for the next period.
 * Returns 1 if the NMI was consumed here, 0 if it is unknown and the
 * caller should keep looking for a source.
 */
__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{

	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc=0;

	/* give other DIE_NMI users (kgdb, oprofile, ...) first refusal */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		/* someone consumed it: skip lockup detection this tick */
		touched = 1;
	}

	/* another CPU asked us (via backtrace_mask) to dump our stack */
	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

	/*
	 * Take the local apic timer and PIT/HPET into account. We don't
	 * know which one is active, when we have highres/dyntick on
	 */
	sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);

	/* if none of the timers is firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		/* progress was made: remember the count, reset the alarm */
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			/* overflow clears the sign/check bit in the perfctr */
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit){
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/* P6 based Pentium M need to re-unmask
				 * the apic vector but it doesn't hurt
				 * other P6 variant.
				 * ArchPerfom/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* P6/ARCH_PERFMON has 32 bit counter write */
				write_watchdog_counter32(wd->perfctr_msr, NULL);
			} else {
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}
1049
/* Last-chance hook for NMIs nobody else claimed; 1 = handled here. */
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	return unknown_nmi_panic ? unknown_nmi_panic_callback(regs, cpu) : 0;
#else
	return 0;
#endif
}
1058
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059#ifdef CONFIG_SYSCTL
1060
/*
 * Called (when the unknown_nmi_panic sysctl is set) for an NMI with no
 * identified source: format the hardware reason byte and die.
 * Returns 0 only if die_nmi() returns (i.e. a notifier said NOTIFY_STOP).
 */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	/* bounded formatting: never allow an overrun of the stack buffer */
	snprintf(buf, sizeof(buf),
		 "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}
1070
Don Zickus407984f2006-09-26 10:52:27 +02001071/*
Don Zickuse33e89a2006-09-26 10:52:27 +02001072 * proc handler for /proc/sys/kernel/nmi
Don Zickus407984f2006-09-26 10:52:27 +02001073 */
1074int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
1075 void __user *buffer, size_t *length, loff_t *ppos)
1076{
1077 int old_state;
1078
1079 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
1080 old_state = nmi_watchdog_enabled;
1081 proc_dointvec(table, write, file, buffer, length, ppos);
1082 if (!!old_state == !!nmi_watchdog_enabled)
1083 return 0;
1084
1085 if (atomic_read(&nmi_active) < 0) {
Don Zickuse33e89a2006-09-26 10:52:27 +02001086 printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
1087 return -EIO;
Don Zickus407984f2006-09-26 10:52:27 +02001088 }
1089
1090 if (nmi_watchdog == NMI_DEFAULT) {
1091 if (nmi_known_cpu() > 0)
1092 nmi_watchdog = NMI_LOCAL_APIC;
1093 else
1094 nmi_watchdog = NMI_IO_APIC;
1095 }
1096
Don Zickuse33e89a2006-09-26 10:52:27 +02001097 if (nmi_watchdog == NMI_LOCAL_APIC) {
Don Zickus407984f2006-09-26 10:52:27 +02001098 if (nmi_watchdog_enabled)
1099 enable_lapic_nmi_watchdog();
1100 else
1101 disable_lapic_nmi_watchdog();
Don Zickus407984f2006-09-26 10:52:27 +02001102 } else {
1103 printk( KERN_WARNING
1104 "NMI watchdog doesn't know what hardware to touch\n");
1105 return -EIO;
1106 }
1107 return 0;
1108}
1109
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110#endif
1111
Andrew Mortonbb81a092006-12-07 02:14:01 +01001112void __trigger_all_cpu_backtrace(void)
1113{
1114 int i;
1115
1116 backtrace_mask = cpu_online_map;
1117 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
1118 for (i = 0; i < 10 * 1000; i++) {
1119 if (cpus_empty(backtrace_mask))
1120 break;
1121 mdelay(1);
1122 }
1123}
1124
/* Symbols exported for modules that share the perfctr/NMI machinery
 * (e.g. oprofile) and for generic watchdog control. */
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);