/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"

int unknown_nmi_panic;
int nmi_watchdog_enabled;

/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/event selections may be reserved by
 *   different subsystems; this reservation system just tries to
 *   coordinate things a little (see the illustrative example after
 *   release_evntsel_nmi() below)
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);

static cpumask_t backtrace_mask = CPU_MASK_NONE;

/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;

struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}

/* checks availability of a bit (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}

int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
		return 1;
	return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
		return 1;
	return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}

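/*
 * Illustrative sketch (not compiled): how another perfctr user, such as
 * a profiler, might coordinate with the watchdog through the reservation
 * API above.  Reserve the counter and its event select MSR before
 * programming either, and back out on contention.  The K7 MSR choice
 * and the "example_" names are assumptions for this example.
 */
#if 0
static int example_claim_counter(void)
{
	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
		return -EBUSY;		/* watchdog or oprofile owns it */
	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
		release_perfctr_nmi(MSR_K7_PERFCTR0);
		return -EBUSY;
	}
	/* ... safe to program MSR_K7_EVNTSEL0/MSR_K7_PERFCTR0 here ... */
	return 0;
}

static void example_release_counter(void)
{
	release_evntsel_nmi(MSR_K7_EVNTSEL0);
	release_perfctr_nmi(MSR_K7_PERFCTR0);
}
#endif
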
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	}
	return 0;
}

static int endflag __initdata = 0;

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test, make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat fewer cycles. */
	while (endflag == 0)
		mb();
}
#endif

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31 bit values; bit 32
	 * must be 1 for bits 33 and above to sign extend to 1.
	 * Find the appropriate nmi_hz.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}

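/*
 * Worked example (figures assumed for illustration): on a 3 GHz CPU,
 * cpu_khz = 3000000, so one second is 3e9 cycles.  3e9 / 1 Hz exceeds
 * the 31-bit maximum 0x7fffffff (~2.1e9), so adjust_for_32bit_ctr(1)
 * returns 3e9 / 0x7fffffff + 1 = 2, i.e. the watchdog must tick at
 * 2 Hz to keep each counter period programmable in 31 bits.
 */
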
static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	/* Enable NMI watchdog for newer systems.
	   Probably safe on most older systems too, but let's be careful.
	   IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM,
	   which hangs the system. Disable the watchdog for all ThinkPads */
	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
		!dmi_name_in_vendors("ThinkPad"))
		nmi_watchdog = NMI_LOCAL_APIC;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;

		if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
		    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);

static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);

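/*
 * Usage note (values per the NMI_* constants in asm/nmi.h at the time
 * of writing; treat the exact numbers as an assumption): booting with
 * "nmi_watchdog=1" selects the IO-APIC watchdog (NMI_IO_APIC),
 * "nmi_watchdog=2" the local APIC one (NMI_LOCAL_APIC), and
 * "nmi_watchdog=0" disables it.
 */
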
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}

void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}

static void __acpi_nmi_disable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

static void __acpi_nmi_enable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}

#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}


static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but because this is an
	 * init call, it just doesn't work. -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
			const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}

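/*
 * Worked example (figures assumed for illustration): with cpu_khz =
 * 2000000 (a 2 GHz CPU) and nmi_hz = 1000, count = 2e9 / 1000 =
 * 2,000,000 cycles.  Programming the counter to -2,000,000 makes it
 * overflow, and hence raise an NMI, roughly once per millisecond,
 * i.e. at about nmi_hz.
 */
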
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

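/*
 * Worked example: the event select value programmed below is
 * K7_EVNTSEL_INT | K7_EVNTSEL_OS | K7_EVNTSEL_USR | K7_NMI_EVENT
 * = 0x100000 | 0x20000 | 0x10000 | 0x76 = 0x130076, and OR-ing in
 * K7_EVNTSEL_ENABLE (0x400000) once the counter is armed gives
 * 0x530076.
 */
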
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL<<63;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL<<39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */

static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		| P4_CCCR_COMPLEMENT
		| P4_CCCR_COMPARE
		| P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL<<39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

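/*
 * Worked example for logical cpu 0 (bit arithmetic from the P4_CCCR_*
 * defines above): cccr_val = P4_CCCR_OVF_PMI0 (0x04000000)
 * | P4_CCCR_ESCR_SELECT(4) (0x8000) | P4_CCCR_THRESHOLD(15) (0xf00000)
 * | P4_CCCR_COMPLEMENT (0x80000) | P4_CCCR_COMPARE (0x40000)
 * | P4_CCCR_REQUIRED (0x30000) = 0x04ff8000, becoming 0x04ff9000 once
 * P4_CCCR_ENABLE (0x1000) is OR-ed in.
 */
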
static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);
	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

void setup_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;

				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;

				if (!setup_p4_watchdog())
					return;
				break;
			default:
				return;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}

void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];

void touch_nmi_watchdog(void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Just reset the alert counters, (other CPUs might be
		 * spinning on locks we hold):
		 */
		for_each_present_cpu(cpu)
			alert_counter[cpu] = 0;
	}

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

extern void die_nmi(struct pt_regs *, const char *msg);

__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{

	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/* P6 based Pentium M need to re-unmask
				 * the apic vector but it doesn't hurt
				 * other P6 variants.
				 * ArchPerfmon/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* P6/ARCH_PERFMON has 32 bit counter write */
				write_watchdog_counter32(wd->perfctr_msr, NULL);
			} else {
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}

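/*
 * Illustrative sketch (not compiled): the notify_die(DIE_NMI, ...) call
 * at the top of nmi_watchdog_tick() is how other subsystems (oprofile,
 * debuggers, ...) get first crack at an NMI.  A consumer would look
 * roughly like this; the "example_" names are assumptions, while the
 * notifier API is from <linux/notifier.h> and <asm/kdebug.h>.
 */
#if 0
static int example_nmi_notify(struct notifier_block *self,
			      unsigned long val, void *data)
{
	struct die_args *args = data;

	if (val != DIE_NMI)
		return NOTIFY_DONE;

	/* inspect args->regs here; returning NOTIFY_STOP claims the NMI,
	   which makes nmi_watchdog_tick() set rc = 1 and touched = 1 */
	return NOTIFY_STOP;
}

static struct notifier_block example_nmi_nb = {
	.notifier_call = example_nmi_notify,
};

/* somewhere in init code: register_die_notifier(&example_nmi_nb); */
#endif
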
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}

#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_DEFAULT) {
		if (nmi_known_cpu() > 0)
			nmi_watchdog = NMI_LOCAL_APIC;
		else
			nmi_watchdog = NMI_IO_APIC;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}

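/*
 * Usage note (illustrative; the exact proc path follows the comment
 * above and should be treated as an assumption): writing 0 or 1 to the
 * sysctl file, e.g. "echo 0 > /proc/sys/kernel/nmi", lands here via
 * proc_dointvec() and disables or enables the lapic watchdog at run
 * time.
 */
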
#endif

void __trigger_all_cpu_backtrace(void)
{
	int i;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	for (i = 0; i < 10 * 1000; i++) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}

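/*
 * How it fits together: every online CPU is flagged in backtrace_mask;
 * on its next watchdog NMI, nmi_watchdog_tick() above sees its bit,
 * dumps a stack trace and clears it, so the loop above drains the mask
 * as the traces come in.  Illustrative caller (the wrapper name in
 * asm/nmi.h is an assumption here):
 */
#if 0
	trigger_all_cpu_backtrace();	/* NMI-kick a trace out of every CPU */
#endif
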
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);