blob: 6e5085d5d2f6e7f97e5a3f0d4e9900df2dfcb06b [file] [log] [blame]
/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */
15
16#include <linux/config.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/delay.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/interrupt.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/module.h>
20#include <linux/nmi.h>
21#include <linux/sysdev.h>
22#include <linux/sysctl.h>
Don Zickus3e4ff112006-06-26 13:57:01 +020023#include <linux/percpu.h>
Andi Kleen1de84972006-09-26 10:52:27 +020024#include <linux/dmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
26#include <asm/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <asm/nmi.h>
Don Zickusb7471c62006-09-26 10:52:26 +020028#include <asm/kdebug.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029
30#include "mach_traps.h"
31
Don Zickus828f0af2006-09-26 10:52:26 +020032/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
33 * evtsel_nmi_owner tracks the ownership of the event selection
34 * - different performance counters/ event selection may be reserved for
35 * different subsystems this reservation system just tries to coordinate
36 * things a little
37 */
38static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
39static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
40
41/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
42 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
43 */
44#define NMI_MAX_COUNTER_BITS 66
45
Linus Torvalds1da177e2005-04-16 15:20:36 -070046/* nmi_active:
Don Zickusb7471c62006-09-26 10:52:26 +020047 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
Linus Torvalds1da177e2005-04-16 15:20:36 -070049 * be enabled
Don Zickusb7471c62006-09-26 10:52:26 +020050 * 0: the lapic NMI watchdog is disabled, but can be enabled
Linus Torvalds1da177e2005-04-16 15:20:36 -070051 */
Don Zickusb7471c62006-09-26 10:52:26 +020052atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
Don Zickusb7471c62006-09-26 10:52:26 +020054unsigned int nmi_watchdog = NMI_DEFAULT;
55static unsigned int nmi_hz = HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
Don Zickusb7471c62006-09-26 10:52:26 +020057struct nmi_watchdog_ctlblk {
58 int enabled;
59 u64 check_bit;
60 unsigned int cccr_msr;
61 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
62 unsigned int evntsel_msr; /* the MSR to select the events to handle */
63};
64static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

/* declared here, not defined in this file */
extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;
Linus Torvalds1da177e2005-04-16 15:20:36 -070071
Don Zickus828f0af2006-09-26 10:52:26 +020072/* converts an msr to an appropriate reservation bit */
73static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
74{
75 /* returns the bit offset of the performance counter register */
76 switch (boot_cpu_data.x86_vendor) {
77 case X86_VENDOR_AMD:
78 return (msr - MSR_K7_PERFCTR0);
79 case X86_VENDOR_INTEL:
80 switch (boot_cpu_data.x86) {
81 case 6:
82 return (msr - MSR_P6_PERFCTR0);
83 case 15:
84 return (msr - MSR_P4_BPU_PERFCTR0);
85 }
86 }
87 return 0;
88}
89
90/* converts an msr to an appropriate reservation bit */
91static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
92{
93 /* returns the bit offset of the event selection register */
94 switch (boot_cpu_data.x86_vendor) {
95 case X86_VENDOR_AMD:
96 return (msr - MSR_K7_EVNTSEL0);
97 case X86_VENDOR_INTEL:
98 switch (boot_cpu_data.x86) {
99 case 6:
100 return (msr - MSR_P6_EVNTSEL0);
101 case 15:
102 return (msr - MSR_P4_BSU_ESCR0);
103 }
104 }
105 return 0;
106}
107
108/* checks for a bit availability (hack for oprofile) */
109int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
110{
111 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
112
113 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
114}
115
116/* checks the an msr for availability */
117int avail_to_resrv_perfctr_nmi(unsigned int msr)
118{
119 unsigned int counter;
120
121 counter = nmi_perfctr_msr_to_bit(msr);
122 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
123
124 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
125}
126
127int reserve_perfctr_nmi(unsigned int msr)
128{
129 unsigned int counter;
130
131 counter = nmi_perfctr_msr_to_bit(msr);
132 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
133
134 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
135 return 1;
136 return 0;
137}
138
139void release_perfctr_nmi(unsigned int msr)
140{
141 unsigned int counter;
142
143 counter = nmi_perfctr_msr_to_bit(msr);
144 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
145
146 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
147}
148
149int reserve_evntsel_nmi(unsigned int msr)
150{
151 unsigned int counter;
152
153 counter = nmi_evntsel_msr_to_bit(msr);
154 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
155
156 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
157 return 1;
158 return 0;
159}
160
161void release_evntsel_nmi(unsigned int msr)
162{
163 unsigned int counter;
164
165 counter = nmi_evntsel_msr_to_bit(msr);
166 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
167
168 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
169}
170
Don Zickusb7471c62006-09-26 10:52:26 +0200171static __cpuinit inline int nmi_known_cpu(void)
172{
173 switch (boot_cpu_data.x86_vendor) {
174 case X86_VENDOR_AMD:
175 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
176 case X86_VENDOR_INTEL:
177 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
178 }
179 return 0;
180}
181
#ifdef CONFIG_SMP
/*
 * The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 *
 * @data points to an int flag; the CPU spins until it becomes non-zero.
 */
static __init void nmi_cpu_busy(void *data)
{
	volatile int *done = data;

	local_irq_enable_in_hardirq();
	/*
	 * Intentionally don't use cpu_relax here. This is
	 * to make sure that the performance counter really ticks,
	 * even if there is a simulator or similar that catches the
	 * pause instruction. On a real HT machine this is fine because
	 * all other CPUs are busy with "useless" delay loops and don't
	 * care if they get somewhat less cycles.
	 */
	while (!*done)
		barrier();
}
#endif
201
Jack F Vogel67701ae2005-05-01 08:58:48 -0700202static int __init check_nmi_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203{
Eric W. Biederman29b70082005-10-30 14:59:40 -0800204 volatile int endflag = 0;
205 unsigned int *prev_nmi_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 int cpu;
207
Andi Kleen1de84972006-09-26 10:52:27 +0200208 /* Enable NMI watchdog for newer systems.
209 Actually it should be safe for most systems before 2004 too except
210 for some IBM systems that corrupt registers when NMI happens
211 during SMM. Unfortunately we don't have more exact information
212 on these and use this coarse check. */
213 if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
214 nmi_watchdog = NMI_LOCAL_APIC;
215
Don Zickusb7471c62006-09-26 10:52:26 +0200216 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
217 return 0;
218
219 if (!atomic_read(&nmi_active))
Jack F Vogel67701ae2005-05-01 08:58:48 -0700220 return 0;
221
Eric W. Biederman29b70082005-10-30 14:59:40 -0800222 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
223 if (!prev_nmi_count)
224 return -1;
225
Jack F Vogel67701ae2005-05-01 08:58:48 -0700226 printk(KERN_INFO "Testing NMI watchdog ... ");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227
Eric W. Biederman29b70082005-10-30 14:59:40 -0800228 if (nmi_watchdog == NMI_LOCAL_APIC)
229 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
230
KAMEZAWA Hiroyukic8912592006-03-28 01:56:39 -0800231 for_each_possible_cpu(cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
233 local_irq_enable();
234 mdelay((10*1000)/nmi_hz); // wait 10 ticks
235
KAMEZAWA Hiroyukic8912592006-03-28 01:56:39 -0800236 for_each_possible_cpu(cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237#ifdef CONFIG_SMP
238 /* Check cpu_callin_map here because that is set
239 after the timer is started. */
240 if (!cpu_isset(cpu, cpu_callin_map))
241 continue;
242#endif
Don Zickusb7471c62006-09-26 10:52:26 +0200243 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
244 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
Eric W. Biederman29b70082005-10-30 14:59:40 -0800246 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
247 cpu,
248 prev_nmi_count[cpu],
249 nmi_count(cpu));
Don Zickusb7471c62006-09-26 10:52:26 +0200250 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
251 atomic_dec(&nmi_active);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 }
253 }
Don Zickusb7471c62006-09-26 10:52:26 +0200254 if (!atomic_read(&nmi_active)) {
255 kfree(prev_nmi_count);
256 atomic_set(&nmi_active, -1);
257 return -1;
258 }
Eric W. Biederman29b70082005-10-30 14:59:40 -0800259 endflag = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 printk("OK.\n");
261
262 /* now that we know it works we can reduce NMI frequency to
263 something more reasonable; makes a difference in some configs */
264 if (nmi_watchdog == NMI_LOCAL_APIC)
265 nmi_hz = 1;
266
Eric W. Biederman29b70082005-10-30 14:59:40 -0800267 kfree(prev_nmi_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 return 0;
269}
Jack F Vogel67701ae2005-05-01 08:58:48 -0700270/* This needs to happen later in boot so counters are working */
271late_initcall(check_nmi_watchdog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272
273static int __init setup_nmi_watchdog(char *str)
274{
275 int nmi;
276
277 get_option(&str, &nmi);
278
Don Zickusb7471c62006-09-26 10:52:26 +0200279 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 /*
282 * If any other x86 CPU has a local APIC, then
283 * please test the NMI stuff there and send me the
284 * missing bits. Right now Intel P6/P4 and AMD K7 only.
285 */
Don Zickusb7471c62006-09-26 10:52:26 +0200286 if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
287 return 0; /* no lapic support */
288 nmi_watchdog = nmi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 return 1;
290}
291
292__setup("nmi_watchdog=", setup_nmi_watchdog);
293
294static void disable_lapic_nmi_watchdog(void)
295{
Don Zickusb7471c62006-09-26 10:52:26 +0200296 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
297
298 if (atomic_read(&nmi_active) <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300
Don Zickusb7471c62006-09-26 10:52:26 +0200301 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302
Don Zickusb7471c62006-09-26 10:52:26 +0200303 BUG_ON(atomic_read(&nmi_active) != 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304}
305
306static void enable_lapic_nmi_watchdog(void)
307{
Don Zickusb7471c62006-09-26 10:52:26 +0200308 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
309
310 /* are we already enabled */
311 if (atomic_read(&nmi_active) != 0)
312 return;
313
314 /* are we lapic aware */
315 if (nmi_known_cpu() <= 0)
316 return;
317
318 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
319 touch_nmi_watchdog();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320}
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322void disable_timer_nmi_watchdog(void)
323{
Don Zickusb7471c62006-09-26 10:52:26 +0200324 BUG_ON(nmi_watchdog != NMI_IO_APIC);
325
326 if (atomic_read(&nmi_active) <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 return;
328
Don Zickusb7471c62006-09-26 10:52:26 +0200329 disable_irq(0);
330 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
331
332 BUG_ON(atomic_read(&nmi_active) != 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333}
334
335void enable_timer_nmi_watchdog(void)
336{
Don Zickusb7471c62006-09-26 10:52:26 +0200337 BUG_ON(nmi_watchdog != NMI_IO_APIC);
338
339 if (atomic_read(&nmi_active) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 touch_nmi_watchdog();
Don Zickusb7471c62006-09-26 10:52:26 +0200341 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
342 enable_irq(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 }
344}
345
346#ifdef CONFIG_PM
347
348static int nmi_pm_active; /* nmi_active before suspend */
349
Pavel Machek438510f2005-04-16 15:25:24 -0700350static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351{
Shaohua Li4038f902006-09-26 10:52:27 +0200352 /* only CPU0 goes here, other CPUs should be offline */
Don Zickusb7471c62006-09-26 10:52:26 +0200353 nmi_pm_active = atomic_read(&nmi_active);
Shaohua Li4038f902006-09-26 10:52:27 +0200354 stop_apic_nmi_watchdog(NULL);
355 BUG_ON(atomic_read(&nmi_active) != 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 return 0;
357}
358
359static int lapic_nmi_resume(struct sys_device *dev)
360{
Shaohua Li4038f902006-09-26 10:52:27 +0200361 /* only CPU0 goes here, other CPUs should be offline */
362 if (nmi_pm_active > 0) {
363 setup_apic_nmi_watchdog(NULL);
364 touch_nmi_watchdog();
365 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 return 0;
367}
368
369
370static struct sysdev_class nmi_sysclass = {
371 set_kset_name("lapic_nmi"),
372 .resume = lapic_nmi_resume,
373 .suspend = lapic_nmi_suspend,
374};
375
376static struct sys_device device_lapic_nmi = {
377 .id = 0,
378 .cls = &nmi_sysclass,
379};
380
381static int __init init_lapic_nmi_sysfs(void)
382{
383 int error;
384
Don Zickusb7471c62006-09-26 10:52:26 +0200385 /* should really be a BUG_ON but b/c this is an
386 * init call, it just doesn't work. -dcz
387 */
388 if (nmi_watchdog != NMI_LOCAL_APIC)
389 return 0;
390
391 if ( atomic_read(&nmi_active) < 0 )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 return 0;
393
394 error = sysdev_class_register(&nmi_sysclass);
395 if (!error)
396 error = sysdev_register(&device_lapic_nmi);
397 return error;
398}
399/* must come after the local APIC's device_initcall() */
400late_initcall(init_lapic_nmi_sysfs);
401
402#endif /* CONFIG_PM */
403
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */
408
Don Zickusb7471c62006-09-26 10:52:26 +0200409static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
Jan Beulich7fbb4f62005-06-23 00:08:23 -0700410{
411 u64 count = (u64)cpu_khz * 1000;
412
413 do_div(count, nmi_hz);
414 if(descr)
415 Dprintk("setting %s to -0x%08Lx\n", descr, count);
Don Zickusb7471c62006-09-26 10:52:26 +0200416 wrmsrl(perfctr_msr, 0 - count);
Jan Beulich7fbb4f62005-06-23 00:08:23 -0700417}
418
Don Zickusb7471c62006-09-26 10:52:26 +0200419/* Note that these events don't tick when the CPU idles. This means
420 the frequency varies with CPU load. */
421
422#define K7_EVNTSEL_ENABLE (1 << 22)
423#define K7_EVNTSEL_INT (1 << 20)
424#define K7_EVNTSEL_OS (1 << 17)
425#define K7_EVNTSEL_USR (1 << 16)
426#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
427#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
428
Don Zickus828f0af2006-09-26 10:52:26 +0200429static int setup_k7_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430{
Don Zickusb7471c62006-09-26 10:52:26 +0200431 unsigned int perfctr_msr, evntsel_msr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 unsigned int evntsel;
Don Zickusb7471c62006-09-26 10:52:26 +0200433 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
Don Zickusb7471c62006-09-26 10:52:26 +0200435 perfctr_msr = MSR_K7_PERFCTR0;
436 evntsel_msr = MSR_K7_EVNTSEL0;
437 if (!reserve_perfctr_nmi(perfctr_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200438 goto fail;
439
Don Zickusb7471c62006-09-26 10:52:26 +0200440 if (!reserve_evntsel_nmi(evntsel_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200441 goto fail1;
442
Don Zickusb7471c62006-09-26 10:52:26 +0200443 wrmsrl(perfctr_msr, 0UL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
445 evntsel = K7_EVNTSEL_INT
446 | K7_EVNTSEL_OS
447 | K7_EVNTSEL_USR
448 | K7_NMI_EVENT;
449
Don Zickusb7471c62006-09-26 10:52:26 +0200450 /* setup the timer */
451 wrmsr(evntsel_msr, evntsel, 0);
452 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 apic_write(APIC_LVTPC, APIC_DM_NMI);
454 evntsel |= K7_EVNTSEL_ENABLE;
Don Zickusb7471c62006-09-26 10:52:26 +0200455 wrmsr(evntsel_msr, evntsel, 0);
456
457 wd->perfctr_msr = perfctr_msr;
458 wd->evntsel_msr = evntsel_msr;
459 wd->cccr_msr = 0; //unused
460 wd->check_bit = 1ULL<<63;
Don Zickus828f0af2006-09-26 10:52:26 +0200461 return 1;
462fail1:
Don Zickusb7471c62006-09-26 10:52:26 +0200463 release_perfctr_nmi(perfctr_msr);
Don Zickus828f0af2006-09-26 10:52:26 +0200464fail:
465 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466}
467
Don Zickusb7471c62006-09-26 10:52:26 +0200468static void stop_k7_watchdog(void)
469{
470 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
471
472 wrmsr(wd->evntsel_msr, 0, 0);
473
474 release_evntsel_nmi(wd->evntsel_msr);
475 release_perfctr_nmi(wd->perfctr_msr);
476}
477
478#define P6_EVNTSEL0_ENABLE (1 << 22)
479#define P6_EVNTSEL_INT (1 << 20)
480#define P6_EVNTSEL_OS (1 << 17)
481#define P6_EVNTSEL_USR (1 << 16)
482#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
483#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
484
Don Zickus828f0af2006-09-26 10:52:26 +0200485static int setup_p6_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486{
Don Zickusb7471c62006-09-26 10:52:26 +0200487 unsigned int perfctr_msr, evntsel_msr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 unsigned int evntsel;
Don Zickusb7471c62006-09-26 10:52:26 +0200489 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490
Don Zickusb7471c62006-09-26 10:52:26 +0200491 perfctr_msr = MSR_P6_PERFCTR0;
492 evntsel_msr = MSR_P6_EVNTSEL0;
493 if (!reserve_perfctr_nmi(perfctr_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200494 goto fail;
495
Don Zickusb7471c62006-09-26 10:52:26 +0200496 if (!reserve_evntsel_nmi(evntsel_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200497 goto fail1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
Don Zickusb7471c62006-09-26 10:52:26 +0200499 wrmsrl(perfctr_msr, 0UL);
500
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 evntsel = P6_EVNTSEL_INT
502 | P6_EVNTSEL_OS
503 | P6_EVNTSEL_USR
504 | P6_NMI_EVENT;
505
Don Zickusb7471c62006-09-26 10:52:26 +0200506 /* setup the timer */
507 wrmsr(evntsel_msr, evntsel, 0);
508 write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 apic_write(APIC_LVTPC, APIC_DM_NMI);
510 evntsel |= P6_EVNTSEL0_ENABLE;
Don Zickusb7471c62006-09-26 10:52:26 +0200511 wrmsr(evntsel_msr, evntsel, 0);
512
513 wd->perfctr_msr = perfctr_msr;
514 wd->evntsel_msr = evntsel_msr;
515 wd->cccr_msr = 0; //unused
516 wd->check_bit = 1ULL<<39;
Don Zickus828f0af2006-09-26 10:52:26 +0200517 return 1;
518fail1:
Don Zickusb7471c62006-09-26 10:52:26 +0200519 release_perfctr_nmi(perfctr_msr);
Don Zickus828f0af2006-09-26 10:52:26 +0200520fail:
521 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522}
523
Don Zickusb7471c62006-09-26 10:52:26 +0200524static void stop_p6_watchdog(void)
525{
526 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
527
528 wrmsr(wd->evntsel_msr, 0, 0);
529
530 release_evntsel_nmi(wd->evntsel_msr);
531 release_perfctr_nmi(wd->perfctr_msr);
532}
533
/*
 * Note that these events don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/*
 * Bit 31 must be built from an unsigned constant: shifting a signed 1
 * into the sign bit is undefined behaviour in C.  The value of
 * ~P4_CCCR_OVF (0x7fffffff) is the same as before.
 */
#define P4_CCCR_OVF		(1U<<31)
/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
553
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554static int setup_p4_watchdog(void)
555{
Don Zickusb7471c62006-09-26 10:52:26 +0200556 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
557 unsigned int evntsel, cccr_val;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 unsigned int misc_enable, dummy;
Don Zickusb7471c62006-09-26 10:52:26 +0200559 unsigned int ht_num;
560 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
Don Zickusb7471c62006-09-26 10:52:26 +0200562 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
564 return 0;
565
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566#ifdef CONFIG_SMP
Don Zickusb7471c62006-09-26 10:52:26 +0200567 /* detect which hyperthread we are on */
568 if (smp_num_siblings == 2) {
569 unsigned int ebx, apicid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
Don Zickusb7471c62006-09-26 10:52:26 +0200571 ebx = cpuid_ebx(1);
572 apicid = (ebx >> 24) & 0xff;
573 ht_num = apicid & 1;
574 } else
575#endif
576 ht_num = 0;
577
578 /* performance counters are shared resources
579 * assign each hyperthread its own set
580 * (re-use the ESCR0 register, seems safe
581 * and keeps the cccr_val the same)
582 */
583 if (!ht_num) {
584 /* logical cpu 0 */
585 perfctr_msr = MSR_P4_IQ_PERFCTR0;
586 evntsel_msr = MSR_P4_CRU_ESCR0;
587 cccr_msr = MSR_P4_IQ_CCCR0;
588 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
589 } else {
590 /* logical cpu 1 */
591 perfctr_msr = MSR_P4_IQ_PERFCTR1;
592 evntsel_msr = MSR_P4_CRU_ESCR0;
593 cccr_msr = MSR_P4_IQ_CCCR1;
594 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
595 }
596
597 if (!reserve_perfctr_nmi(perfctr_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200598 goto fail;
599
Don Zickusb7471c62006-09-26 10:52:26 +0200600 if (!reserve_evntsel_nmi(evntsel_msr))
Don Zickus828f0af2006-09-26 10:52:26 +0200601 goto fail1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602
Don Zickusb7471c62006-09-26 10:52:26 +0200603 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
604 | P4_ESCR_OS
605 | P4_ESCR_USR;
606
607 cccr_val |= P4_CCCR_THRESHOLD(15)
608 | P4_CCCR_COMPLEMENT
609 | P4_CCCR_COMPARE
610 | P4_CCCR_REQUIRED;
611
612 wrmsr(evntsel_msr, evntsel, 0);
613 wrmsr(cccr_msr, cccr_val, 0);
614 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 apic_write(APIC_LVTPC, APIC_DM_NMI);
Don Zickusb7471c62006-09-26 10:52:26 +0200616 cccr_val |= P4_CCCR_ENABLE;
617 wrmsr(cccr_msr, cccr_val, 0);
618 wd->perfctr_msr = perfctr_msr;
619 wd->evntsel_msr = evntsel_msr;
620 wd->cccr_msr = cccr_msr;
621 wd->check_bit = 1ULL<<39;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 return 1;
Don Zickus828f0af2006-09-26 10:52:26 +0200623fail1:
Don Zickusb7471c62006-09-26 10:52:26 +0200624 release_perfctr_nmi(perfctr_msr);
Don Zickus828f0af2006-09-26 10:52:26 +0200625fail:
626 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627}
628
Don Zickusb7471c62006-09-26 10:52:26 +0200629static void stop_p4_watchdog(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630{
Don Zickusb7471c62006-09-26 10:52:26 +0200631 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
Don Zickusb7471c62006-09-26 10:52:26 +0200633 wrmsr(wd->cccr_msr, 0, 0);
634 wrmsr(wd->evntsel_msr, 0, 0);
635
636 release_evntsel_nmi(wd->evntsel_msr);
637 release_perfctr_nmi(wd->perfctr_msr);
638}
639
640void setup_apic_nmi_watchdog (void *unused)
641{
Shaohua Li4038f902006-09-26 10:52:27 +0200642 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
643
Don Zickusb7471c62006-09-26 10:52:26 +0200644 /* only support LOCAL and IO APICs for now */
645 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
646 (nmi_watchdog != NMI_IO_APIC))
647 return;
648
Shaohua Li4038f902006-09-26 10:52:27 +0200649 if (wd->enabled == 1)
650 return;
651
652 /* cheap hack to support suspend/resume */
653 /* if cpu0 is not active neither should the other cpus */
654 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
655 return;
656
Don Zickusb7471c62006-09-26 10:52:26 +0200657 if (nmi_watchdog == NMI_LOCAL_APIC) {
658 switch (boot_cpu_data.x86_vendor) {
659 case X86_VENDOR_AMD:
660 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
661 return;
662 if (!setup_k7_watchdog())
Don Zickus828f0af2006-09-26 10:52:26 +0200663 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 break;
Don Zickusb7471c62006-09-26 10:52:26 +0200665 case X86_VENDOR_INTEL:
666 switch (boot_cpu_data.x86) {
667 case 6:
668 if (boot_cpu_data.x86_model > 0xd)
669 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
Don Zickusb7471c62006-09-26 10:52:26 +0200671 if (!setup_p6_watchdog())
672 return;
673 break;
674 case 15:
675 if (boot_cpu_data.x86_model > 0x4)
676 return;
677
678 if (!setup_p4_watchdog())
679 return;
680 break;
681 default:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 return;
Don Zickusb7471c62006-09-26 10:52:26 +0200683 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684 break;
685 default:
686 return;
687 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 }
Shaohua Li4038f902006-09-26 10:52:27 +0200689 wd->enabled = 1;
Don Zickusb7471c62006-09-26 10:52:26 +0200690 atomic_inc(&nmi_active);
691}
692
Shaohua Li4038f902006-09-26 10:52:27 +0200693void stop_apic_nmi_watchdog(void *unused)
Don Zickusb7471c62006-09-26 10:52:26 +0200694{
Shaohua Li4038f902006-09-26 10:52:27 +0200695 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
696
Don Zickusb7471c62006-09-26 10:52:26 +0200697 /* only support LOCAL and IO APICs for now */
698 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
699 (nmi_watchdog != NMI_IO_APIC))
700 return;
701
Shaohua Li4038f902006-09-26 10:52:27 +0200702 if (wd->enabled == 0)
703 return;
704
Don Zickusb7471c62006-09-26 10:52:26 +0200705 if (nmi_watchdog == NMI_LOCAL_APIC) {
706 switch (boot_cpu_data.x86_vendor) {
707 case X86_VENDOR_AMD:
708 stop_k7_watchdog();
709 break;
710 case X86_VENDOR_INTEL:
711 switch (boot_cpu_data.x86) {
712 case 6:
713 if (boot_cpu_data.x86_model > 0xd)
714 break;
715 stop_p6_watchdog();
716 break;
717 case 15:
718 if (boot_cpu_data.x86_model > 0x4)
719 break;
720 stop_p4_watchdog();
721 break;
722 }
723 break;
724 default:
725 return;
726 }
727 }
Shaohua Li4038f902006-09-26 10:52:27 +0200728 wd->enabled = 0;
Don Zickusb7471c62006-09-26 10:52:26 +0200729 atomic_dec(&nmi_active);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730}
731
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */
746
747static unsigned int
748 last_irq_sums [NR_CPUS],
749 alert_counter [NR_CPUS];
750
751void touch_nmi_watchdog (void)
752{
753 int i;
754
755 /*
756 * Just reset the alert counters, (other CPUs might be
757 * spinning on locks we hold):
758 */
KAMEZAWA Hiroyukic8912592006-03-28 01:56:39 -0800759 for_each_possible_cpu(i)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 alert_counter[i] = 0;
Ingo Molnar8446f1d2005-09-06 15:16:27 -0700761
762 /*
763 * Tickle the softlockup detector too:
764 */
765 touch_softlockup_watchdog();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766}
Michal Schmidt1e862402006-07-30 03:03:29 -0700767EXPORT_SYMBOL(touch_nmi_watchdog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768
769extern void die_nmi(struct pt_regs *, const char *msg);
770
Don Zickus3adbbcce2006-09-26 10:52:26 +0200771int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772{
773
774 /*
775 * Since current_thread_info()-> is always on the stack, and we
776 * always switch the stack NMI-atomically, it's safe to use
777 * smp_processor_id().
778 */
Jesper Juhlb791cce2006-03-28 01:56:52 -0800779 unsigned int sum;
Don Zickusb7471c62006-09-26 10:52:26 +0200780 int touched = 0;
Jesper Juhlb791cce2006-03-28 01:56:52 -0800781 int cpu = smp_processor_id();
Don Zickusb7471c62006-09-26 10:52:26 +0200782 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
783 u64 dummy;
Don Zickus3adbbcce2006-09-26 10:52:26 +0200784 int rc=0;
Don Zickusb7471c62006-09-26 10:52:26 +0200785
786 /* check for other users first */
787 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
788 == NOTIFY_STOP) {
Don Zickus3adbbcce2006-09-26 10:52:26 +0200789 rc = 1;
Don Zickusb7471c62006-09-26 10:52:26 +0200790 touched = 1;
791 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792
793 sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
794
Don Zickusb7471c62006-09-26 10:52:26 +0200795 /* if the apic timer isn't firing, this cpu isn't doing much */
796 if (!touched && last_irq_sums[cpu] == sum) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 /*
798 * Ayiee, looks like this CPU is stuck ...
799 * wait a few IRQs (5 seconds) before doing the oops ...
800 */
801 alert_counter[cpu]++;
802 if (alert_counter[cpu] == 5*nmi_hz)
George Anzinger748f2ed2005-09-03 15:56:48 -0700803 /*
804 * die_nmi will return ONLY if NOTIFY_STOP happens..
805 */
Ingo Molnar91368d72006-03-23 03:00:54 -0800806 die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
GOTO Masanorib884e252006-03-07 21:55:29 -0800807 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 last_irq_sums[cpu] = sum;
809 alert_counter[cpu] = 0;
810 }
Don Zickusb7471c62006-09-26 10:52:26 +0200811 /* see if the nmi watchdog went off */
812 if (wd->enabled) {
813 if (nmi_watchdog == NMI_LOCAL_APIC) {
814 rdmsrl(wd->perfctr_msr, dummy);
815 if (dummy & wd->check_bit){
816 /* this wasn't a watchdog timer interrupt */
817 goto done;
818 }
819
820 /* only Intel P4 uses the cccr msr */
821 if (wd->cccr_msr != 0) {
822 /*
823 * P4 quirks:
824 * - An overflown perfctr will assert its interrupt
825 * until the OVF flag in its CCCR is cleared.
826 * - LVTPC is masked on interrupt and must be
827 * unmasked by the LVTPC handler.
828 */
829 rdmsrl(wd->cccr_msr, dummy);
830 dummy &= ~P4_CCCR_OVF;
831 wrmsrl(wd->cccr_msr, dummy);
832 apic_write(APIC_LVTPC, APIC_DM_NMI);
833 }
834 else if (wd->perfctr_msr == MSR_P6_PERFCTR0) {
835 /* Only P6 based Pentium M need to re-unmask
836 * the apic vector but it doesn't hurt
837 * other P6 variant */
838 apic_write(APIC_LVTPC, APIC_DM_NMI);
839 }
840 /* start the cycle over again */
841 write_watchdog_counter(wd->perfctr_msr, NULL);
Don Zickus3adbbcce2006-09-26 10:52:26 +0200842 rc = 1;
843 } else if (nmi_watchdog == NMI_IO_APIC) {
844 /* don't know how to accurately check for this.
845 * just assume it was a watchdog timer interrupt
846 * This matches the old behaviour.
847 */
848 rc = 1;
849 } else
850 printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 }
Don Zickusb7471c62006-09-26 10:52:26 +0200852done:
Don Zickus3adbbcce2006-09-26 10:52:26 +0200853 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854}
855
/*
 * Handle an NMI nobody else claimed.  With CONFIG_SYSCTL and
 * unknown_nmi_panic set, returns the result of
 * unknown_nmi_panic_callback(); returns 0 otherwise.
 */
int do_nmi_callback(struct pt_regs *regs, int cpu)
{
	int handled = 0;

#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		handled = unknown_nmi_panic_callback(regs, cpu);
#endif
	return handled;
}
864
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865#ifdef CONFIG_SYSCTL
866
/*
 * Format the NMI reason byte into a message and hand it to die_nmi().
 * Returns 0 if die_nmi() returns.  @cpu is not used.
 */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	char msg[64];
	unsigned char why = get_nmi_reason();

	sprintf(msg, "NMI received for unknown reason %02x\n", why);
	die_nmi(regs, msg);
	return 0;
}
876
Don Zickus407984f2006-09-26 10:52:27 +0200877/*
Don Zickuse33e89a2006-09-26 10:52:27 +0200878 * proc handler for /proc/sys/kernel/nmi
Don Zickus407984f2006-09-26 10:52:27 +0200879 */
880int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
881 void __user *buffer, size_t *length, loff_t *ppos)
882{
883 int old_state;
884
885 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
886 old_state = nmi_watchdog_enabled;
887 proc_dointvec(table, write, file, buffer, length, ppos);
888 if (!!old_state == !!nmi_watchdog_enabled)
889 return 0;
890
891 if (atomic_read(&nmi_active) < 0) {
Don Zickuse33e89a2006-09-26 10:52:27 +0200892 printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
893 return -EIO;
Don Zickus407984f2006-09-26 10:52:27 +0200894 }
895
896 if (nmi_watchdog == NMI_DEFAULT) {
897 if (nmi_known_cpu() > 0)
898 nmi_watchdog = NMI_LOCAL_APIC;
899 else
900 nmi_watchdog = NMI_IO_APIC;
901 }
902
Don Zickuse33e89a2006-09-26 10:52:27 +0200903 if (nmi_watchdog == NMI_LOCAL_APIC) {
Don Zickus407984f2006-09-26 10:52:27 +0200904 if (nmi_watchdog_enabled)
905 enable_lapic_nmi_watchdog();
906 else
907 disable_lapic_nmi_watchdog();
Don Zickus407984f2006-09-26 10:52:27 +0200908 } else {
909 printk( KERN_WARNING
910 "NMI watchdog doesn't know what hardware to touch\n");
911 return -EIO;
912 }
913 return 0;
914}
915
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916#endif
917
/* watchdog state */
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);

/* perfctr / evntsel reservation interface */
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);

/* IO-APIC (timer) watchdog control */
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);