blob: 62c010063974e5caaa51e1dfd44ea8e54ad76154 [file] [log] [blame]
/*
 * local apic based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 *
 */
Andi Kleen09198e62007-05-02 19:27:20 +020013
14#include <linux/percpu.h>
15#include <linux/module.h>
16#include <linux/kernel.h>
17#include <linux/bitops.h>
18#include <linux/smp.h>
19#include <linux/nmi.h>
20#include <asm/apic.h>
21#include <asm/intel_arch_perfmon.h>
22
/*
 * MSR numbers the perfctr NMI watchdog programmed on one CPU.
 * One instance exists per CPU.
 */
struct nmi_watchdog_ctlblk {
	/* CCCR MSR; the non-P4 setup routines store 0 here (unused) */
	unsigned int cccr_msr;
	/* counter MSR that is reloaded from the NMI handler */
	unsigned int perfctr_msr;
	/* MSR that selects which events are counted */
	unsigned int evntsel_msr;
};
28
29/* Interface defining a CPU specific perfctr watchdog */
30struct wd_ops {
31 int (*reserve)(void);
32 void (*unreserve)(void);
33 int (*setup)(unsigned nmi_hz);
34 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
Björn Steinbrink54c6ed72007-06-16 10:15:56 -070035 void (*stop)(void);
Andi Kleen09198e62007-05-02 19:27:20 +020036 unsigned perfctr;
37 unsigned evntsel;
38 u64 checkbit;
39};
40
Jan Beulichd1e08472007-10-17 18:04:39 +020041static const struct wd_ops *wd_ops;
Andi Kleen09198e62007-05-02 19:27:20 +020042
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +020043/*
44 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0.
46 *
47 * It will be the max for all platforms (for now)
Andi Kleen09198e62007-05-02 19:27:20 +020048 */
49#define NMI_MAX_COUNTER_BITS 66
50
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +020051/*
52 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
Andi Kleen09198e62007-05-02 19:27:20 +020053 * evtsel_nmi_owner tracks the ownership of the event selection
54 * - different performance counters/ event selection may be reserved for
55 * different subsystems this reservation system just tries to coordinate
56 * things a little
57 */
58static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
59static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
60
61static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
62
63/* converts an msr to an appropriate reservation bit */
64static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
65{
Andi Kleen5dcccd82007-07-04 01:38:13 +020066 /* returns the bit offset of the performance counter register */
67 switch (boot_cpu_data.x86_vendor) {
68 case X86_VENDOR_AMD:
69 return (msr - MSR_K7_PERFCTR0);
70 case X86_VENDOR_INTEL:
71 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
72 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
73
74 switch (boot_cpu_data.x86) {
75 case 6:
76 return (msr - MSR_P6_PERFCTR0);
77 case 15:
78 return (msr - MSR_P4_BPU_PERFCTR0);
79 }
80 }
81 return 0;
Andi Kleen09198e62007-05-02 19:27:20 +020082}
83
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +020084/*
85 * converts an msr to an appropriate reservation bit
86 * returns the bit offset of the event selection register
87 */
Andi Kleen09198e62007-05-02 19:27:20 +020088static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
89{
Andi Kleen5dcccd82007-07-04 01:38:13 +020090 /* returns the bit offset of the event selection register */
91 switch (boot_cpu_data.x86_vendor) {
92 case X86_VENDOR_AMD:
93 return (msr - MSR_K7_EVNTSEL0);
94 case X86_VENDOR_INTEL:
95 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
96 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
97
98 switch (boot_cpu_data.x86) {
99 case 6:
100 return (msr - MSR_P6_EVNTSEL0);
101 case 15:
102 return (msr - MSR_P4_BSU_ESCR0);
103 }
104 }
105 return 0;
106
Andi Kleen09198e62007-05-02 19:27:20 +0200107}
108
109/* checks for a bit availability (hack for oprofile) */
110int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
111{
112 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
113
114 return (!test_bit(counter, perfctr_nmi_owner));
115}
116
117/* checks the an msr for availability */
118int avail_to_resrv_perfctr_nmi(unsigned int msr)
119{
120 unsigned int counter;
121
122 counter = nmi_perfctr_msr_to_bit(msr);
123 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
124
125 return (!test_bit(counter, perfctr_nmi_owner));
126}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200127EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
Andi Kleen09198e62007-05-02 19:27:20 +0200128
129int reserve_perfctr_nmi(unsigned int msr)
130{
131 unsigned int counter;
132
133 counter = nmi_perfctr_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200134 /* register not managed by the allocator? */
135 if (counter > NMI_MAX_COUNTER_BITS)
136 return 1;
Andi Kleen09198e62007-05-02 19:27:20 +0200137
138 if (!test_and_set_bit(counter, perfctr_nmi_owner))
139 return 1;
140 return 0;
141}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200142EXPORT_SYMBOL(reserve_perfctr_nmi);
Andi Kleen09198e62007-05-02 19:27:20 +0200143
144void release_perfctr_nmi(unsigned int msr)
145{
146 unsigned int counter;
147
148 counter = nmi_perfctr_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200149 /* register not managed by the allocator? */
150 if (counter > NMI_MAX_COUNTER_BITS)
151 return;
Andi Kleen09198e62007-05-02 19:27:20 +0200152
153 clear_bit(counter, perfctr_nmi_owner);
154}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200155EXPORT_SYMBOL(release_perfctr_nmi);
Andi Kleen09198e62007-05-02 19:27:20 +0200156
157int reserve_evntsel_nmi(unsigned int msr)
158{
159 unsigned int counter;
160
161 counter = nmi_evntsel_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200162 /* register not managed by the allocator? */
163 if (counter > NMI_MAX_COUNTER_BITS)
164 return 1;
Andi Kleen09198e62007-05-02 19:27:20 +0200165
166 if (!test_and_set_bit(counter, evntsel_nmi_owner))
167 return 1;
168 return 0;
169}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200170EXPORT_SYMBOL(reserve_evntsel_nmi);
Andi Kleen09198e62007-05-02 19:27:20 +0200171
172void release_evntsel_nmi(unsigned int msr)
173{
174 unsigned int counter;
175
176 counter = nmi_evntsel_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200177 /* register not managed by the allocator? */
178 if (counter > NMI_MAX_COUNTER_BITS)
179 return;
Andi Kleen09198e62007-05-02 19:27:20 +0200180
181 clear_bit(counter, evntsel_nmi_owner);
182}
Andi Kleen09198e62007-05-02 19:27:20 +0200183EXPORT_SYMBOL(release_evntsel_nmi);
184
185void disable_lapic_nmi_watchdog(void)
186{
187 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
188
189 if (atomic_read(&nmi_active) <= 0)
190 return;
191
Jens Axboe15c8b6c2008-05-09 09:39:44 +0200192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
Cyrill Gorcunov1a1b1d12008-06-04 01:00:58 +0400193
194 if (wd_ops)
195 wd_ops->unreserve();
Andi Kleen09198e62007-05-02 19:27:20 +0200196
197 BUG_ON(atomic_read(&nmi_active) != 0);
198}
199
200void enable_lapic_nmi_watchdog(void)
201{
202 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
203
204 /* are we already enabled */
205 if (atomic_read(&nmi_active) != 0)
206 return;
207
208 /* are we lapic aware */
209 if (!wd_ops)
210 return;
211 if (!wd_ops->reserve()) {
212 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
213 return;
214 }
215
Jens Axboe15c8b6c2008-05-09 09:39:44 +0200216 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
Andi Kleen09198e62007-05-02 19:27:20 +0200217 touch_nmi_watchdog();
218}
219
220/*
221 * Activate the NMI watchdog via the local APIC.
222 */
223
224static unsigned int adjust_for_32bit_ctr(unsigned int hz)
225{
226 u64 counter_val;
227 unsigned int retval = hz;
228
229 /*
230 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
231 * are writable, with higher bits sign extending from bit 31.
232 * So, we can only program the counter with 31 bit values and
233 * 32nd bit should be 1, for 33.. to be 1.
234 * Find the appropriate nmi_hz
235 */
236 counter_val = (u64)cpu_khz * 1000;
237 do_div(counter_val, retval);
238 if (counter_val > 0x7fffffffULL) {
239 u64 count = (u64)cpu_khz * 1000;
240 do_div(count, 0x7fffffffUL);
241 retval = count + 1;
242 }
243 return retval;
244}
245
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200246static void write_watchdog_counter(unsigned int perfctr_msr,
247 const char *descr, unsigned nmi_hz)
Andi Kleen09198e62007-05-02 19:27:20 +0200248{
249 u64 count = (u64)cpu_khz * 1000;
250
251 do_div(count, nmi_hz);
252 if(descr)
Thomas Gleixnercfc1b9a2008-07-21 21:35:38 +0200253 pr_debug("setting %s to -0x%08Lx\n", descr, count);
Andi Kleen09198e62007-05-02 19:27:20 +0200254 wrmsrl(perfctr_msr, 0 - count);
255}
256
257static void write_watchdog_counter32(unsigned int perfctr_msr,
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200258 const char *descr, unsigned nmi_hz)
Andi Kleen09198e62007-05-02 19:27:20 +0200259{
260 u64 count = (u64)cpu_khz * 1000;
261
262 do_div(count, nmi_hz);
263 if(descr)
Thomas Gleixnercfc1b9a2008-07-21 21:35:38 +0200264 pr_debug("setting %s to -0x%08Lx\n", descr, count);
Andi Kleen09198e62007-05-02 19:27:20 +0200265 wrmsr(perfctr_msr, (u32)(-count), 0);
266}
267
/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable so there is not much variety.
 *
 * Event select bits and the event used for the watchdog.
 */
#define K7_EVNTSEL_ENABLE			(1 << 22)
#define K7_EVNTSEL_INT				(1 << 20)
#define K7_EVNTSEL_OS				(1 << 17)
#define K7_EVNTSEL_USR				(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT	K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
278
279static int setup_k7_watchdog(unsigned nmi_hz)
280{
281 unsigned int perfctr_msr, evntsel_msr;
282 unsigned int evntsel;
283 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
284
Stephane Eranian23d5ea52007-09-06 16:59:51 +0200285 perfctr_msr = wd_ops->perfctr;
286 evntsel_msr = wd_ops->evntsel;
Andi Kleen09198e62007-05-02 19:27:20 +0200287
288 wrmsrl(perfctr_msr, 0UL);
289
290 evntsel = K7_EVNTSEL_INT
291 | K7_EVNTSEL_OS
292 | K7_EVNTSEL_USR
293 | K7_NMI_EVENT;
294
295 /* setup the timer */
296 wrmsr(evntsel_msr, evntsel, 0);
297 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 wd->perfctr_msr = perfctr_msr;
303 wd->evntsel_msr = evntsel_msr;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200304 wd->cccr_msr = 0; /* unused */
Andi Kleen09198e62007-05-02 19:27:20 +0200305 return 1;
306}
307
Björn Steinbrink54c6ed72007-06-16 10:15:56 -0700308static void single_msr_stop_watchdog(void)
Andi Kleen09198e62007-05-02 19:27:20 +0200309{
310 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
311
312 wrmsr(wd->evntsel_msr, 0, 0);
313}
314
315static int single_msr_reserve(void)
316{
317 if (!reserve_perfctr_nmi(wd_ops->perfctr))
318 return 0;
319
320 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
321 release_perfctr_nmi(wd_ops->perfctr);
322 return 0;
323 }
324 return 1;
325}
326
327static void single_msr_unreserve(void)
328{
Björn Steinbrinkda88ba12007-06-16 10:16:04 -0700329 release_evntsel_nmi(wd_ops->evntsel);
330 release_perfctr_nmi(wd_ops->perfctr);
Andi Kleen09198e62007-05-02 19:27:20 +0200331}
332
333static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
334{
335 /* start the cycle over again */
336 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
337}
338
Jan Beulichd1e08472007-10-17 18:04:39 +0200339static const struct wd_ops k7_wd_ops = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200340 .reserve = single_msr_reserve,
341 .unreserve = single_msr_unreserve,
342 .setup = setup_k7_watchdog,
343 .rearm = single_msr_rearm,
344 .stop = single_msr_stop_watchdog,
345 .perfctr = MSR_K7_PERFCTR0,
346 .evntsel = MSR_K7_EVNTSEL0,
347 .checkbit = 1ULL << 47,
Andi Kleen09198e62007-05-02 19:27:20 +0200348};
349
/*
 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
 *
 * Event select bits and the event used for the watchdog.
 */
#define P6_EVNTSEL0_ENABLE		(1 << 22)
#define P6_EVNTSEL_INT			(1 << 20)
#define P6_EVNTSEL_OS			(1 << 17)
#define P6_EVNTSEL_USR			(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT	P6_EVENT_CPU_CLOCKS_NOT_HALTED
359
360static int setup_p6_watchdog(unsigned nmi_hz)
361{
362 unsigned int perfctr_msr, evntsel_msr;
363 unsigned int evntsel;
364 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
365
Stephane Eranian23d5ea52007-09-06 16:59:51 +0200366 perfctr_msr = wd_ops->perfctr;
367 evntsel_msr = wd_ops->evntsel;
Andi Kleen09198e62007-05-02 19:27:20 +0200368
Andi Kleen57c22f42007-07-22 11:12:39 +0200369 /* KVM doesn't implement this MSR */
370 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
371 return 0;
Andi Kleen09198e62007-05-02 19:27:20 +0200372
373 evntsel = P6_EVNTSEL_INT
374 | P6_EVNTSEL_OS
375 | P6_EVNTSEL_USR
376 | P6_NMI_EVENT;
377
378 /* setup the timer */
379 wrmsr(evntsel_msr, evntsel, 0);
380 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
381 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
382 apic_write(APIC_LVTPC, APIC_DM_NMI);
383 evntsel |= P6_EVNTSEL0_ENABLE;
384 wrmsr(evntsel_msr, evntsel, 0);
385
386 wd->perfctr_msr = perfctr_msr;
387 wd->evntsel_msr = evntsel_msr;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200388 wd->cccr_msr = 0; /* unused */
Andi Kleen09198e62007-05-02 19:27:20 +0200389 return 1;
390}
391
392static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
393{
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200394 /*
395 * P6 based Pentium M need to re-unmask
Andi Kleen09198e62007-05-02 19:27:20 +0200396 * the apic vector but it doesn't hurt
397 * other P6 variant.
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200398 * ArchPerfom/Core Duo also needs this
399 */
Andi Kleen09198e62007-05-02 19:27:20 +0200400 apic_write(APIC_LVTPC, APIC_DM_NMI);
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200401
Andi Kleen09198e62007-05-02 19:27:20 +0200402 /* P6/ARCH_PERFMON has 32 bit counter write */
403 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
404}
405
Jan Beulichd1e08472007-10-17 18:04:39 +0200406static const struct wd_ops p6_wd_ops = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200407 .reserve = single_msr_reserve,
408 .unreserve = single_msr_unreserve,
409 .setup = setup_p6_watchdog,
410 .rearm = p6_rearm,
411 .stop = single_msr_stop_watchdog,
412 .perfctr = MSR_P6_PERFCTR0,
413 .evntsel = MSR_P6_EVNTSEL0,
414 .checkbit = 1ULL << 39,
Andi Kleen09198e62007-05-02 19:27:20 +0200415};
416
/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 *
 * Bit definitions for MSR_IA32_MISC_ENABLE, the ESCR and the CCCR
 * registers as used by the P4 watchdog code in this file.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
#define P4_ESCR_OS		(1 << 3)
#define P4_ESCR_USR		(1 << 2)
#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_OVF_PMI1	(1 << 27)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)
/* bit 31 does not fit a signed int: shift an unsigned 1 to avoid overflow */
#define P4_CCCR_OVF		(1U << 31)
Andi Kleen09198e62007-05-02 19:27:20 +0200434
Aristeu Rozanski28b166a2008-09-22 13:14:13 -0400435#define P4_CONTROLS 18
436static unsigned int p4_controls[18] = {
437 MSR_P4_BPU_CCCR0,
438 MSR_P4_BPU_CCCR1,
439 MSR_P4_BPU_CCCR2,
440 MSR_P4_BPU_CCCR3,
441 MSR_P4_MS_CCCR0,
442 MSR_P4_MS_CCCR1,
443 MSR_P4_MS_CCCR2,
444 MSR_P4_MS_CCCR3,
445 MSR_P4_FLAME_CCCR0,
446 MSR_P4_FLAME_CCCR1,
447 MSR_P4_FLAME_CCCR2,
448 MSR_P4_FLAME_CCCR3,
449 MSR_P4_IQ_CCCR0,
450 MSR_P4_IQ_CCCR1,
451 MSR_P4_IQ_CCCR2,
452 MSR_P4_IQ_CCCR3,
453 MSR_P4_IQ_CCCR4,
454 MSR_P4_IQ_CCCR5,
455};
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200456/*
457 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
458 * CRU_ESCR0 (with any non-null event selector) through a complemented
459 * max threshold. [IA32-Vol3, Section 14.9.9]
460 */
Andi Kleen09198e62007-05-02 19:27:20 +0200461static int setup_p4_watchdog(unsigned nmi_hz)
462{
463 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
464 unsigned int evntsel, cccr_val;
465 unsigned int misc_enable, dummy;
466 unsigned int ht_num;
467 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
468
469 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
470 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
471 return 0;
472
473#ifdef CONFIG_SMP
474 /* detect which hyperthread we are on */
475 if (smp_num_siblings == 2) {
476 unsigned int ebx, apicid;
477
478 ebx = cpuid_ebx(1);
479 apicid = (ebx >> 24) & 0xff;
480 ht_num = apicid & 1;
481 } else
482#endif
483 ht_num = 0;
484
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200485 /*
486 * performance counters are shared resources
Andi Kleen09198e62007-05-02 19:27:20 +0200487 * assign each hyperthread its own set
488 * (re-use the ESCR0 register, seems safe
489 * and keeps the cccr_val the same)
490 */
491 if (!ht_num) {
492 /* logical cpu 0 */
493 perfctr_msr = MSR_P4_IQ_PERFCTR0;
494 evntsel_msr = MSR_P4_CRU_ESCR0;
495 cccr_msr = MSR_P4_IQ_CCCR0;
496 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
Aristeu Rozanski28b166a2008-09-22 13:14:13 -0400497
498 /*
499 * If we're on the kdump kernel or other situation, we may
500 * still have other performance counter registers set to
501 * interrupt and they'll keep interrupting forever because
502 * of the P4_CCCR_OVF quirk. So we need to ACK all the
503 * pending interrupts and disable all the registers here,
504 * before reenabling the NMI delivery. Refer to p4_rearm()
505 * about the P4_CCCR_OVF quirk.
506 */
507 if (reset_devices) {
508 unsigned int low, high;
509 int i;
510
511 for (i = 0; i < P4_CONTROLS; i++) {
512 rdmsr(p4_controls[i], low, high);
513 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
514 wrmsr(p4_controls[i], low, high);
515 }
516 }
Andi Kleen09198e62007-05-02 19:27:20 +0200517 } else {
518 /* logical cpu 1 */
519 perfctr_msr = MSR_P4_IQ_PERFCTR1;
520 evntsel_msr = MSR_P4_CRU_ESCR0;
521 cccr_msr = MSR_P4_IQ_CCCR1;
Aristeu Rozanskidcc98412008-08-14 16:32:15 -0400522
523 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
524 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
525 cccr_val = P4_CCCR_OVF_PMI0;
526 else
527 cccr_val = P4_CCCR_OVF_PMI1;
528 cccr_val |= P4_CCCR_ESCR_SELECT(4);
Andi Kleen09198e62007-05-02 19:27:20 +0200529 }
530
531 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
532 | P4_ESCR_OS
533 | P4_ESCR_USR;
534
535 cccr_val |= P4_CCCR_THRESHOLD(15)
536 | P4_CCCR_COMPLEMENT
537 | P4_CCCR_COMPARE
538 | P4_CCCR_REQUIRED;
539
540 wrmsr(evntsel_msr, evntsel, 0);
541 wrmsr(cccr_msr, cccr_val, 0);
542 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
543 apic_write(APIC_LVTPC, APIC_DM_NMI);
544 cccr_val |= P4_CCCR_ENABLE;
545 wrmsr(cccr_msr, cccr_val, 0);
546 wd->perfctr_msr = perfctr_msr;
547 wd->evntsel_msr = evntsel_msr;
548 wd->cccr_msr = cccr_msr;
549 return 1;
550}
551
Björn Steinbrink54c6ed72007-06-16 10:15:56 -0700552static void stop_p4_watchdog(void)
Andi Kleen09198e62007-05-02 19:27:20 +0200553{
554 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
555 wrmsr(wd->cccr_msr, 0, 0);
556 wrmsr(wd->evntsel_msr, 0, 0);
557}
558
559static int p4_reserve(void)
560{
561 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
562 return 0;
563#ifdef CONFIG_SMP
564 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
565 goto fail1;
566#endif
567 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
568 goto fail2;
569 /* RED-PEN why is ESCR1 not reserved here? */
570 return 1;
571 fail2:
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1)
574 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
575 fail1:
576#endif
577 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
578 return 0;
579}
580
581static void p4_unreserve(void)
582{
583#ifdef CONFIG_SMP
584 if (smp_num_siblings > 1)
Björn Steinbrinkda88ba12007-06-16 10:16:04 -0700585 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
Andi Kleen09198e62007-05-02 19:27:20 +0200586#endif
Björn Steinbrinkda88ba12007-06-16 10:16:04 -0700587 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
588 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
Andi Kleen09198e62007-05-02 19:27:20 +0200589}
590
591static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
592{
593 unsigned dummy;
594 /*
595 * P4 quirks:
596 * - An overflown perfctr will assert its interrupt
597 * until the OVF flag in its CCCR is cleared.
598 * - LVTPC is masked on interrupt and must be
599 * unmasked by the LVTPC handler.
600 */
601 rdmsrl(wd->cccr_msr, dummy);
602 dummy &= ~P4_CCCR_OVF;
603 wrmsrl(wd->cccr_msr, dummy);
604 apic_write(APIC_LVTPC, APIC_DM_NMI);
605 /* start the cycle over again */
606 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
607}
608
Jan Beulichd1e08472007-10-17 18:04:39 +0200609static const struct wd_ops p4_wd_ops = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200610 .reserve = p4_reserve,
611 .unreserve = p4_unreserve,
612 .setup = setup_p4_watchdog,
613 .rearm = p4_rearm,
614 .stop = stop_p4_watchdog,
Andi Kleen09198e62007-05-02 19:27:20 +0200615 /* RED-PEN this is wrong for the other sibling */
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200616 .perfctr = MSR_P4_BPU_PERFCTR0,
617 .evntsel = MSR_P4_BSU_ESCR0,
618 .checkbit = 1ULL << 39,
Andi Kleen09198e62007-05-02 19:27:20 +0200619};
620
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200621/*
622 * Watchdog using the Intel architected PerfMon.
623 * Used for Core2 and hopefully all future Intel CPUs.
624 */
Andi Kleen09198e62007-05-02 19:27:20 +0200625#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
626#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
627
Jan Beulichd1e08472007-10-17 18:04:39 +0200628static struct wd_ops intel_arch_wd_ops;
629
Andi Kleen09198e62007-05-02 19:27:20 +0200630static int setup_intel_arch_watchdog(unsigned nmi_hz)
631{
632 unsigned int ebx;
633 union cpuid10_eax eax;
634 unsigned int unused;
635 unsigned int perfctr_msr, evntsel_msr;
636 unsigned int evntsel;
637 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
638
639 /*
640 * Check whether the Architectural PerfMon supports
641 * Unhalted Core Cycles Event or not.
642 * NOTE: Corresponding bit = 0 in ebx indicates event present.
643 */
644 cpuid(10, &(eax.full), &ebx, &unused, &unused);
645 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
646 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
647 return 0;
648
Stephane Eranian23d5ea52007-09-06 16:59:51 +0200649 perfctr_msr = wd_ops->perfctr;
650 evntsel_msr = wd_ops->evntsel;
Andi Kleen09198e62007-05-02 19:27:20 +0200651
652 wrmsrl(perfctr_msr, 0UL);
653
654 evntsel = ARCH_PERFMON_EVENTSEL_INT
655 | ARCH_PERFMON_EVENTSEL_OS
656 | ARCH_PERFMON_EVENTSEL_USR
657 | ARCH_PERFMON_NMI_EVENT_SEL
658 | ARCH_PERFMON_NMI_EVENT_UMASK;
659
660 /* setup the timer */
661 wrmsr(evntsel_msr, evntsel, 0);
662 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
663 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
664 apic_write(APIC_LVTPC, APIC_DM_NMI);
665 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
666 wrmsr(evntsel_msr, evntsel, 0);
667
668 wd->perfctr_msr = perfctr_msr;
669 wd->evntsel_msr = evntsel_msr;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200670 wd->cccr_msr = 0; /* unused */
Jan Beulichd1e08472007-10-17 18:04:39 +0200671 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
Andi Kleen09198e62007-05-02 19:27:20 +0200672 return 1;
673}
674
Jan Beulichd1e08472007-10-17 18:04:39 +0200675static struct wd_ops intel_arch_wd_ops __read_mostly = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200676 .reserve = single_msr_reserve,
677 .unreserve = single_msr_unreserve,
678 .setup = setup_intel_arch_watchdog,
679 .rearm = p6_rearm,
680 .stop = single_msr_stop_watchdog,
681 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
682 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
Andi Kleen09198e62007-05-02 19:27:20 +0200683};
684
685static void probe_nmi_watchdog(void)
686{
687 switch (boot_cpu_data.x86_vendor) {
688 case X86_VENDOR_AMD:
689 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
690 boot_cpu_data.x86 != 16)
691 return;
692 wd_ops = &k7_wd_ops;
693 break;
694 case X86_VENDOR_INTEL:
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200695 /*
696 * Work around Core Duo (Yonah) errata AE49 where perfctr1
697 * doesn't have a working enable bit.
698 */
Stephane Eranian23d5ea52007-09-06 16:59:51 +0200699 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
Jan Beulich86d78f62008-04-22 16:28:41 +0100700 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
701 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
Stephane Eranian23d5ea52007-09-06 16:59:51 +0200702 }
Andi Kleen09198e62007-05-02 19:27:20 +0200703 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
704 wd_ops = &intel_arch_wd_ops;
705 break;
706 }
707 switch (boot_cpu_data.x86) {
708 case 6:
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200709 if (boot_cpu_data.x86_model > 13)
Andi Kleen09198e62007-05-02 19:27:20 +0200710 return;
711
712 wd_ops = &p6_wd_ops;
713 break;
714 case 15:
Andi Kleen09198e62007-05-02 19:27:20 +0200715 wd_ops = &p4_wd_ops;
716 break;
717 default:
718 return;
719 }
720 break;
721 }
722}
723
724/* Interface to nmi.c */
725
726int lapic_watchdog_init(unsigned nmi_hz)
727{
728 if (!wd_ops) {
729 probe_nmi_watchdog();
Ingo Molnar9c9b81f2008-03-27 23:39:42 +0100730 if (!wd_ops) {
731 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
Andi Kleen09198e62007-05-02 19:27:20 +0200732 return -1;
Ingo Molnar9c9b81f2008-03-27 23:39:42 +0100733 }
Björn Steinbrinkfaa4cfa2007-06-16 10:15:55 -0700734
735 if (!wd_ops->reserve()) {
736 printk(KERN_ERR
737 "NMI watchdog: cannot reserve perfctrs\n");
738 return -1;
739 }
Andi Kleen09198e62007-05-02 19:27:20 +0200740 }
741
742 if (!(wd_ops->setup(nmi_hz))) {
743 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
744 raw_smp_processor_id());
745 return -1;
746 }
747
748 return 0;
749}
750
751void lapic_watchdog_stop(void)
752{
753 if (wd_ops)
Björn Steinbrink54c6ed72007-06-16 10:15:56 -0700754 wd_ops->stop();
Andi Kleen09198e62007-05-02 19:27:20 +0200755}
756
757unsigned lapic_adjust_nmi_hz(unsigned hz)
758{
759 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
760 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
761 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
762 hz = adjust_for_32bit_ctr(hz);
763 return hz;
764}
765
766int lapic_wd_event(unsigned nmi_hz)
767{
768 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
769 u64 ctr;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200770
Andi Kleen09198e62007-05-02 19:27:20 +0200771 rdmsrl(wd->perfctr_msr, ctr);
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200772 if (ctr & wd_ops->checkbit) /* perfctr still running? */
Andi Kleen09198e62007-05-02 19:27:20 +0200773 return 0;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200774
Andi Kleen09198e62007-05-02 19:27:20 +0200775 wd_ops->rearm(wd, nmi_hz);
776 return 1;
777}
778
779int lapic_watchdog_ok(void)
780{
781 return wd_ops != NULL;
782}