blob: 6bff382094f58a2a40b0adebdf5fb264905614ba [file] [log] [blame]
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +02001/*
2 * local apic based NMI watchdog for various CPUs.
3 *
4 * This file also handles reservation of performance counters for coordination
5 * with other users (like oprofile).
6 *
7 * Note that these events normally don't tick when the CPU idles. This means
8 * the frequency varies with CPU load.
9 *
10 * Original code for K7/P6 written by Keith Owens
11 *
12 */
Andi Kleen09198e62007-05-02 19:27:20 +020013
14#include <linux/percpu.h>
15#include <linux/module.h>
16#include <linux/kernel.h>
17#include <linux/bitops.h>
18#include <linux/smp.h>
19#include <linux/nmi.h>
20#include <asm/apic.h>
21#include <asm/intel_arch_perfmon.h>
22
/*
 * Per-CPU bookkeeping for the active perfctr-based NMI watchdog:
 * which MSRs this CPU's watchdog instance programs and rearms.
 */
struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;		/* P4-only counter config MSR; 0 when unused */
	unsigned int perfctr_msr;	/* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;	/* the MSR to select the events to handle */
};
28
29/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
	int (*reserve)(void);		/* claim perfctr/evntsel MSRs; 0 on failure */
	void (*unreserve)(void);	/* release what reserve() claimed */
	int (*setup)(unsigned nmi_hz);	/* program this CPU's counter; 0 on failure */
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
	void (*stop)(void);		/* disable this CPU's counter interrupt */
	unsigned perfctr;		/* default perfctr MSR address */
	unsigned evntsel;		/* default event-select MSR address */
	u64 checkbit;			/* counter bit that is clear once overflowed */
};
40
Jan Beulichd1e08472007-10-17 18:04:39 +020041static const struct wd_ops *wd_ops;
Andi Kleen09198e62007-05-02 19:27:20 +020042
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +020043/*
44 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0.
46 *
47 * It will be the max for all platforms (for now)
Andi Kleen09198e62007-05-02 19:27:20 +020048 */
49#define NMI_MAX_COUNTER_BITS 66
50
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +020051/*
52 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
Andi Kleen09198e62007-05-02 19:27:20 +020053 * evtsel_nmi_owner tracks the ownership of the event selection
54 * - different performance counters/ event selection may be reserved for
55 * different subsystems this reservation system just tries to coordinate
56 * things a little
57 */
/* one ownership bit per counter / event-select register (see msr_to_bit helpers) */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

/* per-CPU state of the currently programmed watchdog counter */
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
62
63/* converts an msr to an appropriate reservation bit */
64static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
65{
Andi Kleen5dcccd82007-07-04 01:38:13 +020066 /* returns the bit offset of the performance counter register */
67 switch (boot_cpu_data.x86_vendor) {
68 case X86_VENDOR_AMD:
69 return (msr - MSR_K7_PERFCTR0);
70 case X86_VENDOR_INTEL:
71 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
72 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
73
74 switch (boot_cpu_data.x86) {
75 case 6:
76 return (msr - MSR_P6_PERFCTR0);
77 case 15:
78 return (msr - MSR_P4_BPU_PERFCTR0);
79 }
80 }
81 return 0;
Andi Kleen09198e62007-05-02 19:27:20 +020082}
83
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +020084/*
85 * converts an msr to an appropriate reservation bit
86 * returns the bit offset of the event selection register
87 */
Andi Kleen09198e62007-05-02 19:27:20 +020088static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
89{
Andi Kleen5dcccd82007-07-04 01:38:13 +020090 /* returns the bit offset of the event selection register */
91 switch (boot_cpu_data.x86_vendor) {
92 case X86_VENDOR_AMD:
93 return (msr - MSR_K7_EVNTSEL0);
94 case X86_VENDOR_INTEL:
95 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
96 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
97
98 switch (boot_cpu_data.x86) {
99 case 6:
100 return (msr - MSR_P6_EVNTSEL0);
101 case 15:
102 return (msr - MSR_P4_BSU_ESCR0);
103 }
104 }
105 return 0;
106
Andi Kleen09198e62007-05-02 19:27:20 +0200107}
108
109/* checks for a bit availability (hack for oprofile) */
110int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
111{
112 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
113
114 return (!test_bit(counter, perfctr_nmi_owner));
115}
116
117/* checks the an msr for availability */
118int avail_to_resrv_perfctr_nmi(unsigned int msr)
119{
120 unsigned int counter;
121
122 counter = nmi_perfctr_msr_to_bit(msr);
123 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
124
125 return (!test_bit(counter, perfctr_nmi_owner));
126}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200127EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
Andi Kleen09198e62007-05-02 19:27:20 +0200128
129int reserve_perfctr_nmi(unsigned int msr)
130{
131 unsigned int counter;
132
133 counter = nmi_perfctr_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200134 /* register not managed by the allocator? */
135 if (counter > NMI_MAX_COUNTER_BITS)
136 return 1;
Andi Kleen09198e62007-05-02 19:27:20 +0200137
138 if (!test_and_set_bit(counter, perfctr_nmi_owner))
139 return 1;
140 return 0;
141}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200142EXPORT_SYMBOL(reserve_perfctr_nmi);
Andi Kleen09198e62007-05-02 19:27:20 +0200143
144void release_perfctr_nmi(unsigned int msr)
145{
146 unsigned int counter;
147
148 counter = nmi_perfctr_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200149 /* register not managed by the allocator? */
150 if (counter > NMI_MAX_COUNTER_BITS)
151 return;
Andi Kleen09198e62007-05-02 19:27:20 +0200152
153 clear_bit(counter, perfctr_nmi_owner);
154}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200155EXPORT_SYMBOL(release_perfctr_nmi);
Andi Kleen09198e62007-05-02 19:27:20 +0200156
157int reserve_evntsel_nmi(unsigned int msr)
158{
159 unsigned int counter;
160
161 counter = nmi_evntsel_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200162 /* register not managed by the allocator? */
163 if (counter > NMI_MAX_COUNTER_BITS)
164 return 1;
Andi Kleen09198e62007-05-02 19:27:20 +0200165
166 if (!test_and_set_bit(counter, evntsel_nmi_owner))
167 return 1;
168 return 0;
169}
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200170EXPORT_SYMBOL(reserve_evntsel_nmi);
Andi Kleen09198e62007-05-02 19:27:20 +0200171
172void release_evntsel_nmi(unsigned int msr)
173{
174 unsigned int counter;
175
176 counter = nmi_evntsel_msr_to_bit(msr);
Stephane Eranian124d3952007-10-19 20:35:04 +0200177 /* register not managed by the allocator? */
178 if (counter > NMI_MAX_COUNTER_BITS)
179 return;
Andi Kleen09198e62007-05-02 19:27:20 +0200180
181 clear_bit(counter, evntsel_nmi_owner);
182}
Andi Kleen09198e62007-05-02 19:27:20 +0200183EXPORT_SYMBOL(release_evntsel_nmi);
184
/*
 * Stop the lapic NMI watchdog on every CPU and release the reserved
 * counter resources. No-op if the watchdog is not currently active.
 */
void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	/* stop on all CPUs first, then give the MSRs back to the allocator */
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

	if (wd_ops)
		wd_ops->unreserve();

	BUG_ON(atomic_read(&nmi_active) != 0);
}
199
/*
 * (Re-)enable the lapic NMI watchdog on every CPU: reserve the counter
 * MSRs, then program and start them per-CPU. Silently does nothing if
 * already enabled or if no CPU-specific implementation was probed.
 */
void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (!wd_ops)
		return;
	if (!wd_ops->reserve()) {
		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
		return;
	}

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
	touch_nmi_watchdog();
}
219
220/*
221 * Activate the NMI watchdog via the local APIC.
222 */
223
224static unsigned int adjust_for_32bit_ctr(unsigned int hz)
225{
226 u64 counter_val;
227 unsigned int retval = hz;
228
229 /*
230 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
231 * are writable, with higher bits sign extending from bit 31.
232 * So, we can only program the counter with 31 bit values and
233 * 32nd bit should be 1, for 33.. to be 1.
234 * Find the appropriate nmi_hz
235 */
236 counter_val = (u64)cpu_khz * 1000;
237 do_div(counter_val, retval);
238 if (counter_val > 0x7fffffffULL) {
239 u64 count = (u64)cpu_khz * 1000;
240 do_div(count, 0x7fffffffUL);
241 retval = count + 1;
242 }
243 return retval;
244}
245
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200246static void write_watchdog_counter(unsigned int perfctr_msr,
247 const char *descr, unsigned nmi_hz)
Andi Kleen09198e62007-05-02 19:27:20 +0200248{
249 u64 count = (u64)cpu_khz * 1000;
250
251 do_div(count, nmi_hz);
252 if(descr)
Thomas Gleixnercfc1b9a2008-07-21 21:35:38 +0200253 pr_debug("setting %s to -0x%08Lx\n", descr, count);
Andi Kleen09198e62007-05-02 19:27:20 +0200254 wrmsrl(perfctr_msr, 0 - count);
255}
256
257static void write_watchdog_counter32(unsigned int perfctr_msr,
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200258 const char *descr, unsigned nmi_hz)
Andi Kleen09198e62007-05-02 19:27:20 +0200259{
260 u64 count = (u64)cpu_khz * 1000;
261
262 do_div(count, nmi_hz);
263 if(descr)
Thomas Gleixnercfc1b9a2008-07-21 21:35:38 +0200264 pr_debug("setting %s to -0x%08Lx\n", descr, count);
Andi Kleen09198e62007-05-02 19:27:20 +0200265 wrmsr(perfctr_msr, (u32)(-count), 0);
266}
267
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200268/*
269 * AMD K7/K8/Family10h/Family11h support.
270 * AMD keeps this interface nicely stable so there is not much variety
271 */
Andi Kleen09198e62007-05-02 19:27:20 +0200272#define K7_EVNTSEL_ENABLE (1 << 22)
273#define K7_EVNTSEL_INT (1 << 20)
274#define K7_EVNTSEL_OS (1 << 17)
275#define K7_EVNTSEL_USR (1 << 16)
276#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
277#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
278
/*
 * Program this CPU's K7-family counter to fire an NMI at @nmi_hz.
 * Returns 1 on success. Note the ordering: the wd struct is fully
 * initialized and announced before the counter is actually enabled.
 */
static int setup_k7_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	/* route the counter interrupt as an NMI, then enable counting */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}
313
Björn Steinbrink54c6ed72007-06-16 10:15:56 -0700314static void single_msr_stop_watchdog(void)
Andi Kleen09198e62007-05-02 19:27:20 +0200315{
316 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
317
318 wrmsr(wd->evntsel_msr, 0, 0);
319}
320
321static int single_msr_reserve(void)
322{
323 if (!reserve_perfctr_nmi(wd_ops->perfctr))
324 return 0;
325
326 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
327 release_perfctr_nmi(wd_ops->perfctr);
328 return 0;
329 }
330 return 1;
331}
332
/* Give back the MSR pair claimed by single_msr_reserve(). */
static void single_msr_unreserve(void)
{
	release_evntsel_nmi(wd_ops->evntsel);
	release_perfctr_nmi(wd_ops->perfctr);
}
338
/* Reload the counter after an NMI so the next period starts. */
static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}
344
Jan Beulichd1e08472007-10-17 18:04:39 +0200345static const struct wd_ops k7_wd_ops = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200346 .reserve = single_msr_reserve,
347 .unreserve = single_msr_unreserve,
348 .setup = setup_k7_watchdog,
349 .rearm = single_msr_rearm,
350 .stop = single_msr_stop_watchdog,
351 .perfctr = MSR_K7_PERFCTR0,
352 .evntsel = MSR_K7_EVNTSEL0,
353 .checkbit = 1ULL << 47,
Andi Kleen09198e62007-05-02 19:27:20 +0200354};
355
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200356/*
357 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
358 */
Andi Kleen09198e62007-05-02 19:27:20 +0200359#define P6_EVNTSEL0_ENABLE (1 << 22)
360#define P6_EVNTSEL_INT (1 << 20)
361#define P6_EVNTSEL_OS (1 << 17)
362#define P6_EVNTSEL_USR (1 << 16)
363#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
364#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
365
/*
 * Program this CPU's P6-family counter to fire an NMI at @nmi_hz.
 * Returns 1 on success, 0 if the perfctr MSR is not writable
 * (e.g. under KVM). nmi_hz is clamped for the 32-bit counter.
 */
static int setup_p6_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	/* KVM doesn't implement this MSR */
	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
		return 0;

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	/* route the counter interrupt as an NMI, then enable counting */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}
403
/* Re-unmask the LVTPC entry and reload the 32-bit counter after an NMI. */
static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/*
	 * P6 based Pentium M need to re-unmask
	 * the apic vector but it doesn't hurt
	 * other P6 variant.
	 * ArchPerfom/Core Duo also needs this
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	/* P6/ARCH_PERFMON has 32 bit counter write */
	write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
}
417
Jan Beulichd1e08472007-10-17 18:04:39 +0200418static const struct wd_ops p6_wd_ops = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200419 .reserve = single_msr_reserve,
420 .unreserve = single_msr_unreserve,
421 .setup = setup_p6_watchdog,
422 .rearm = p6_rearm,
423 .stop = single_msr_stop_watchdog,
424 .perfctr = MSR_P6_PERFCTR0,
425 .evntsel = MSR_P6_EVNTSEL0,
426 .checkbit = 1ULL << 39,
Andi Kleen09198e62007-05-02 19:27:20 +0200427};
428
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200429/*
430 * Intel P4 performance counters.
431 * By far the most complicated of all.
432 */
433#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
434#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
435#define P4_ESCR_OS (1 << 3)
436#define P4_ESCR_USR (1 << 2)
437#define P4_CCCR_OVF_PMI0 (1 << 26)
438#define P4_CCCR_OVF_PMI1 (1 << 27)
439#define P4_CCCR_THRESHOLD(N) ((N) << 20)
440#define P4_CCCR_COMPLEMENT (1 << 19)
441#define P4_CCCR_COMPARE (1 << 18)
442#define P4_CCCR_REQUIRED (3 << 16)
443#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
444#define P4_CCCR_ENABLE (1 << 12)
445#define P4_CCCR_OVF (1 << 31)
Andi Kleen09198e62007-05-02 19:27:20 +0200446
Aristeu Rozanski28b166a2008-09-22 13:14:13 -0400447#define P4_CONTROLS 18
448static unsigned int p4_controls[18] = {
449 MSR_P4_BPU_CCCR0,
450 MSR_P4_BPU_CCCR1,
451 MSR_P4_BPU_CCCR2,
452 MSR_P4_BPU_CCCR3,
453 MSR_P4_MS_CCCR0,
454 MSR_P4_MS_CCCR1,
455 MSR_P4_MS_CCCR2,
456 MSR_P4_MS_CCCR3,
457 MSR_P4_FLAME_CCCR0,
458 MSR_P4_FLAME_CCCR1,
459 MSR_P4_FLAME_CCCR2,
460 MSR_P4_FLAME_CCCR3,
461 MSR_P4_IQ_CCCR0,
462 MSR_P4_IQ_CCCR1,
463 MSR_P4_IQ_CCCR2,
464 MSR_P4_IQ_CCCR3,
465 MSR_P4_IQ_CCCR4,
466 MSR_P4_IQ_CCCR5,
467};
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200468/*
469 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
470 * CRU_ESCR0 (with any non-null event selector) through a complemented
471 * max threshold. [IA32-Vol3, Section 14.9.9]
472 */
Andi Kleen09198e62007-05-02 19:27:20 +0200473static int setup_p4_watchdog(unsigned nmi_hz)
474{
475 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
476 unsigned int evntsel, cccr_val;
477 unsigned int misc_enable, dummy;
478 unsigned int ht_num;
479 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
480
481 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
482 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
483 return 0;
484
485#ifdef CONFIG_SMP
486 /* detect which hyperthread we are on */
487 if (smp_num_siblings == 2) {
488 unsigned int ebx, apicid;
489
490 ebx = cpuid_ebx(1);
491 apicid = (ebx >> 24) & 0xff;
492 ht_num = apicid & 1;
493 } else
494#endif
495 ht_num = 0;
496
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200497 /*
498 * performance counters are shared resources
Andi Kleen09198e62007-05-02 19:27:20 +0200499 * assign each hyperthread its own set
500 * (re-use the ESCR0 register, seems safe
501 * and keeps the cccr_val the same)
502 */
503 if (!ht_num) {
504 /* logical cpu 0 */
505 perfctr_msr = MSR_P4_IQ_PERFCTR0;
506 evntsel_msr = MSR_P4_CRU_ESCR0;
507 cccr_msr = MSR_P4_IQ_CCCR0;
508 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
Aristeu Rozanski28b166a2008-09-22 13:14:13 -0400509
510 /*
511 * If we're on the kdump kernel or other situation, we may
512 * still have other performance counter registers set to
513 * interrupt and they'll keep interrupting forever because
514 * of the P4_CCCR_OVF quirk. So we need to ACK all the
515 * pending interrupts and disable all the registers here,
516 * before reenabling the NMI delivery. Refer to p4_rearm()
517 * about the P4_CCCR_OVF quirk.
518 */
519 if (reset_devices) {
520 unsigned int low, high;
521 int i;
522
523 for (i = 0; i < P4_CONTROLS; i++) {
524 rdmsr(p4_controls[i], low, high);
525 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
526 wrmsr(p4_controls[i], low, high);
527 }
528 }
Andi Kleen09198e62007-05-02 19:27:20 +0200529 } else {
530 /* logical cpu 1 */
531 perfctr_msr = MSR_P4_IQ_PERFCTR1;
532 evntsel_msr = MSR_P4_CRU_ESCR0;
533 cccr_msr = MSR_P4_IQ_CCCR1;
Aristeu Rozanskidcc98412008-08-14 16:32:15 -0400534
535 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
536 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
537 cccr_val = P4_CCCR_OVF_PMI0;
538 else
539 cccr_val = P4_CCCR_OVF_PMI1;
540 cccr_val |= P4_CCCR_ESCR_SELECT(4);
Andi Kleen09198e62007-05-02 19:27:20 +0200541 }
542
543 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
544 | P4_ESCR_OS
545 | P4_ESCR_USR;
546
547 cccr_val |= P4_CCCR_THRESHOLD(15)
548 | P4_CCCR_COMPLEMENT
549 | P4_CCCR_COMPARE
550 | P4_CCCR_REQUIRED;
551
552 wrmsr(evntsel_msr, evntsel, 0);
553 wrmsr(cccr_msr, cccr_val, 0);
554 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
Aristeu Rozanskib3e15bd2008-09-22 13:13:59 -0400555
Andi Kleen09198e62007-05-02 19:27:20 +0200556 wd->perfctr_msr = perfctr_msr;
557 wd->evntsel_msr = evntsel_msr;
558 wd->cccr_msr = cccr_msr;
Aristeu Rozanskib3e15bd2008-09-22 13:13:59 -0400559
560 /* ok, everything is initialized, announce that we're set */
561 cpu_nmi_set_wd_enabled();
562
563 apic_write(APIC_LVTPC, APIC_DM_NMI);
564 cccr_val |= P4_CCCR_ENABLE;
565 wrmsr(cccr_msr, cccr_val, 0);
Andi Kleen09198e62007-05-02 19:27:20 +0200566 return 1;
567}
568
Björn Steinbrink54c6ed72007-06-16 10:15:56 -0700569static void stop_p4_watchdog(void)
Andi Kleen09198e62007-05-02 19:27:20 +0200570{
571 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
572 wrmsr(wd->cccr_msr, 0, 0);
573 wrmsr(wd->evntsel_msr, 0, 0);
574}
575
/*
 * Reserve both hyperthreads' IQ perfctrs plus the shared CRU_ESCR0.
 * Returns 1 on success; on failure everything claimed so far is rolled
 * back via the goto ladder and 0 is returned.
 */
static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
 fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}
597
/* Give back everything p4_reserve() claimed. */
static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}
607
/* Ack the P4 overflow, unmask LVTPC and reload the counter after an NMI. */
static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	unsigned dummy;
	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, dummy);
	dummy &= ~P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, dummy);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}
625
Jan Beulichd1e08472007-10-17 18:04:39 +0200626static const struct wd_ops p4_wd_ops = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200627 .reserve = p4_reserve,
628 .unreserve = p4_unreserve,
629 .setup = setup_p4_watchdog,
630 .rearm = p4_rearm,
631 .stop = stop_p4_watchdog,
Andi Kleen09198e62007-05-02 19:27:20 +0200632 /* RED-PEN this is wrong for the other sibling */
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200633 .perfctr = MSR_P4_BPU_PERFCTR0,
634 .evntsel = MSR_P4_BSU_ESCR0,
635 .checkbit = 1ULL << 39,
Andi Kleen09198e62007-05-02 19:27:20 +0200636};
637
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200638/*
639 * Watchdog using the Intel architected PerfMon.
640 * Used for Core2 and hopefully all future Intel CPUs.
641 */
Andi Kleen09198e62007-05-02 19:27:20 +0200642#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
643#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
644
Jan Beulichd1e08472007-10-17 18:04:39 +0200645static struct wd_ops intel_arch_wd_ops;
646
Andi Kleen09198e62007-05-02 19:27:20 +0200647static int setup_intel_arch_watchdog(unsigned nmi_hz)
648{
649 unsigned int ebx;
650 union cpuid10_eax eax;
651 unsigned int unused;
652 unsigned int perfctr_msr, evntsel_msr;
653 unsigned int evntsel;
654 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
655
656 /*
657 * Check whether the Architectural PerfMon supports
658 * Unhalted Core Cycles Event or not.
659 * NOTE: Corresponding bit = 0 in ebx indicates event present.
660 */
661 cpuid(10, &(eax.full), &ebx, &unused, &unused);
662 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
663 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
664 return 0;
665
Stephane Eranian23d5ea52007-09-06 16:59:51 +0200666 perfctr_msr = wd_ops->perfctr;
667 evntsel_msr = wd_ops->evntsel;
Andi Kleen09198e62007-05-02 19:27:20 +0200668
669 wrmsrl(perfctr_msr, 0UL);
670
671 evntsel = ARCH_PERFMON_EVENTSEL_INT
672 | ARCH_PERFMON_EVENTSEL_OS
673 | ARCH_PERFMON_EVENTSEL_USR
674 | ARCH_PERFMON_NMI_EVENT_SEL
675 | ARCH_PERFMON_NMI_EVENT_UMASK;
676
677 /* setup the timer */
678 wrmsr(evntsel_msr, evntsel, 0);
679 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
680 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
Andi Kleen09198e62007-05-02 19:27:20 +0200681
682 wd->perfctr_msr = perfctr_msr;
683 wd->evntsel_msr = evntsel_msr;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200684 wd->cccr_msr = 0; /* unused */
Aristeu Rozanskib3e15bd2008-09-22 13:13:59 -0400685
686 /* ok, everything is initialized, announce that we're set */
687 cpu_nmi_set_wd_enabled();
688
689 apic_write(APIC_LVTPC, APIC_DM_NMI);
690 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
691 wrmsr(evntsel_msr, evntsel, 0);
Jan Beulichd1e08472007-10-17 18:04:39 +0200692 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
Andi Kleen09198e62007-05-02 19:27:20 +0200693 return 1;
694}
695
Jan Beulichd1e08472007-10-17 18:04:39 +0200696static struct wd_ops intel_arch_wd_ops __read_mostly = {
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200697 .reserve = single_msr_reserve,
698 .unreserve = single_msr_unreserve,
699 .setup = setup_intel_arch_watchdog,
700 .rearm = p6_rearm,
701 .stop = single_msr_stop_watchdog,
702 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
703 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
Andi Kleen09198e62007-05-02 19:27:20 +0200704};
705
/*
 * Pick the wd_ops implementation matching the boot CPU.
 * Leaves wd_ops NULL for vendors/families with no supported counters.
 */
static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
		    boot_cpu_data.x86 != 16)
			return;
		wd_ops = &k7_wd_ops;
		break;
	case X86_VENDOR_INTEL:
		/*
		 * Work around Core Duo (Yonah) errata AE49 where perfctr1
		 * doesn't have a working enable bit.
		 */
		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			/* models > 13 without arch perfmon are unsupported */
			if (boot_cpu_data.x86_model > 13)
				return;

			wd_ops = &p6_wd_ops;
			break;
		case 15:
			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}
744
745/* Interface to nmi.c */
746
/*
 * Per-CPU entry point from nmi.c: probe (once) and program the watchdog
 * on the calling CPU. Returns 0 on success, -1 on failure.
 * Probing and reservation happen only on the first call (wd_ops NULL).
 */
int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops) {
			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
			return -1;
		}

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
			raw_smp_processor_id());
		return -1;
	}

	return 0;
}
771
/* Stop the watchdog counter on the calling CPU, if one was probed. */
void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}
777
/*
 * Clamp a requested NMI frequency for CPUs whose counters only take
 * 32-bit writes. NOTE(review): the Core Duo quirk programs
 * MSR_ARCH_PERFMON_PERFCTR0, which appears to be covered here by the
 * MSR_P6_PERFCTR0 comparison (same MSR address) — confirm against
 * the MSR definitions.
 */
unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}
786
787int lapic_wd_event(unsigned nmi_hz)
788{
789 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
790 u64 ctr;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200791
Andi Kleen09198e62007-05-02 19:27:20 +0200792 rdmsrl(wd->perfctr_msr, ctr);
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200793 if (ctr & wd_ops->checkbit) /* perfctr still running? */
Andi Kleen09198e62007-05-02 19:27:20 +0200794 return 0;
Cyrill Gorcunov47a486c2008-06-24 22:52:03 +0200795
Andi Kleen09198e62007-05-02 19:27:20 +0200796 wd_ops->rearm(wd, nmi_hz);
797 return 1;
798}
799
800int lapic_watchdog_ok(void)
801{
802 return wd_ops != NULL;
803}