| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 1 | /* local apic based NMI watchdog for various CPUs. | 
 | 2 |    This file also handles reservation of performance counters for coordination | 
 | 3 |    with other users (like oprofile). | 
 | 4 |  | 
 | 5 |    Note that these events normally don't tick when the CPU idles. This means | 
 | 6 |    the frequency varies with CPU load. | 
 | 7 |  | 
 | 8 |    Original code for K7/P6 written by Keith Owens */ | 
 | 9 |  | 
 | 10 | #include <linux/percpu.h> | 
 | 11 | #include <linux/module.h> | 
 | 12 | #include <linux/kernel.h> | 
 | 13 | #include <linux/bitops.h> | 
 | 14 | #include <linux/smp.h> | 
 | 15 | #include <linux/nmi.h> | 
 | 16 | #include <asm/apic.h> | 
 | 17 | #include <asm/intel_arch_perfmon.h> | 
 | 18 |  | 
/*
 * Record of the MSRs the watchdog programmed on one CPU.  One instance
 * exists per CPU; the fields are filled in by the wd_ops->setup() routines
 * and read back by the stop/rearm/event paths.
 */
struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;     /* CCCR MSR (P4); the other setups store 0 here */
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
 | 24 |  | 
/*
 * Interface defining a CPU specific perfctr watchdog.
 *
 * reserve/setup implementations in this file return non-zero on success
 * and 0 on failure.
 */
struct wd_ops {
	int (*reserve)(void);		/* claim the counter/event-select MSRs */
	void (*unreserve)(void);	/* give them back */
	int (*setup)(unsigned nmi_hz);	/* program and start the counter on this CPU */
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); /* restart the period */
	void (*stop)(void);		/* stop the counter on this CPU */
	unsigned perfctr;		/* counter MSR number */
	unsigned evntsel;		/* event select MSR number */
	u64 checkbit;			/* counter bit tested by lapic_wd_event(): set = still running */
};
 | 36 |  | 
/* Ops table for the detected CPU; stays NULL until probe_nmi_watchdog() picks one */
static const struct wd_ops *wd_ops;

/*
 * This number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now).
 */
#define NMI_MAX_COUNTER_BITS 66

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selections may be reserved for
 * different subsystems; this reservation system just tries to coordinate
 * things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

/* Per-CPU record of the MSR numbers written by the setup routines */
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
 | 54 |  | 
 | 55 | /* converts an msr to an appropriate reservation bit */ | 
 | 56 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | 
 | 57 | { | 
| Andi Kleen | 5dcccd8 | 2007-07-04 01:38:13 +0200 | [diff] [blame] | 58 | 	/* returns the bit offset of the performance counter register */ | 
 | 59 | 	switch (boot_cpu_data.x86_vendor) { | 
 | 60 | 	case X86_VENDOR_AMD: | 
 | 61 | 		return (msr - MSR_K7_PERFCTR0); | 
 | 62 | 	case X86_VENDOR_INTEL: | 
 | 63 | 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 
 | 64 | 			return (msr - MSR_ARCH_PERFMON_PERFCTR0); | 
 | 65 |  | 
 | 66 | 		switch (boot_cpu_data.x86) { | 
 | 67 | 		case 6: | 
 | 68 | 			return (msr - MSR_P6_PERFCTR0); | 
 | 69 | 		case 15: | 
 | 70 | 			return (msr - MSR_P4_BPU_PERFCTR0); | 
 | 71 | 		} | 
 | 72 | 	} | 
 | 73 | 	return 0; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 74 | } | 
 | 75 |  | 
 | 76 | /* converts an msr to an appropriate reservation bit */ | 
 | 77 | /* returns the bit offset of the event selection register */ | 
 | 78 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | 
 | 79 | { | 
| Andi Kleen | 5dcccd8 | 2007-07-04 01:38:13 +0200 | [diff] [blame] | 80 | 	/* returns the bit offset of the event selection register */ | 
 | 81 | 	switch (boot_cpu_data.x86_vendor) { | 
 | 82 | 	case X86_VENDOR_AMD: | 
 | 83 | 		return (msr - MSR_K7_EVNTSEL0); | 
 | 84 | 	case X86_VENDOR_INTEL: | 
 | 85 | 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 
 | 86 | 			return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | 
 | 87 |  | 
 | 88 | 		switch (boot_cpu_data.x86) { | 
 | 89 | 		case 6: | 
 | 90 | 			return (msr - MSR_P6_EVNTSEL0); | 
 | 91 | 		case 15: | 
 | 92 | 			return (msr - MSR_P4_BSU_ESCR0); | 
 | 93 | 		} | 
 | 94 | 	} | 
 | 95 | 	return 0; | 
 | 96 |  | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 97 | } | 
 | 98 |  | 
 | 99 | /* checks for a bit availability (hack for oprofile) */ | 
 | 100 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | 
 | 101 | { | 
 | 102 | 	BUG_ON(counter > NMI_MAX_COUNTER_BITS); | 
 | 103 |  | 
 | 104 | 	return (!test_bit(counter, perfctr_nmi_owner)); | 
 | 105 | } | 
 | 106 |  | 
 | 107 | /* checks the an msr for availability */ | 
 | 108 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | 
 | 109 | { | 
 | 110 | 	unsigned int counter; | 
 | 111 |  | 
 | 112 | 	counter = nmi_perfctr_msr_to_bit(msr); | 
 | 113 | 	BUG_ON(counter > NMI_MAX_COUNTER_BITS); | 
 | 114 |  | 
 | 115 | 	return (!test_bit(counter, perfctr_nmi_owner)); | 
 | 116 | } | 
 | 117 |  | 
 | 118 | int reserve_perfctr_nmi(unsigned int msr) | 
 | 119 | { | 
 | 120 | 	unsigned int counter; | 
 | 121 |  | 
 | 122 | 	counter = nmi_perfctr_msr_to_bit(msr); | 
| Stephane Eranian | 124d395 | 2007-10-19 20:35:04 +0200 | [diff] [blame] | 123 | 	/* register not managed by the allocator? */ | 
 | 124 | 	if (counter > NMI_MAX_COUNTER_BITS) | 
 | 125 | 		return 1; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 126 |  | 
 | 127 | 	if (!test_and_set_bit(counter, perfctr_nmi_owner)) | 
 | 128 | 		return 1; | 
 | 129 | 	return 0; | 
 | 130 | } | 
 | 131 |  | 
 | 132 | void release_perfctr_nmi(unsigned int msr) | 
 | 133 | { | 
 | 134 | 	unsigned int counter; | 
 | 135 |  | 
 | 136 | 	counter = nmi_perfctr_msr_to_bit(msr); | 
| Stephane Eranian | 124d395 | 2007-10-19 20:35:04 +0200 | [diff] [blame] | 137 | 	/* register not managed by the allocator? */ | 
 | 138 | 	if (counter > NMI_MAX_COUNTER_BITS) | 
 | 139 | 		return; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 140 |  | 
 | 141 | 	clear_bit(counter, perfctr_nmi_owner); | 
 | 142 | } | 
 | 143 |  | 
 | 144 | int reserve_evntsel_nmi(unsigned int msr) | 
 | 145 | { | 
 | 146 | 	unsigned int counter; | 
 | 147 |  | 
 | 148 | 	counter = nmi_evntsel_msr_to_bit(msr); | 
| Stephane Eranian | 124d395 | 2007-10-19 20:35:04 +0200 | [diff] [blame] | 149 | 	/* register not managed by the allocator? */ | 
 | 150 | 	if (counter > NMI_MAX_COUNTER_BITS) | 
 | 151 | 		return 1; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 152 |  | 
 | 153 | 	if (!test_and_set_bit(counter, evntsel_nmi_owner)) | 
 | 154 | 		return 1; | 
 | 155 | 	return 0; | 
 | 156 | } | 
 | 157 |  | 
 | 158 | void release_evntsel_nmi(unsigned int msr) | 
 | 159 | { | 
 | 160 | 	unsigned int counter; | 
 | 161 |  | 
 | 162 | 	counter = nmi_evntsel_msr_to_bit(msr); | 
| Stephane Eranian | 124d395 | 2007-10-19 20:35:04 +0200 | [diff] [blame] | 163 | 	/* register not managed by the allocator? */ | 
 | 164 | 	if (counter > NMI_MAX_COUNTER_BITS) | 
 | 165 | 		return; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 166 |  | 
 | 167 | 	clear_bit(counter, evntsel_nmi_owner); | 
 | 168 | } | 
 | 169 |  | 
/* Reservation helpers exported for other perfctr users (like oprofile) */
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
 | 175 |  | 
 | 176 | void disable_lapic_nmi_watchdog(void) | 
 | 177 | { | 
 | 178 | 	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | 
 | 179 |  | 
 | 180 | 	if (atomic_read(&nmi_active) <= 0) | 
 | 181 | 		return; | 
 | 182 |  | 
| Björn Steinbrink | 54c6ed7 | 2007-06-16 10:15:56 -0700 | [diff] [blame] | 183 | 	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 184 | 	wd_ops->unreserve(); | 
 | 185 |  | 
 | 186 | 	BUG_ON(atomic_read(&nmi_active) != 0); | 
 | 187 | } | 
 | 188 |  | 
 | 189 | void enable_lapic_nmi_watchdog(void) | 
 | 190 | { | 
 | 191 | 	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | 
 | 192 |  | 
 | 193 | 	/* are we already enabled */ | 
 | 194 | 	if (atomic_read(&nmi_active) != 0) | 
 | 195 | 		return; | 
 | 196 |  | 
 | 197 | 	/* are we lapic aware */ | 
 | 198 | 	if (!wd_ops) | 
 | 199 | 		return; | 
 | 200 | 	if (!wd_ops->reserve()) { | 
 | 201 | 		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); | 
 | 202 | 		return; | 
 | 203 | 	} | 
 | 204 |  | 
 | 205 | 	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | 
 | 206 | 	touch_nmi_watchdog(); | 
 | 207 | } | 
 | 208 |  | 
 | 209 | /* | 
 | 210 |  * Activate the NMI watchdog via the local APIC. | 
 | 211 |  */ | 
 | 212 |  | 
 | 213 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | 
 | 214 | { | 
 | 215 | 	u64 counter_val; | 
 | 216 | 	unsigned int retval = hz; | 
 | 217 |  | 
 | 218 | 	/* | 
 | 219 | 	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | 
 | 220 | 	 * are writable, with higher bits sign extending from bit 31. | 
 | 221 | 	 * So, we can only program the counter with 31 bit values and | 
 | 222 | 	 * 32nd bit should be 1, for 33.. to be 1. | 
 | 223 | 	 * Find the appropriate nmi_hz | 
 | 224 | 	 */ | 
 | 225 | 	counter_val = (u64)cpu_khz * 1000; | 
 | 226 | 	do_div(counter_val, retval); | 
 | 227 |  	if (counter_val > 0x7fffffffULL) { | 
 | 228 | 		u64 count = (u64)cpu_khz * 1000; | 
 | 229 | 		do_div(count, 0x7fffffffUL); | 
 | 230 | 		retval = count + 1; | 
 | 231 | 	} | 
 | 232 | 	return retval; | 
 | 233 | } | 
 | 234 |  | 
 | 235 | static void | 
 | 236 | write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) | 
 | 237 | { | 
 | 238 | 	u64 count = (u64)cpu_khz * 1000; | 
 | 239 |  | 
 | 240 | 	do_div(count, nmi_hz); | 
 | 241 | 	if(descr) | 
 | 242 | 		Dprintk("setting %s to -0x%08Lx\n", descr, count); | 
 | 243 | 	wrmsrl(perfctr_msr, 0 - count); | 
 | 244 | } | 
 | 245 |  | 
 | 246 | static void write_watchdog_counter32(unsigned int perfctr_msr, | 
 | 247 | 		const char *descr, unsigned nmi_hz) | 
 | 248 | { | 
 | 249 | 	u64 count = (u64)cpu_khz * 1000; | 
 | 250 |  | 
 | 251 | 	do_div(count, nmi_hz); | 
 | 252 | 	if(descr) | 
 | 253 | 		Dprintk("setting %s to -0x%08Lx\n", descr, count); | 
 | 254 | 	wrmsr(perfctr_msr, (u32)(-count), 0); | 
 | 255 | } | 
 | 256 |  | 
 | 257 | /* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface | 
 | 258 |    nicely stable so there is not much variety */ | 
 | 259 |  | 
 | 260 | #define K7_EVNTSEL_ENABLE	(1 << 22) | 
 | 261 | #define K7_EVNTSEL_INT		(1 << 20) | 
 | 262 | #define K7_EVNTSEL_OS		(1 << 17) | 
 | 263 | #define K7_EVNTSEL_USR		(1 << 16) | 
 | 264 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76 | 
 | 265 | #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | 
 | 266 |  | 
 | 267 | static int setup_k7_watchdog(unsigned nmi_hz) | 
 | 268 | { | 
 | 269 | 	unsigned int perfctr_msr, evntsel_msr; | 
 | 270 | 	unsigned int evntsel; | 
 | 271 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 272 |  | 
| Stephane Eranian | 23d5ea5 | 2007-09-06 16:59:51 +0200 | [diff] [blame] | 273 | 	perfctr_msr = wd_ops->perfctr; | 
 | 274 | 	evntsel_msr = wd_ops->evntsel; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 275 |  | 
 | 276 | 	wrmsrl(perfctr_msr, 0UL); | 
 | 277 |  | 
 | 278 | 	evntsel = K7_EVNTSEL_INT | 
 | 279 | 		| K7_EVNTSEL_OS | 
 | 280 | 		| K7_EVNTSEL_USR | 
 | 281 | 		| K7_NMI_EVENT; | 
 | 282 |  | 
 | 283 | 	/* setup the timer */ | 
 | 284 | 	wrmsr(evntsel_msr, evntsel, 0); | 
 | 285 | 	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | 
 | 286 | 	apic_write(APIC_LVTPC, APIC_DM_NMI); | 
 | 287 | 	evntsel |= K7_EVNTSEL_ENABLE; | 
 | 288 | 	wrmsr(evntsel_msr, evntsel, 0); | 
 | 289 |  | 
 | 290 | 	wd->perfctr_msr = perfctr_msr; | 
 | 291 | 	wd->evntsel_msr = evntsel_msr; | 
 | 292 | 	wd->cccr_msr = 0;  //unused | 
 | 293 | 	return 1; | 
 | 294 | } | 
 | 295 |  | 
| Björn Steinbrink | 54c6ed7 | 2007-06-16 10:15:56 -0700 | [diff] [blame] | 296 | static void single_msr_stop_watchdog(void) | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 297 | { | 
 | 298 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 299 |  | 
 | 300 | 	wrmsr(wd->evntsel_msr, 0, 0); | 
 | 301 | } | 
 | 302 |  | 
 | 303 | static int single_msr_reserve(void) | 
 | 304 | { | 
 | 305 | 	if (!reserve_perfctr_nmi(wd_ops->perfctr)) | 
 | 306 | 		return 0; | 
 | 307 |  | 
 | 308 | 	if (!reserve_evntsel_nmi(wd_ops->evntsel)) { | 
 | 309 | 		release_perfctr_nmi(wd_ops->perfctr); | 
 | 310 | 		return 0; | 
 | 311 | 	} | 
 | 312 | 	return 1; | 
 | 313 | } | 
 | 314 |  | 
 | 315 | static void single_msr_unreserve(void) | 
 | 316 | { | 
| Björn Steinbrink | da88ba1 | 2007-06-16 10:16:04 -0700 | [diff] [blame] | 317 | 	release_evntsel_nmi(wd_ops->evntsel); | 
 | 318 | 	release_perfctr_nmi(wd_ops->perfctr); | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 319 | } | 
 | 320 |  | 
 | 321 | static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | 
 | 322 | { | 
 | 323 | 	/* start the cycle over again */ | 
 | 324 | 	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | 
 | 325 | } | 
 | 326 |  | 
/* Watchdog ops for AMD; probe_nmi_watchdog() picks this for families 6, 15 and 16 */
static const struct wd_ops k7_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_k7_watchdog,
	.rearm = single_msr_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_K7_PERFCTR0,
	.evntsel = MSR_K7_EVNTSEL0,
	/* counter bit lapic_wd_event() tests to see whether it is still running */
	.checkbit = 1ULL<<47,
};
 | 337 |  | 
 | 338 | /* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ | 
 | 339 |  | 
 | 340 | #define P6_EVNTSEL0_ENABLE	(1 << 22) | 
 | 341 | #define P6_EVNTSEL_INT		(1 << 20) | 
 | 342 | #define P6_EVNTSEL_OS		(1 << 17) | 
 | 343 | #define P6_EVNTSEL_USR		(1 << 16) | 
 | 344 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79 | 
 | 345 | #define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED | 
 | 346 |  | 
 | 347 | static int setup_p6_watchdog(unsigned nmi_hz) | 
 | 348 | { | 
 | 349 | 	unsigned int perfctr_msr, evntsel_msr; | 
 | 350 | 	unsigned int evntsel; | 
 | 351 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 352 |  | 
| Stephane Eranian | 23d5ea5 | 2007-09-06 16:59:51 +0200 | [diff] [blame] | 353 | 	perfctr_msr = wd_ops->perfctr; | 
 | 354 | 	evntsel_msr = wd_ops->evntsel; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 355 |  | 
| Andi Kleen | 57c22f4 | 2007-07-22 11:12:39 +0200 | [diff] [blame] | 356 | 	/* KVM doesn't implement this MSR */ | 
 | 357 | 	if (wrmsr_safe(perfctr_msr, 0, 0) < 0) | 
 | 358 | 		return 0; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 359 |  | 
 | 360 | 	evntsel = P6_EVNTSEL_INT | 
 | 361 | 		| P6_EVNTSEL_OS | 
 | 362 | 		| P6_EVNTSEL_USR | 
 | 363 | 		| P6_NMI_EVENT; | 
 | 364 |  | 
 | 365 | 	/* setup the timer */ | 
 | 366 | 	wrmsr(evntsel_msr, evntsel, 0); | 
 | 367 | 	nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 
 | 368 | 	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | 
 | 369 | 	apic_write(APIC_LVTPC, APIC_DM_NMI); | 
 | 370 | 	evntsel |= P6_EVNTSEL0_ENABLE; | 
 | 371 | 	wrmsr(evntsel_msr, evntsel, 0); | 
 | 372 |  | 
 | 373 | 	wd->perfctr_msr = perfctr_msr; | 
 | 374 | 	wd->evntsel_msr = evntsel_msr; | 
 | 375 | 	wd->cccr_msr = 0;  //unused | 
 | 376 | 	return 1; | 
 | 377 | } | 
 | 378 |  | 
 | 379 | static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | 
 | 380 | { | 
 | 381 | 	/* P6 based Pentium M need to re-unmask | 
 | 382 | 	 * the apic vector but it doesn't hurt | 
 | 383 | 	 * other P6 variant. | 
 | 384 | 	 * ArchPerfom/Core Duo also needs this */ | 
 | 385 | 	apic_write(APIC_LVTPC, APIC_DM_NMI); | 
 | 386 | 	/* P6/ARCH_PERFMON has 32 bit counter write */ | 
 | 387 | 	write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); | 
 | 388 | } | 
 | 389 |  | 
/*
 * Watchdog ops for Intel family 6 without architectural perfmon;
 * probe_nmi_watchdog() picks this for models up to 0xd.
 */
static const struct wd_ops p6_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_p6_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_P6_PERFCTR0,
	.evntsel = MSR_P6_EVNTSEL0,
	/* counter bit lapic_wd_event() tests to see whether it is still running */
	.checkbit = 1ULL<<39,
};
 | 400 |  | 
 | 401 | /* Intel P4 performance counters. By far the most complicated of all. */ | 
 | 402 |  | 
 | 403 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7) | 
 | 404 | #define P4_ESCR_EVENT_SELECT(N)	((N)<<25) | 
 | 405 | #define P4_ESCR_OS		(1<<3) | 
 | 406 | #define P4_ESCR_USR		(1<<2) | 
 | 407 | #define P4_CCCR_OVF_PMI0	(1<<26) | 
 | 408 | #define P4_CCCR_OVF_PMI1	(1<<27) | 
 | 409 | #define P4_CCCR_THRESHOLD(N)	((N)<<20) | 
 | 410 | #define P4_CCCR_COMPLEMENT	(1<<19) | 
 | 411 | #define P4_CCCR_COMPARE		(1<<18) | 
 | 412 | #define P4_CCCR_REQUIRED	(3<<16) | 
 | 413 | #define P4_CCCR_ESCR_SELECT(N)	((N)<<13) | 
 | 414 | #define P4_CCCR_ENABLE		(1<<12) | 
 | 415 | #define P4_CCCR_OVF 		(1<<31) | 
 | 416 |  | 
 | 417 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | 
 | 418 |    CRU_ESCR0 (with any non-null event selector) through a complemented | 
 | 419 |    max threshold. [IA32-Vol3, Section 14.9.9] */ | 
 | 420 |  | 
 | 421 | static int setup_p4_watchdog(unsigned nmi_hz) | 
 | 422 | { | 
 | 423 | 	unsigned int perfctr_msr, evntsel_msr, cccr_msr; | 
 | 424 | 	unsigned int evntsel, cccr_val; | 
 | 425 | 	unsigned int misc_enable, dummy; | 
 | 426 | 	unsigned int ht_num; | 
 | 427 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 428 |  | 
 | 429 | 	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | 
 | 430 | 	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 
 | 431 | 		return 0; | 
 | 432 |  | 
 | 433 | #ifdef CONFIG_SMP | 
 | 434 | 	/* detect which hyperthread we are on */ | 
 | 435 | 	if (smp_num_siblings == 2) { | 
 | 436 | 		unsigned int ebx, apicid; | 
 | 437 |  | 
 | 438 |         	ebx = cpuid_ebx(1); | 
 | 439 | 	        apicid = (ebx >> 24) & 0xff; | 
 | 440 |         	ht_num = apicid & 1; | 
 | 441 | 	} else | 
 | 442 | #endif | 
 | 443 | 		ht_num = 0; | 
 | 444 |  | 
 | 445 | 	/* performance counters are shared resources | 
 | 446 | 	 * assign each hyperthread its own set | 
 | 447 | 	 * (re-use the ESCR0 register, seems safe | 
 | 448 | 	 * and keeps the cccr_val the same) | 
 | 449 | 	 */ | 
 | 450 | 	if (!ht_num) { | 
 | 451 | 		/* logical cpu 0 */ | 
 | 452 | 		perfctr_msr = MSR_P4_IQ_PERFCTR0; | 
 | 453 | 		evntsel_msr = MSR_P4_CRU_ESCR0; | 
 | 454 | 		cccr_msr = MSR_P4_IQ_CCCR0; | 
 | 455 | 		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | 
 | 456 | 	} else { | 
 | 457 | 		/* logical cpu 1 */ | 
 | 458 | 		perfctr_msr = MSR_P4_IQ_PERFCTR1; | 
 | 459 | 		evntsel_msr = MSR_P4_CRU_ESCR0; | 
 | 460 | 		cccr_msr = MSR_P4_IQ_CCCR1; | 
 | 461 | 		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | 
 | 462 | 	} | 
 | 463 |  | 
 | 464 | 	evntsel = P4_ESCR_EVENT_SELECT(0x3F) | 
 | 465 | 	 	| P4_ESCR_OS | 
 | 466 | 		| P4_ESCR_USR; | 
 | 467 |  | 
 | 468 | 	cccr_val |= P4_CCCR_THRESHOLD(15) | 
 | 469 | 		 | P4_CCCR_COMPLEMENT | 
 | 470 | 		 | P4_CCCR_COMPARE | 
 | 471 | 		 | P4_CCCR_REQUIRED; | 
 | 472 |  | 
 | 473 | 	wrmsr(evntsel_msr, evntsel, 0); | 
 | 474 | 	wrmsr(cccr_msr, cccr_val, 0); | 
 | 475 | 	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | 
 | 476 | 	apic_write(APIC_LVTPC, APIC_DM_NMI); | 
 | 477 | 	cccr_val |= P4_CCCR_ENABLE; | 
 | 478 | 	wrmsr(cccr_msr, cccr_val, 0); | 
 | 479 | 	wd->perfctr_msr = perfctr_msr; | 
 | 480 | 	wd->evntsel_msr = evntsel_msr; | 
 | 481 | 	wd->cccr_msr = cccr_msr; | 
 | 482 | 	return 1; | 
 | 483 | } | 
 | 484 |  | 
| Björn Steinbrink | 54c6ed7 | 2007-06-16 10:15:56 -0700 | [diff] [blame] | 485 | static void stop_p4_watchdog(void) | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 486 | { | 
 | 487 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 488 | 	wrmsr(wd->cccr_msr, 0, 0); | 
 | 489 | 	wrmsr(wd->evntsel_msr, 0, 0); | 
 | 490 | } | 
 | 491 |  | 
 | 492 | static int p4_reserve(void) | 
 | 493 | { | 
 | 494 | 	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) | 
 | 495 | 		return 0; | 
 | 496 | #ifdef CONFIG_SMP | 
 | 497 | 	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) | 
 | 498 | 		goto fail1; | 
 | 499 | #endif | 
 | 500 | 	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) | 
 | 501 | 		goto fail2; | 
 | 502 | 	/* RED-PEN why is ESCR1 not reserved here? */ | 
 | 503 | 	return 1; | 
 | 504 |  fail2: | 
 | 505 | #ifdef CONFIG_SMP | 
 | 506 | 	if (smp_num_siblings > 1) | 
 | 507 | 		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | 
 | 508 |  fail1: | 
 | 509 | #endif | 
 | 510 | 	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | 
 | 511 | 	return 0; | 
 | 512 | } | 
 | 513 |  | 
 | 514 | static void p4_unreserve(void) | 
 | 515 | { | 
 | 516 | #ifdef CONFIG_SMP | 
 | 517 | 	if (smp_num_siblings > 1) | 
| Björn Steinbrink | da88ba1 | 2007-06-16 10:16:04 -0700 | [diff] [blame] | 518 | 		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 519 | #endif | 
| Björn Steinbrink | da88ba1 | 2007-06-16 10:16:04 -0700 | [diff] [blame] | 520 | 	release_evntsel_nmi(MSR_P4_CRU_ESCR0); | 
 | 521 | 	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 522 | } | 
 | 523 |  | 
 | 524 | static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | 
 | 525 | { | 
 | 526 | 	unsigned dummy; | 
 | 527 | 	/* | 
 | 528 |  	 * P4 quirks: | 
 | 529 | 	 * - An overflown perfctr will assert its interrupt | 
 | 530 | 	 *   until the OVF flag in its CCCR is cleared. | 
 | 531 | 	 * - LVTPC is masked on interrupt and must be | 
 | 532 | 	 *   unmasked by the LVTPC handler. | 
 | 533 | 	 */ | 
 | 534 | 	rdmsrl(wd->cccr_msr, dummy); | 
 | 535 | 	dummy &= ~P4_CCCR_OVF; | 
 | 536 | 	wrmsrl(wd->cccr_msr, dummy); | 
 | 537 | 	apic_write(APIC_LVTPC, APIC_DM_NMI); | 
 | 538 | 	/* start the cycle over again */ | 
 | 539 | 	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | 
 | 540 | } | 
 | 541 |  | 
/*
 * Watchdog ops for Intel family 15 (P4).  setup_p4_watchdog() and
 * p4_reserve() name their MSRs directly and do not read .perfctr/.evntsel.
 */
static const struct wd_ops p4_wd_ops = {
	.reserve = p4_reserve,
	.unreserve = p4_unreserve,
	.setup = setup_p4_watchdog,
	.rearm = p4_rearm,
	.stop = stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr = MSR_P4_BPU_PERFCTR0,
	.evntsel = MSR_P4_BSU_ESCR0,
	/* counter bit lapic_wd_event() tests to see whether it is still running */
	.checkbit = 1ULL<<39,
};
 | 553 |  | 
 | 554 | /* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully | 
 | 555 |    all future Intel CPUs. */ | 
 | 556 |  | 
 | 557 | #define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 
 | 558 | #define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | 
 | 559 |  | 
| Jan Beulich | d1e0847 | 2007-10-17 18:04:39 +0200 | [diff] [blame] | 560 | static struct wd_ops intel_arch_wd_ops; | 
 | 561 |  | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 562 | static int setup_intel_arch_watchdog(unsigned nmi_hz) | 
 | 563 | { | 
 | 564 | 	unsigned int ebx; | 
 | 565 | 	union cpuid10_eax eax; | 
 | 566 | 	unsigned int unused; | 
 | 567 | 	unsigned int perfctr_msr, evntsel_msr; | 
 | 568 | 	unsigned int evntsel; | 
 | 569 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 570 |  | 
 | 571 | 	/* | 
 | 572 | 	 * Check whether the Architectural PerfMon supports | 
 | 573 | 	 * Unhalted Core Cycles Event or not. | 
 | 574 | 	 * NOTE: Corresponding bit = 0 in ebx indicates event present. | 
 | 575 | 	 */ | 
 | 576 | 	cpuid(10, &(eax.full), &ebx, &unused, &unused); | 
 | 577 | 	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | 
 | 578 | 	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 
 | 579 | 		return 0; | 
 | 580 |  | 
| Stephane Eranian | 23d5ea5 | 2007-09-06 16:59:51 +0200 | [diff] [blame] | 581 | 	perfctr_msr = wd_ops->perfctr; | 
 | 582 | 	evntsel_msr = wd_ops->evntsel; | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 583 |  | 
 | 584 | 	wrmsrl(perfctr_msr, 0UL); | 
 | 585 |  | 
 | 586 | 	evntsel = ARCH_PERFMON_EVENTSEL_INT | 
 | 587 | 		| ARCH_PERFMON_EVENTSEL_OS | 
 | 588 | 		| ARCH_PERFMON_EVENTSEL_USR | 
 | 589 | 		| ARCH_PERFMON_NMI_EVENT_SEL | 
 | 590 | 		| ARCH_PERFMON_NMI_EVENT_UMASK; | 
 | 591 |  | 
 | 592 | 	/* setup the timer */ | 
 | 593 | 	wrmsr(evntsel_msr, evntsel, 0); | 
 | 594 | 	nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 
 | 595 | 	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | 
 | 596 | 	apic_write(APIC_LVTPC, APIC_DM_NMI); | 
 | 597 | 	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 
 | 598 | 	wrmsr(evntsel_msr, evntsel, 0); | 
 | 599 |  | 
 | 600 | 	wd->perfctr_msr = perfctr_msr; | 
 | 601 | 	wd->evntsel_msr = evntsel_msr; | 
 | 602 | 	wd->cccr_msr = 0;  //unused | 
| Jan Beulich | d1e0847 | 2007-10-17 18:04:39 +0200 | [diff] [blame] | 603 | 	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 604 | 	return 1; | 
 | 605 | } | 
 | 606 |  | 
/*
 * Watchdog ops for Intel CPUs with architectural perfmon, using counter 1.
 * Not const: .checkbit is not set statically, setup_intel_arch_watchdog()
 * fills it in at run time.
 */
static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_intel_arch_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};
 | 616 |  | 
/*
 * Variant of the architectural perfmon ops that uses counter 0 instead of
 * counter 1; probe_nmi_watchdog() picks it for family 6 model 14.
 * .checkbit is not set statically.
 */
static struct wd_ops coreduo_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_intel_arch_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
	.evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
};
 | 626 |  | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 627 | static void probe_nmi_watchdog(void) | 
 | 628 | { | 
 | 629 | 	switch (boot_cpu_data.x86_vendor) { | 
 | 630 | 	case X86_VENDOR_AMD: | 
 | 631 | 		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 
 | 632 | 		    boot_cpu_data.x86 != 16) | 
 | 633 | 			return; | 
 | 634 | 		wd_ops = &k7_wd_ops; | 
 | 635 | 		break; | 
 | 636 | 	case X86_VENDOR_INTEL: | 
| Stephane Eranian | 23d5ea5 | 2007-09-06 16:59:51 +0200 | [diff] [blame] | 637 | 		/* Work around Core Duo (Yonah) errata AE49 where perfctr1 | 
 | 638 | 		   doesn't have a working enable bit. */ | 
 | 639 | 		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { | 
 | 640 | 			wd_ops = &coreduo_wd_ops; | 
 | 641 | 			break; | 
 | 642 | 		} | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 643 | 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 
 | 644 | 			wd_ops = &intel_arch_wd_ops; | 
 | 645 | 			break; | 
 | 646 | 		} | 
 | 647 | 		switch (boot_cpu_data.x86) { | 
 | 648 | 		case 6: | 
 | 649 | 			if (boot_cpu_data.x86_model > 0xd) | 
 | 650 | 				return; | 
 | 651 |  | 
 | 652 | 			wd_ops = &p6_wd_ops; | 
 | 653 | 			break; | 
 | 654 | 		case 15: | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 655 | 			wd_ops = &p4_wd_ops; | 
 | 656 | 			break; | 
 | 657 | 		default: | 
 | 658 | 			return; | 
 | 659 | 		} | 
 | 660 | 		break; | 
 | 661 | 	} | 
 | 662 | } | 
 | 663 |  | 
 | 664 | /* Interface to nmi.c */ | 
 | 665 |  | 
 | 666 | int lapic_watchdog_init(unsigned nmi_hz) | 
 | 667 | { | 
 | 668 | 	if (!wd_ops) { | 
 | 669 | 		probe_nmi_watchdog(); | 
| Ingo Molnar | 9c9b81f | 2008-03-27 23:39:42 +0100 | [diff] [blame] | 670 | 		if (!wd_ops) { | 
 | 671 | 			printk(KERN_INFO "NMI watchdog: CPU not supported\n"); | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 672 | 			return -1; | 
| Ingo Molnar | 9c9b81f | 2008-03-27 23:39:42 +0100 | [diff] [blame] | 673 | 		} | 
| Björn Steinbrink | faa4cfa | 2007-06-16 10:15:55 -0700 | [diff] [blame] | 674 |  | 
 | 675 | 		if (!wd_ops->reserve()) { | 
 | 676 | 			printk(KERN_ERR | 
 | 677 | 				"NMI watchdog: cannot reserve perfctrs\n"); | 
 | 678 | 			return -1; | 
 | 679 | 		} | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 680 | 	} | 
 | 681 |  | 
 | 682 | 	if (!(wd_ops->setup(nmi_hz))) { | 
 | 683 | 		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", | 
 | 684 | 		       raw_smp_processor_id()); | 
 | 685 | 		return -1; | 
 | 686 | 	} | 
 | 687 |  | 
 | 688 | 	return 0; | 
 | 689 | } | 
 | 690 |  | 
 | 691 | void lapic_watchdog_stop(void) | 
 | 692 | { | 
 | 693 | 	if (wd_ops) | 
| Björn Steinbrink | 54c6ed7 | 2007-06-16 10:15:56 -0700 | [diff] [blame] | 694 | 		wd_ops->stop(); | 
| Andi Kleen | 09198e6 | 2007-05-02 19:27:20 +0200 | [diff] [blame] | 695 | } | 
 | 696 |  | 
 | 697 | unsigned lapic_adjust_nmi_hz(unsigned hz) | 
 | 698 | { | 
 | 699 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 700 | 	if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | 
 | 701 | 	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) | 
 | 702 | 		hz = adjust_for_32bit_ctr(hz); | 
 | 703 | 	return hz; | 
 | 704 | } | 
 | 705 |  | 
 | 706 | int lapic_wd_event(unsigned nmi_hz) | 
 | 707 | { | 
 | 708 | 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 
 | 709 | 	u64 ctr; | 
 | 710 | 	rdmsrl(wd->perfctr_msr, ctr); | 
 | 711 | 	if (ctr & wd_ops->checkbit) { /* perfctr still running? */ | 
 | 712 | 		return 0; | 
 | 713 | 	} | 
 | 714 | 	wd_ops->rearm(wd, nmi_hz); | 
 | 715 | 	return 1; | 
 | 716 | } | 
 | 717 |  | 
 | 718 | int lapic_watchdog_ok(void) | 
 | 719 | { | 
 | 720 | 	return wd_ops != NULL; | 
 | 721 | } |