blob: 8011a8e1c7d41d8a5baf2876c87697b37eec39ca [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Machine check handler.
3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 * Rest from unknown author(s).
5 * 2004 Andi Kleen. Rewrote most of it.
6 */
7
8#include <linux/init.h>
9#include <linux/types.h>
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/string.h>
13#include <linux/rcupdate.h>
14#include <linux/kallsyms.h>
15#include <linux/sysdev.h>
16#include <linux/miscdevice.h>
17#include <linux/fs.h>
Randy Dunlapa9415642006-01-11 12:17:48 -080018#include <linux/capability.h>
Andi Kleen91c6d402005-07-28 21:15:39 -070019#include <linux/cpu.h>
20#include <linux/percpu.h>
Andi Kleen8c566ef2005-09-12 18:49:24 +020021#include <linux/ctype.h>
Andi Kleena98f0dd2007-02-13 13:26:23 +010022#include <linux/kmod.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <asm/processor.h>
24#include <asm/msr.h>
25#include <asm/mce.h>
26#include <asm/kdebug.h>
27#include <asm/uaccess.h>
Andi Kleen0a9c3ee2006-01-11 22:46:54 +010028#include <asm/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029
/* Minor number used for the "mcelog" misc device (see mce_log_device). */
#define MISC_MCELOG_MINOR 227
/* Size of bank[]; mce_init() clamps the detected bank count to this. */
#define NR_BANKS 6

/* Incremented on entry to do_machine_check() and decremented on exit. */
atomic_t mce_entry;

/* Set by "nomce" / "mce=off"; makes mcheck_init() return without
   initializing machine checks. */
static int mce_dont_init;

/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
   3: never panic or exit (for testing only) */
static int tolerant = 1;
/* Number of banks in use; set by mce_init() from the low byte of MCG_CAP. */
static int banks;
/* Per-bank value written to the bank's CTL MSR by mce_init(); a bank whose
   entry is 0 is skipped by do_machine_check(). */
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
/* Bit 0 is set by mce_log() and cleared by mcheck_timer(). */
static unsigned long console_logged;
/* Set by mce_log() when it wins the console_logged bit; makes
   mcheck_timer() print a one-line notice. */
static int notify_user;
/* If non-zero, MSR number that mce_get_rip() reads the RIP from. */
static int rip_msr;
/* Non-zero: log the events found by the scan mce_init() does at init. */
static int mce_bootlog = 1;
/* Incremented by every mce_log() call; compared in do_mce_trigger(). */
static atomic_t mce_events;

/* Path of the user mode helper started by do_mce_trigger(); empty string
   means no helper is configured. Set through the "trigger" sysfs file. */
static char trigger[128];
static char *trigger_argv[2] = { trigger, NULL };
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51/*
52 * Lockless MCE logging infrastructure.
53 * This avoids deadlocks on printk locks without having to break locks. Also
54 * separate MCEs from kernel messages to avoid bogus bug reports.
55 */
56
/* The global log buffer shared by mce_log() (writer) and mce_read()
   (reader). The first two members are initialized positionally with
   MCE_LOG_SIGNATURE and MCE_LOG_LEN; the remaining members start zeroed.
   NOTE(review): struct mce_log is declared outside this file (presumably
   <asm/mce.h>) -- confirm the member order before reordering anything. */
struct mce_log mcelog = {
	MCE_LOG_SIGNATURE,
	MCE_LOG_LEN,
};
61
/*
 * Append one record to the global mcelog buffer without taking a lock.
 *
 * @mce: record to store. Its 'finished' field is cleared here, so the
 *       caller's copy is modified.
 *
 * A slot is claimed by advancing mcelog.next with cmpxchg; the record is
 * then copied in and only afterwards marked finished, so a reader can tell
 * a completely written entry from one that is still being filled in.
 * When no free slot is left the record is dropped and MCE_OVERFLOW is set
 * in mcelog.flags.
 */
void mce_log(struct mce *mce)
{
	unsigned next, entry;
	/* Count every call, even one whose record ends up being dropped. */
	atomic_inc(&mce_events);
	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference(mcelog.next);
		/* The rmb forces the compiler to reload next in each
		   iteration */
		rmb();
		for (;;) {
			/* When the buffer fills up discard new entries. Assume
			   that the earlier errors are the more interesting. */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW, &mcelog.flags);
				return;
			}
			/* Old left over entry. Skip. */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		/* Claim the slot; retry from the top if mcelog.next changed
		   underneath us. */
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	/* The copy carries finished == 0 (cleared above). */
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	/* Publish the entry only after its contents have been written. */
	mcelog.entry[entry].finished = 1;
	wmb();

	/* Only the caller that flips bit 0 requests the console notice;
	   mcheck_timer() prints it and clears both again. */
	if (!test_and_set_bit(0, &console_logged))
		notify_user = 1;
}
100
101static void print_mce(struct mce *m)
102{
103 printk(KERN_EMERG "\n"
Andi Kleen48551702006-01-11 22:44:48 +0100104 KERN_EMERG "HARDWARE ERROR\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105 KERN_EMERG
106 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
107 m->cpu, m->mcgstatus, m->bank, m->status);
108 if (m->rip) {
109 printk(KERN_EMERG
110 "RIP%s %02x:<%016Lx> ",
111 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
112 m->cs, m->rip);
113 if (m->cs == __KERNEL_CS)
114 print_symbol("{%s}", m->rip);
115 printk("\n");
116 }
117 printk(KERN_EMERG "TSC %Lx ", m->tsc);
118 if (m->addr)
119 printk("ADDR %Lx ", m->addr);
120 if (m->misc)
121 printk("MISC %Lx ", m->misc);
122 printk("\n");
Andi Kleen48551702006-01-11 22:44:48 +0100123 printk(KERN_EMERG "This is not a software problem!\n");
124 printk(KERN_EMERG
125 "Run through mcelog --ascii to decode and contact your hardware vendor\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126}
127
128static void mce_panic(char *msg, struct mce *backup, unsigned long start)
129{
130 int i;
131 oops_begin();
132 for (i = 0; i < MCE_LOG_LEN; i++) {
133 unsigned long tsc = mcelog.entry[i].tsc;
134 if (time_before(tsc, start))
135 continue;
136 print_mce(&mcelog.entry[i]);
137 if (backup && mcelog.entry[i].tsc == backup->tsc)
138 backup = NULL;
139 }
140 if (backup)
141 print_mce(backup);
142 if (tolerant >= 3)
143 printk("Fake panic: %s\n", msg);
144 else
145 panic(msg);
146}
147
148static int mce_available(struct cpuinfo_x86 *c)
149{
Akinobu Mita3d1712c2006-03-24 03:15:11 -0800150 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151}
152
Andi Kleen94ad8472005-04-16 15:25:09 -0700153static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
154{
155 if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
156 m->rip = regs->rip;
157 m->cs = regs->cs;
158 } else {
159 m->rip = 0;
160 m->cs = 0;
161 }
162 if (rip_msr) {
163 /* Assume the RIP in the MSR is exact. Is this true? */
164 m->mcgstatus |= MCG_STATUS_EIPV;
165 rdmsrl(rip_msr, m->rip);
166 m->cs = 0;
167 }
168}
169
Andi Kleena98f0dd2007-02-13 13:26:23 +0100170static void do_mce_trigger(void)
171{
172 static atomic_t mce_logged;
173 int events = atomic_read(&mce_events);
174 if (events != atomic_read(&mce_logged) && trigger[0]) {
175 /* Small race window, but should be harmless. */
176 atomic_set(&mce_logged, events);
177 call_usermodehelper(trigger, trigger_argv, NULL, -1);
178 }
179}
180
/*
 * The actual machine check handler
 *
 * @regs:       register state, or NULL when called from the polling timer
 *              or from mce_init(). With NULL regs the function only scans,
 *              logs and clears the banks; it never panics or kills a task.
 * @error_code: negative values change the logging done for each bank:
 *              no TSC is stored when error_code < 0, and nothing is
 *              logged at all when error_code == -2. In this file the
 *              poller passes 0 and mce_init() passes -1 or -2.
 *
 * Every enabled bank with a valid status is read, logged and cleared.
 * Afterwards, depending on the status bits seen and on 'tolerant', the
 * function panics, kills the current task, or simply returns.
 */

void do_machine_check(struct pt_regs * regs, long error_code)
{
	struct mce m, panicm;
	/* tolerant == 0: always panic. */
	int nowayout = (tolerant < 1);
	int kill_it = 0;
	u64 mcestart = 0;
	int i;
	int panicm_found = 0;

	atomic_inc(&mce_entry);

	/* NOTE(review): 18 is presumably the machine check vector number --
	   confirm against notify_die()'s parameter list. */
	if (regs)
		notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
	/* No banks set up yet (mce_init() has not run): nothing to scan. */
	if (!banks)
		goto out2;

	memset(&m, 0, sizeof(struct mce));
	m.cpu = smp_processor_id();
	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
	/* Without RIPV the current task gets killed (or we panic) below. */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_it = 1;

	/* Start timestamp; mce_panic() skips log entries older than this. */
	rdtscll(mcestart);
	barrier();

	for (i = 0; i < banks; i++) {
		/* Bank disabled through its control mask. */
		if (!bank[i])
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		if (m.status & MCI_STATUS_EN) {
			/* In theory _OVER could be a nowayout too, but
			   assume any overflowed errors were not fatal. */
			nowayout |= !!(m.status & MCI_STATUS_PCC);
			kill_it |= !!(m.status & MCI_STATUS_UC);
		}

		if (m.status & MCI_STATUS_MISCV)
			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
		if (m.status & MCI_STATUS_ADDRV)
			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);

		mce_get_rip(&m, regs);
		/* Negative error codes leave the TSC at 0. */
		if (error_code >= 0)
			rdtscll(m.tsc);
		/* Clear the bank so the same event is not seen again. */
		wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
		/* -2 means: clear the banks but do not log. */
		if (error_code != -2)
			mce_log(&m);

		/* Did this bank cause the exception? */
		/* Assume that the bank with uncorrectable errors did it,
		   and that there is only a single one. */
		if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
			panicm = m;
			panicm_found = 1;
		}

		add_taint(TAINT_MACHINE_CHECK);
	}

	/* Never do anything final in the polling timer */
	if (!regs) {
		/* Normal interrupt context here. Call trigger for any new
		   events. */
		do_mce_trigger();
		goto out;
	}

	/* If we didn't find an uncorrectable error, pick
	   the last one (shouldn't happen, just being safe). */
	if (!panicm_found)
		panicm = m;
	if (nowayout)
		mce_panic("Machine check", &panicm, mcestart);
	if (kill_it) {
		int user_space = 0;

		/* Only trust the recorded RIP/CS when RIPV is set. */
		if (m.mcgstatus & MCG_STATUS_RIPV)
			user_space = panicm.rip && (panicm.cs & 3);

		/* When the machine was in user space and the CPU didn't get
		   confused it's normally not necessary to panic, unless you
		   are paranoid (tolerant == 0)

		   RED-PEN could be more tolerant for MCEs in idle,
		   but most likely they occur at boot anyways, where
		   it is best to just halt the machine. */
		if ((!user_space && (panic_on_oops || tolerant < 2)) ||
		    (unsigned)current->pid <= 1)
			mce_panic("Uncorrected machine check", &panicm, mcestart);

		/* do_exit takes an awful lot of locks and has as
		   slight risk of deadlocking. If you don't want that
		   don't set tolerant >= 2 */
		if (tolerant < 3)
			do_exit(SIGBUS);
	}

 out:
	/* Last thing done in the machine check exception to clear state. */
	wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out2:
	atomic_dec(&mce_entry);
}
297
Dmitriy Zavin15d5f832006-09-26 10:52:42 +0200298#ifdef CONFIG_X86_MCE_INTEL
299/***
300 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
301 * @cpu: The CPU on which the event occured.
302 * @status: Event status information
303 *
304 * This function should be called by the thermal interrupt after the
305 * event has been processed and the decision was made to log the event
306 * further.
307 *
308 * The status parameter will be saved to the 'status' field of 'struct mce'
309 * and historically has been the register value of the
310 * MSR_IA32_THERMAL_STATUS (Intel) msr.
311 */
312void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
313{
314 struct mce m;
315
316 memset(&m, 0, sizeof(m));
317 m.cpu = cpu;
318 m.bank = MCE_THERMAL_BANK;
319 m.status = status;
320 rdtscll(m.tsc);
321 mce_log(&m);
322}
323#endif /* CONFIG_X86_MCE_INTEL */
324
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325/*
326 * Periodic polling timer for "silent" machine check errors.
327 */
328
/* Polling period in seconds (multiplied by HZ when the work is scheduled);
   0 disables the periodic poll. Writable through sysfs. */
static int check_interval = 5 * 60; /* 5 minutes */
static void mcheck_timer(struct work_struct *work);
/* Delayed work item that runs mcheck_timer(). */
static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332
333static void mcheck_check_cpu(void *info)
334{
335 if (mce_available(&current_cpu_data))
336 do_machine_check(NULL, 0);
337}
338
David Howells65f27f32006-11-22 14:55:48 +0000339static void mcheck_timer(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340{
341 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
342 schedule_delayed_work(&mcheck_work, check_interval * HZ);
343
344 /*
345 * It's ok to read stale data here for notify_user and
346 * console_logged as we'll simply get the updated versions
347 * on the next mcheck_timer execution and atomic operations
348 * on console_logged act as synchronization for notify_user
349 * writes.
350 */
351 if (notify_user && console_logged) {
352 notify_user = 0;
353 clear_bit(0, &console_logged);
354 printk(KERN_INFO "Machine check events logged\n");
355 }
356}
357
358
359static __init int periodic_mcheck_init(void)
360{
361 if (check_interval)
362 schedule_delayed_work(&mcheck_work, check_interval*HZ);
363 return 0;
364}
365__initcall(periodic_mcheck_init);
366
367
368/*
369 * Initialize Machine Checks for a CPU.
370 */
371static void mce_init(void *dummy)
372{
373 u64 cap;
374 int i;
375
376 rdmsrl(MSR_IA32_MCG_CAP, cap);
377 banks = cap & 0xff;
378 if (banks > NR_BANKS) {
379 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
380 banks = NR_BANKS;
381 }
Andi Kleen94ad8472005-04-16 15:25:09 -0700382 /* Use accurate RIP reporting if available. */
383 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
384 rip_msr = MSR_IA32_MCG_EIP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385
386 /* Log the machine checks left over from the previous reset.
387 This also clears all registers */
Andi Kleend5172f22005-08-07 09:42:07 -0700388 do_machine_check(NULL, mce_bootlog ? -1 : -2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
390 set_in_cr4(X86_CR4_MCE);
391
392 if (cap & MCG_CTL_P)
393 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
394
395 for (i = 0; i < banks; i++) {
396 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
397 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
398 }
399}
400
401/* Add per CPU specific workarounds here */
Ashok Raje6982c62005-06-25 14:54:58 -0700402static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403{
404 /* This should be disabled by the BIOS, but isn't always */
405 if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
406 /* disable GART TBL walk error reporting, which trips off
407 incorrectly with the IOMMU & 3ware & Cerberus. */
408 clear_bit(10, &bank[4]);
Andi Kleene5835382005-11-05 17:25:54 +0100409 /* Lots of broken BIOS around that don't clear them
410 by default and leave crap in there. Don't log. */
411 mce_bootlog = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 }
Andi Kleene5835382005-11-05 17:25:54 +0100413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414}
415
Ashok Raje6982c62005-06-25 14:54:58 -0700416static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417{
418 switch (c->x86_vendor) {
419 case X86_VENDOR_INTEL:
420 mce_intel_feature_init(c);
421 break;
Jacob Shin89b831e2005-11-05 17:25:53 +0100422 case X86_VENDOR_AMD:
423 mce_amd_feature_init(c);
424 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 default:
426 break;
427 }
428}
429
430/*
431 * Called for each booted CPU to set up machine checks.
432 * Must be called with preempt off.
433 */
Ashok Raje6982c62005-06-25 14:54:58 -0700434void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435{
Ashok Raj7ded5682006-02-03 21:51:23 +0100436 static cpumask_t mce_cpus = CPU_MASK_NONE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
438 mce_cpu_quirks(c);
439
440 if (mce_dont_init ||
441 cpu_test_and_set(smp_processor_id(), mce_cpus) ||
442 !mce_available(c))
443 return;
444
445 mce_init(NULL);
446 mce_cpu_features(c);
447}
448
449/*
450 * Character device to read and clear the MCE log.
451 */
452
/*
 * Store the current CPU's TSC in its slot of the array passed in @data
 * (an array of unsigned long indexed by CPU number).
 */
static void collect_tscs(void *data)
{
	unsigned long *tsc_by_cpu = data;
	int this_cpu = smp_processor_id();

	rdtscll(tsc_by_cpu[this_cpu]);
}
458
459static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
460{
Andi Kleenf0de53b2005-04-16 15:25:10 -0700461 unsigned long *cpu_tsc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 static DECLARE_MUTEX(mce_read_sem);
463 unsigned next;
464 char __user *buf = ubuf;
465 int i, err;
466
Andi Kleenf0de53b2005-04-16 15:25:10 -0700467 cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
468 if (!cpu_tsc)
469 return -ENOMEM;
470
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 down(&mce_read_sem);
472 next = rcu_dereference(mcelog.next);
473
474 /* Only supports full reads right now */
475 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
476 up(&mce_read_sem);
Andi Kleenf0de53b2005-04-16 15:25:10 -0700477 kfree(cpu_tsc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 return -EINVAL;
479 }
480
481 err = 0;
Andi Kleen673242c2005-09-12 18:49:24 +0200482 for (i = 0; i < next; i++) {
483 unsigned long start = jiffies;
484 while (!mcelog.entry[i].finished) {
485 if (!time_before(jiffies, start + 2)) {
486 memset(mcelog.entry + i,0, sizeof(struct mce));
487 continue;
488 }
489 cpu_relax();
490 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 smp_rmb();
492 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
493 buf += sizeof(struct mce);
494 }
495
496 memset(mcelog.entry, 0, next * sizeof(struct mce));
497 mcelog.next = 0;
498
Paul E. McKenneyb2b18662005-06-25 14:55:38 -0700499 synchronize_sched();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
501 /* Collect entries that were still getting written before the synchronize. */
502
503 on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
504 for (i = next; i < MCE_LOG_LEN; i++) {
505 if (mcelog.entry[i].finished &&
506 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
507 err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
508 smp_rmb();
509 buf += sizeof(struct mce);
510 memset(&mcelog.entry[i], 0, sizeof(struct mce));
511 }
512 }
513 up(&mce_read_sem);
Andi Kleenf0de53b2005-04-16 15:25:10 -0700514 kfree(cpu_tsc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 return err ? -EFAULT : buf - ubuf;
516}
517
518static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
519{
520 int __user *p = (int __user *)arg;
521 if (!capable(CAP_SYS_ADMIN))
522 return -EPERM;
523 switch (cmd) {
524 case MCE_GET_RECORD_LEN:
525 return put_user(sizeof(struct mce), p);
526 case MCE_GET_LOG_LEN:
527 return put_user(MCE_LOG_LEN, p);
528 case MCE_GETCLEAR_FLAGS: {
529 unsigned flags;
530 do {
531 flags = mcelog.flags;
532 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
533 return put_user(flags, p);
534 }
535 default:
536 return -ENOTTY;
537 }
538}
539
/* File operations of the mcelog character device: only read and ioctl
   are implemented. */
static const struct file_operations mce_chrdev_ops = {
	.read = mce_read,
	.ioctl = mce_ioctl,
};

/* Misc device registered by mce_init_device(). The members are
   initialized positionally: the minor number, the name "mcelog" and the
   file operations above. */
static struct miscdevice mce_log_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};
550
551/*
552 * Old style boot options parsing. Only for compatibility.
553 */
554
/* Handler for the "nomce" boot option: keep mcheck_init() from
   initializing machine checks. @str is ignored. Returns 1 to mark the
   option as handled. */
static int __init mcheck_disable(char *str)
{
	mce_dont_init = 1;
	return 1;
}
560
561/* mce=off disables machine check. Note you can reenable it later
Andi Kleend5172f22005-08-07 09:42:07 -0700562 using sysfs.
Andi Kleen8c566ef2005-09-12 18:49:24 +0200563 mce=TOLERANCELEVEL (number, see above)
Andi Kleene5835382005-11-05 17:25:54 +0100564 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
565 mce=nobootlog Don't log MCEs from before booting. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566static int __init mcheck_enable(char *str)
567{
Andi Kleend5172f22005-08-07 09:42:07 -0700568 if (*str == '=')
569 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 if (!strcmp(str, "off"))
571 mce_dont_init = 1;
Andi Kleene5835382005-11-05 17:25:54 +0100572 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
573 mce_bootlog = str[0] == 'b';
Andi Kleen8c566ef2005-09-12 18:49:24 +0200574 else if (isdigit(str[0]))
575 get_option(&str, &tolerant);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 else
577 printk("mce= argument %s ignored. Please use /sys", str);
OGAWA Hirofumi9b410462006-03-31 02:30:33 -0800578 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579}
580
581__setup("nomce", mcheck_disable);
582__setup("mce", mcheck_enable);
583
584/*
585 * Sysfs support
586 */
587
/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
   Only one CPU is active at this time, the others get readded later using
   CPU hotplug.
   This is the .resume hook of mce_sysclass; @dev is unused and the
   function always returns 0. */
static int mce_resume(struct sys_device *dev)
{
	mce_init(NULL);
	return 0;
}
596
/* Reinit MCEs after user configuration changes.
   Runs mce_init() on every CPU so that new bank[] values reach the
   hardware, and restarts the polling work so that a new check_interval
   takes effect. Called from the sysfs store functions. */
static void mce_restart(void)
{
	/* check_interval was already updated by the caller, so a poll that
	   was armed while the old value was 0 cannot exist here. */
	if (check_interval)
		cancel_delayed_work(&mcheck_work);
	/* Timer race is harmless here */
	on_each_cpu(mce_init, NULL, 1, 1);
	if (check_interval)
		schedule_delayed_work(&mcheck_work, check_interval*HZ);
}
607
/* sysdev class named "machinecheck"; its resume hook re-initializes the
   machine check state. */
static struct sysdev_class mce_sysclass = {
	.resume = mce_resume,
	set_kset_name("machinecheck"),
};

/* One sysdev per CPU, registered by mce_create_device(). */
DEFINE_PER_CPU(struct sys_device, device_mce);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614
/* Why are there no generic functions for this? */
/*
 * ACCESSOR(name, var, start) generates a sysdev attribute "attr_<name>"
 * (mode 0644) together with its show_<name>() and set_<name>() functions:
 *
 *   show_<name>  prints 'var' as a hexadecimal unsigned long plus newline.
 *   set_<name>   parses a number with simple_strtoul() in base 0, assigns
 *                it to 'var', then executes the statement 'start' (may be
 *                empty). Returns the number of characters consumed, or
 *                -EINVAL if no digits were parsed.
 *
 * Note that show prints hex without a "0x" prefix while set parses in
 * base 0, so a shown value cannot in general be written back verbatim.
 */
#define ACCESSOR(name, var, start) \
	static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
		return sprintf(buf, "%lx\n", (unsigned long)var); \
	} \
	static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
		char *end; \
		unsigned long new = simple_strtoul(buf, &end, 0); \
		if (end == buf) return -EINVAL; \
		var = new; \
		start; \
		return end-buf; \
	} \
	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);

/* TBD should generate these dynamically based on number of available banks */
/* One control attribute per bank[] entry; writing one re-initializes
   machine checks on all CPUs via mce_restart(). */
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(bank5ctl,bank[5],mce_restart())
Andi Kleena98f0dd2007-02-13 13:26:23 +0100637
638static ssize_t show_trigger(struct sys_device *s, char *buf)
639{
640 strcpy(buf, trigger);
641 strcat(buf, "\n");
642 return strlen(trigger) + 1;
643}
644
645static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
646{
647 char *p;
648 int len;
649 strncpy(trigger, buf, sizeof(trigger));
650 trigger[sizeof(trigger)-1] = 0;
651 len = strlen(trigger);
652 p = strchr(trigger, '\n');
653 if (*p) *p = 0;
654 return len;
655}
656
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
/* Changing 'tolerant' needs no re-initialization; changing the polling
   interval restarts the poll through mce_restart(). */
ACCESSOR(tolerant,tolerant,)
ACCESSOR(check_interval,check_interval,mce_restart())
/* NULL terminated list of the attribute files created for every CPU's
   machinecheck sysdev. */
static struct sysdev_attribute *mce_attributes[] = {
	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
	&attr_tolerant, &attr_check_interval, &attr_trigger,
	NULL
};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666
Andi Kleen91c6d402005-07-28 21:15:39 -0700667/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
668static __cpuinit int mce_create_device(unsigned int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669{
670 int err;
Shaohua Li73ca5352006-01-11 22:43:06 +0100671 int i;
Andi Kleen91c6d402005-07-28 21:15:39 -0700672 if (!mce_available(&cpu_data[cpu]))
673 return -EIO;
674
675 per_cpu(device_mce,cpu).id = cpu;
676 per_cpu(device_mce,cpu).cls = &mce_sysclass;
677
678 err = sysdev_register(&per_cpu(device_mce,cpu));
679
680 if (!err) {
Andi Kleena98f0dd2007-02-13 13:26:23 +0100681 for (i = 0; mce_attributes[i]; i++)
Shaohua Li73ca5352006-01-11 22:43:06 +0100682 sysdev_create_file(&per_cpu(device_mce,cpu),
Andi Kleena98f0dd2007-02-13 13:26:23 +0100683 mce_attributes[i]);
Andi Kleen91c6d402005-07-28 21:15:39 -0700684 }
685 return err;
686}
687
Chandra Seetharamanbe6b5a32006-07-30 03:03:37 -0700688static void mce_remove_device(unsigned int cpu)
Andi Kleen91c6d402005-07-28 21:15:39 -0700689{
Shaohua Li73ca5352006-01-11 22:43:06 +0100690 int i;
691
Andi Kleena98f0dd2007-02-13 13:26:23 +0100692 for (i = 0; mce_attributes[i]; i++)
Shaohua Li73ca5352006-01-11 22:43:06 +0100693 sysdev_remove_file(&per_cpu(device_mce,cpu),
Andi Kleena98f0dd2007-02-13 13:26:23 +0100694 mce_attributes[i]);
Andi Kleen91c6d402005-07-28 21:15:39 -0700695 sysdev_unregister(&per_cpu(device_mce,cpu));
Rafael J. Wysockid4c45712006-12-07 02:14:12 +0100696 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
Andi Kleen91c6d402005-07-28 21:15:39 -0700697}
Andi Kleen91c6d402005-07-28 21:15:39 -0700698
699/* Get notified when a cpu comes on/off. Be hotplug friendly. */
Chandra Seetharamanbe6b5a32006-07-30 03:03:37 -0700700static int
Andi Kleen91c6d402005-07-28 21:15:39 -0700701mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
702{
703 unsigned int cpu = (unsigned long)hcpu;
704
705 switch (action) {
706 case CPU_ONLINE:
707 mce_create_device(cpu);
708 break;
Andi Kleen91c6d402005-07-28 21:15:39 -0700709 case CPU_DEAD:
710 mce_remove_device(cpu);
711 break;
Andi Kleen91c6d402005-07-28 21:15:39 -0700712 }
713 return NOTIFY_OK;
714}
715
/* CPU hotplug notifier; registered by mce_init_device(). */
static struct notifier_block mce_cpu_notifier = {
	.notifier_call = mce_cpu_callback,
};
719
720static __init int mce_init_device(void)
721{
722 int err;
723 int i = 0;
724
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 if (!mce_available(&boot_cpu_data))
726 return -EIO;
727 err = sysdev_class_register(&mce_sysclass);
Andi Kleen91c6d402005-07-28 21:15:39 -0700728
729 for_each_online_cpu(i) {
730 mce_create_device(i);
731 }
732
Chandra Seetharamanbe6b5a32006-07-30 03:03:37 -0700733 register_hotcpu_notifier(&mce_cpu_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 misc_register(&mce_log_device);
735 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736}
Andi Kleen91c6d402005-07-28 21:15:39 -0700737
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738device_initcall(mce_init_device);