/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 *  Copyright (C) 2011	Don Zickus Red Hat, Inc.
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include <linux/export.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <linux/atomic.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

struct nmi_desc {
	spinlock_t lock;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},
};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis;

int unknown_nmi_panic;
/*
 * Prevent the NMI reason port (0x61) from being accessed simultaneously;
 * it may only be used from the NMI handler.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
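
/*
 * Usage sketch: booting with the bare "unknown_nmi_panic" parameter on the
 * kernel command line enables the panic-on-unknown-NMI behaviour above.  On
 * most configurations the same flag can also be flipped at run time through
 * the kernel.unknown_nmi_panic sysctl (assuming that sysctl is wired to this
 * variable in the running kernel).
 */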

#define nmi_to_desc(type) (&nmi_desc[type])

static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means if you have enough
	 * of them concurrently, you can lose some because only one
	 * can be latched at any given time.  Walk the whole list
	 * to handle those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list)
		handled += a->handler(type, regs);

	rcu_read_unlock();

	/* return total number of NMI events handled */
	return handled;
}

int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	if (!action->handler)
		return -EINVAL;

	spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Most handlers of type NMI_UNKNOWN never return because
	 * they just assume the NMI is theirs.  The same goes for
	 * NMI_SERR and NMI_IO_CHECK.  Warn if more than one handler
	 * gets registered, just as a sanity check to manage
	 * expectations.
	 */
	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * Some handlers need to be executed first, otherwise a fake
	 * event confuses the other handlers (kdump uses this flag).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);
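
/*
 * Usage sketch: a consumer normally goes through the register_nmi_handler()
 * wrapper in <asm/nmi.h> rather than calling __register_nmi_handler()
 * directly.  The handler returns the number of NMI events it handled
 * (0 means "not mine").  The mydev_*() helpers below are hypothetical and
 * only illustrate the shape of a handler:
 *
 *	static int mydev_nmi_handler(unsigned int type, struct pt_regs *regs)
 *	{
 *		if (!mydev_nmi_pending())
 *			return 0;	// not ours, let other handlers look
 *		mydev_clear_nmi();
 *		return 1;		// one event handled
 *	}
 *
 *	// returns 0 on success, negative errno on failure
 *	register_nmi_handler(NMI_LOCAL, mydev_nmi_handler, 0, "mydev");
 */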

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * The name passed in when the handler was registered
		 * is used as the lookup key.
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			break;
		}
	}

	spin_unlock_irqrestore(&desc->lock, flags);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
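
/*
 * Teardown sketch for the hypothetical "mydev" handler shown above:
 *
 *	unregister_nmi_handler(NMI_LOCAL, "mydev");
 *
 * Unregistration looks the handler up by name, waits for concurrent NMI
 * walkers via synchronize_rcu(), and therefore must not be called from
 * NMI context (the WARN above fires if it is).
 */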

static __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_SERR, regs, false))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	/*
	 * On some machines, the PCI SERR line is used to report memory
	 * errors.  EDAC makes use of it.
	 */
#if defined(CONFIG_EDAC)
	if (edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}

static __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_IO_CHECK, regs, false))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_regs(regs);

	if (panic_on_io_nmi)
		panic("NMI IOCK error: Not continuing");

	/* Re-enable the IOCK line and wait for a few seconds */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}

static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Use 'false' as back-to-back NMIs are dealt with one level up.
	 * Of course this makes having multiple 'unknown' handlers useless,
	 * as only the first one is ever run (unless it can actually determine
	 * if it caused the NMI).
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs, false);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	pr_emerg("Do you have a strange power saving mode enabled?\n");
	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");
}

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static __kprobes void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMIs must be processed before non-CPU-specific
	 * NMIs, otherwise we may lose them, because a CPU-specific
	 * NMI cannot be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two (anything over two is dropped
	 * because NMIs are edge-triggered).  If this is the second half
	 * of a back-to-back NMI, assume we dropped things and process
	 * more handlers.  Otherwise reset the 'swallow' NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	handled = nmi_handle(NMI_LOCAL, regs, b2b);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when an NMI handler handles multiple
		 * events in the current NMI.  One of these events may
		 * be queued to arrive as the next NMI.  Because that
		 * event is already handled, the next NMI would result
		 * in an unknown NMI.  Instead, flag it as a potential
		 * NMI to swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		return;
	}

	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
	raw_spin_lock(&nmi_reason_lock);
	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		return;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time.  To handle
	 * this we may process multiple nmi handlers at once to
	 * cover the case where an NMI is dropped.  The downside
	 * to this approach is we may process an NMI prematurely,
	 * while its real NMI is sitting latched.  This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above, by setting the
	 * swallow_nmi flag when we process more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it.  Otherwise we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI.  For example, while processing
	 * a perf NMI another perf NMI comes in along with a
	 * 'real' unknown NMI.  These two NMIs get combined into
	 * one (as described above).  When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that a 'real' unknown NMI was sent
	 * as well.  As a result it gets swallowed.  Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI.  But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);
}

/*
 * NMIs can hit breakpoints which will cause them to lose their
 * NMI context with the CPU when the breakpoint handler does an iret.
 */
#ifdef CONFIG_X86_32
/*
 * For i386, NMIs use the same stack as the kernel, and we can
 * add a workaround to the iret problem in C (preventing nested
 * NMIs if an NMI takes a trap). Simply have 3 states the NMI
 * can be in:
 *
 *  1) not running
 *  2) executing
 *  3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 *  when one is running, are ignored. Only one NMI is restarted.)
 *
 * If an NMI hits a breakpoint that executes an iret, another
 * NMI can preempt it. We do not want to allow this new NMI
 * to run, but we want to execute it when the first one finishes.
 * We set the state to "latched", and the exit of the first NMI will
 * perform a dec_return; if the result is zero (NOT_RUNNING), then
 * it will simply exit the NMI handler. If not, the dec_return
 * would have set the state to NMI_EXECUTING (what we want it to
 * be when we are running). In this case, we simply jump back
 * to rerun the NMI handler again, and restart the 'latched' NMI.
 *
 * No trap (breakpoint or page fault) should be hit before nmi_restart,
 * thus there is no race between the first check of state for NOT_RUNNING
 * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
 * at this point.
 *
 * In case the NMI takes a page fault, we need to save off the CR2
 * because the NMI could have preempted another page fault and corrupt
 * the CR2 that is about to be read. As nested NMIs must be restarted
 * and they cannot take breakpoints or page faults, the update of the
 * CR2 must be done before converting the nmi state back to NOT_RUNNING.
 * Otherwise, there would be a race of another nested NMI coming in
 * after setting state to NOT_RUNNING but before updating the nmi_cr2.
 */
enum nmi_states {
	NMI_NOT_RUNNING = 0,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);

#define nmi_nesting_preprocess(regs)					\
	do {								\
		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\
			this_cpu_write(nmi_state, NMI_LATCHED);		\
			return;						\
		}							\
		this_cpu_write(nmi_state, NMI_EXECUTING);		\
		this_cpu_write(nmi_cr2, read_cr2());			\
	} while (0);							\
	nmi_restart:

#define nmi_nesting_postprocess()					\
	do {								\
		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\
			write_cr2(this_cpu_read(nmi_cr2));		\
		if (this_cpu_dec_return(nmi_state))			\
			goto nmi_restart;				\
	} while (0)
#else /* x86_64 */
/*
 * On x86_64 things are a bit more difficult. This has the same problem
 * where an NMI hitting a breakpoint that calls iret will remove the
 * NMI context, allowing a nested NMI to enter. What makes this more
 * difficult is that both NMIs and breakpoints have their own stack.
 * When a new NMI or breakpoint is executed, the stack is set to a fixed
 * point. If an NMI is nested, it will have its stack set at that same
 * fixed address that the first NMI had, and will start corrupting the
 * stack. This is handled in entry_64.S, but the same problem exists with
 * the breakpoint stack.
 *
 * If a breakpoint is being processed, and the debug stack is being used,
 * and an NMI comes in and also hits a breakpoint, the stack pointer
 * will be set to the same fixed address as the breakpoint that was
 * interrupted, causing that stack to be corrupted. To handle this case,
 * check if the stack that was interrupted is the debug stack, and if
 * so, change the IDT so that new breakpoints will use the current stack
 * and not switch to the fixed address. On return of the NMI, switch back
 * to the original IDT.
 */
static DEFINE_PER_CPU(int, update_debug_stack);

static inline void nmi_nesting_preprocess(struct pt_regs *regs)
{
	/*
	 * If we interrupted a breakpoint, it is possible that
	 * the nmi handler will have breakpoints too. We need to
	 * change the IDT such that breakpoints that happen here
	 * continue to use the NMI stack.
	 */
	if (unlikely(is_debug_stack(regs->sp))) {
		debug_stack_set_zero();
		this_cpu_write(update_debug_stack, 1);
	}
}

static inline void nmi_nesting_postprocess(void)
{
	if (unlikely(this_cpu_read(update_debug_stack))) {
		debug_stack_reset();
		this_cpu_write(update_debug_stack, 0);
	}
}
#endif

dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_nesting_preprocess(regs);

	nmi_enter();

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();

	/* On i386, may loop back to preprocess */
	nmi_nesting_postprocess();
}

void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* reset the back-to-back NMI logic */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}