| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * ras.c | 
 | 3 |  * Copyright (C) 2001 Dave Engebretsen IBM Corporation | 
 | 4 |  *  | 
 | 5 |  * This program is free software; you can redistribute it and/or modify | 
 | 6 |  * it under the terms of the GNU General Public License as published by | 
 | 7 |  * the Free Software Foundation; either version 2 of the License, or | 
 | 8 |  * (at your option) any later version. | 
 | 9 |  *  | 
 | 10 |  * This program is distributed in the hope that it will be useful, | 
 | 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
 | 13 |  * GNU General Public License for more details. | 
 | 14 |  *  | 
 | 15 |  * You should have received a copy of the GNU General Public License | 
 | 16 |  * along with this program; if not, write to the Free Software | 
 | 17 |  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA | 
 | 18 |  */ | 
 | 19 |  | 
 | 20 | /* Change Activity: | 
 | 21 |  * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support. | 
 | 22 |  * End Change Activity  | 
 | 23 |  */ | 
 | 24 |  | 
 | 25 | #include <linux/errno.h> | 
 | 26 | #include <linux/threads.h> | 
 | 27 | #include <linux/kernel_stat.h> | 
 | 28 | #include <linux/signal.h> | 
 | 29 | #include <linux/sched.h> | 
 | 30 | #include <linux/ioport.h> | 
 | 31 | #include <linux/interrupt.h> | 
 | 32 | #include <linux/timex.h> | 
 | 33 | #include <linux/init.h> | 
 | 34 | #include <linux/slab.h> | 
 | 35 | #include <linux/pci.h> | 
 | 36 | #include <linux/delay.h> | 
 | 37 | #include <linux/irq.h> | 
 | 38 | #include <linux/random.h> | 
 | 39 | #include <linux/sysrq.h> | 
 | 40 | #include <linux/bitops.h> | 
 | 41 |  | 
 | 42 | #include <asm/uaccess.h> | 
 | 43 | #include <asm/system.h> | 
 | 44 | #include <asm/io.h> | 
 | 45 | #include <asm/pgtable.h> | 
 | 46 | #include <asm/irq.h> | 
 | 47 | #include <asm/cache.h> | 
 | 48 | #include <asm/prom.h> | 
 | 49 | #include <asm/ptrace.h> | 
 | 50 | #include <asm/iSeries/LparData.h> | 
 | 51 | #include <asm/machdep.h> | 
 | 52 | #include <asm/rtas.h> | 
 | 53 | #include <asm/ppcdebug.h> | 
 | 54 |  | 
/*
 * Buffer handed (by physical address) to the RTAS check-exception call
 * from the EPOW and hardware-error interrupt handlers.  Both handlers
 * hold ras_log_buf_lock around the RTAS call and their reads of the
 * buffer.
 */
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);

/*
 * Copy of the error log delivered with an FWNMI event; filled in by
 * fwnmi_get_errinfo().  It is read and written without any locking.
 */
char mce_data_buf[RTAS_ERROR_LOG_MAX]
;
/* This is true if we are using the firmware NMI handler (typically LPAR) */
extern int fwnmi_active;

/* Defined elsewhere; recover_mce() calls it to raise SIGBUS. */
extern void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr);

/* RTAS tokens, looked up once by init_ras_IRQ(). */
static int ras_get_sensor_state_token;
static int ras_check_exception_token;

/* Sensor token and index passed to the get-sensor-state RTAS call. */
#define EPOW_SENSOR_TOKEN	9
#define EPOW_SENSOR_INDEX	0
/* First input argument passed to the check-exception RTAS call. */
#define RAS_VECTOR_OFFSET	0x500

/* Interrupt handlers, defined further down; registered by init_ras_IRQ(). */
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id,
					struct pt_regs * regs);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id,
					struct pt_regs * regs);

/*
 * Define DEBUG to stop ras_error_interrupt() from powering the machine
 * off when it sees a fatal error.
 */
/* #define DEBUG */
 | 78 |  | 
 | 79 | static void request_ras_irqs(struct device_node *np, char *propname, | 
 | 80 | 			irqreturn_t (*handler)(int, void *, struct pt_regs *), | 
 | 81 | 			const char *name) | 
 | 82 | { | 
 | 83 | 	unsigned int *ireg, len, i; | 
 | 84 | 	int virq, n_intr; | 
 | 85 |  | 
 | 86 | 	ireg = (unsigned int *)get_property(np, propname, &len); | 
 | 87 | 	if (ireg == NULL) | 
 | 88 | 		return; | 
 | 89 | 	n_intr = prom_n_intr_cells(np); | 
 | 90 | 	len /= n_intr * sizeof(*ireg); | 
 | 91 |  | 
 | 92 | 	for (i = 0; i < len; i++) { | 
 | 93 | 		virq = virt_irq_create_mapping(*ireg); | 
 | 94 | 		if (virq == NO_IRQ) { | 
 | 95 | 			printk(KERN_ERR "Unable to allocate interrupt " | 
 | 96 | 			       "number for %s\n", np->full_name); | 
 | 97 | 			return; | 
 | 98 | 		} | 
 | 99 | 		if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) { | 
 | 100 | 			printk(KERN_ERR "Unable to request interrupt %d for " | 
 | 101 | 			       "%s\n", irq_offset_up(virq), np->full_name); | 
 | 102 | 			return; | 
 | 103 | 		} | 
 | 104 | 		ireg += n_intr; | 
 | 105 | 	} | 
 | 106 | } | 
 | 107 |  | 
 | 108 | /* | 
 | 109 |  * Initialize handlers for the set of interrupts caused by hardware errors | 
 | 110 |  * and power system events. | 
 | 111 |  */ | 
 | 112 | static int __init init_ras_IRQ(void) | 
 | 113 | { | 
 | 114 | 	struct device_node *np; | 
 | 115 |  | 
 | 116 | 	ras_get_sensor_state_token = rtas_token("get-sensor-state"); | 
 | 117 | 	ras_check_exception_token = rtas_token("check-exception"); | 
 | 118 |  | 
 | 119 | 	/* Internal Errors */ | 
 | 120 | 	np = of_find_node_by_path("/event-sources/internal-errors"); | 
 | 121 | 	if (np != NULL) { | 
 | 122 | 		request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt, | 
 | 123 | 				 "RAS_ERROR"); | 
 | 124 | 		request_ras_irqs(np, "interrupts", ras_error_interrupt, | 
 | 125 | 				 "RAS_ERROR"); | 
 | 126 | 		of_node_put(np); | 
 | 127 | 	} | 
 | 128 |  | 
 | 129 | 	/* EPOW Events */ | 
 | 130 | 	np = of_find_node_by_path("/event-sources/epow-events"); | 
 | 131 | 	if (np != NULL) { | 
 | 132 | 		request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt, | 
 | 133 | 				 "RAS_EPOW"); | 
 | 134 | 		request_ras_irqs(np, "interrupts", ras_epow_interrupt, | 
 | 135 | 				 "RAS_EPOW"); | 
 | 136 | 		of_node_put(np); | 
 | 137 | 	} | 
 | 138 |  | 
 | 139 | 	return 1; | 
 | 140 | } | 
 | 141 | __initcall(init_ras_IRQ); | 
 | 142 |  | 
 | 143 | /* | 
 | 144 |  * Handle power subsystem events (EPOW). | 
 | 145 |  * | 
 | 146 |  * Presently we just log the event has occurred.  This should be fixed | 
 | 147 |  * to examine the type of power failure and take appropriate action where | 
 | 148 |  * the time horizon permits something useful to be done. | 
 | 149 |  */ | 
 | 150 | static irqreturn_t | 
 | 151 | ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs) | 
 | 152 | { | 
 | 153 | 	int status = 0xdeadbeef; | 
 | 154 | 	int state = 0; | 
 | 155 | 	int critical; | 
 | 156 |  | 
 | 157 | 	status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, | 
 | 158 | 			   EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX); | 
 | 159 |  | 
 | 160 | 	if (state > 3) | 
 | 161 | 		critical = 1;  /* Time Critical */ | 
 | 162 | 	else | 
 | 163 | 		critical = 0; | 
 | 164 |  | 
 | 165 | 	spin_lock(&ras_log_buf_lock); | 
 | 166 |  | 
 | 167 | 	status = rtas_call(ras_check_exception_token, 6, 1, NULL, | 
 | 168 | 			   RAS_VECTOR_OFFSET, | 
 | 169 | 			   virt_irq_to_real(irq_offset_down(irq)), | 
 | 170 | 			   RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, | 
 | 171 | 			   critical, __pa(&ras_log_buf), | 
 | 172 | 				rtas_get_error_log_max()); | 
 | 173 |  | 
 | 174 | 	udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n", | 
 | 175 | 		    *((unsigned long *)&ras_log_buf), status, state); | 
 | 176 | 	printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n", | 
 | 177 | 	       *((unsigned long *)&ras_log_buf), status, state); | 
 | 178 |  | 
 | 179 | 	/* format and print the extended information */ | 
 | 180 | 	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); | 
 | 181 |  | 
 | 182 | 	spin_unlock(&ras_log_buf_lock); | 
 | 183 | 	return IRQ_HANDLED; | 
 | 184 | } | 
 | 185 |  | 
 | 186 | /* | 
 | 187 |  * Handle hardware error interrupts. | 
 | 188 |  * | 
 | 189 |  * RTAS check-exception is called to collect data on the exception.  If | 
 | 190 |  * the error is deemed recoverable, we log a warning and return. | 
 | 191 |  * For nonrecoverable errors, an error is logged and we stop all processing | 
 | 192 |  * as quickly as possible in order to prevent propagation of the failure. | 
 | 193 |  */ | 
 | 194 | static irqreturn_t | 
 | 195 | ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs) | 
 | 196 | { | 
 | 197 | 	struct rtas_error_log *rtas_elog; | 
 | 198 | 	int status = 0xdeadbeef; | 
 | 199 | 	int fatal; | 
 | 200 |  | 
 | 201 | 	spin_lock(&ras_log_buf_lock); | 
 | 202 |  | 
 | 203 | 	status = rtas_call(ras_check_exception_token, 6, 1, NULL, | 
 | 204 | 			   RAS_VECTOR_OFFSET, | 
 | 205 | 			   virt_irq_to_real(irq_offset_down(irq)), | 
 | 206 | 			   RTAS_INTERNAL_ERROR, 1 /*Time Critical */, | 
 | 207 | 			   __pa(&ras_log_buf), | 
 | 208 | 				rtas_get_error_log_max()); | 
 | 209 |  | 
 | 210 | 	rtas_elog = (struct rtas_error_log *)ras_log_buf; | 
 | 211 |  | 
 | 212 | 	if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC)) | 
 | 213 | 		fatal = 1; | 
 | 214 | 	else | 
 | 215 | 		fatal = 0; | 
 | 216 |  | 
 | 217 | 	/* format and print the extended information */ | 
 | 218 | 	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); | 
 | 219 |  | 
 | 220 | 	if (fatal) { | 
 | 221 | 		udbg_printf("Fatal HW Error <0x%lx 0x%x>\n", | 
 | 222 | 			    *((unsigned long *)&ras_log_buf), status); | 
 | 223 | 		printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", | 
 | 224 | 		       *((unsigned long *)&ras_log_buf), status); | 
 | 225 |  | 
 | 226 | #ifndef DEBUG | 
 | 227 | 		/* Don't actually power off when debugging so we can test | 
 | 228 | 		 * without actually failing while injecting errors. | 
 | 229 | 		 * Error data will not be logged to syslog. | 
 | 230 | 		 */ | 
 | 231 | 		ppc_md.power_off(); | 
 | 232 | #endif | 
 | 233 | 	} else { | 
 | 234 | 		udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n", | 
 | 235 | 			    *((unsigned long *)&ras_log_buf), status); | 
 | 236 | 		printk(KERN_WARNING | 
 | 237 | 		       "Warning: Recoverable hardware error <0x%lx 0x%x>\n", | 
 | 238 | 		       *((unsigned long *)&ras_log_buf), status); | 
 | 239 | 	} | 
 | 240 |  | 
 | 241 | 	spin_unlock(&ras_log_buf_lock); | 
 | 242 | 	return IRQ_HANDLED; | 
 | 243 | } | 
 | 244 |  | 
 | 245 | /* Get the error information for errors coming through the | 
 | 246 |  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect | 
 | 247 |  * the actual r3 if possible, and a ptr to the error log entry | 
 | 248 |  * will be returned if found. | 
 | 249 |  * | 
 | 250 |  * The mce_data_buf does not have any locks or protection around it, | 
 | 251 |  * if a second machine check comes in, or a system reset is done | 
 | 252 |  * before we have logged the error, then we will get corruption in the | 
 | 253 |  * error log.  This is preferable over holding off on calling | 
 | 254 |  * ibm,nmi-interlock which would result in us checkstopping if a | 
 | 255 |  * second machine check did come in. | 
 | 256 |  */ | 
 | 257 | static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) | 
 | 258 | { | 
 | 259 | 	unsigned long errdata = regs->gpr[3]; | 
 | 260 | 	struct rtas_error_log *errhdr = NULL; | 
 | 261 | 	unsigned long *savep; | 
 | 262 |  | 
 | 263 | 	if ((errdata >= 0x7000 && errdata < 0x7fff0) || | 
 | 264 | 	    (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) { | 
 | 265 | 		savep = __va(errdata); | 
 | 266 | 		regs->gpr[3] = savep[0];	/* restore original r3 */ | 
 | 267 | 		memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX); | 
 | 268 | 		memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX); | 
 | 269 | 		errhdr = (struct rtas_error_log *)mce_data_buf; | 
 | 270 | 	} else { | 
 | 271 | 		printk("FWNMI: corrupt r3\n"); | 
 | 272 | 	} | 
 | 273 | 	return errhdr; | 
 | 274 | } | 
 | 275 |  | 
 | 276 | /* Call this when done with the data returned by FWNMI_get_errinfo. | 
 | 277 |  * It will release the saved data area for other CPUs in the | 
 | 278 |  * partition to receive FWNMI errors. | 
 | 279 |  */ | 
 | 280 | static void fwnmi_release_errinfo(void) | 
 | 281 | { | 
 | 282 | 	int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); | 
 | 283 | 	if (ret != 0) | 
 | 284 | 		printk("FWNMI: nmi-interlock failed: %d\n", ret); | 
 | 285 | } | 
 | 286 |  | 
 | 287 | void pSeries_system_reset_exception(struct pt_regs *regs) | 
 | 288 | { | 
 | 289 | 	if (fwnmi_active) { | 
 | 290 | 		struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); | 
 | 291 | 		if (errhdr) { | 
 | 292 | 			/* XXX Should look at FWNMI information */ | 
 | 293 | 		} | 
 | 294 | 		fwnmi_release_errinfo(); | 
 | 295 | 	} | 
 | 296 | } | 
 | 297 |  | 
 | 298 | /* | 
 | 299 |  * See if we can recover from a machine check exception. | 
 | 300 |  * This is only called on power4 (or above) and only via | 
 | 301 |  * the Firmware Non-Maskable Interrupts (fwnmi) handler | 
 | 302 |  * which provides the error analysis for us. | 
 | 303 |  * | 
 | 304 |  * Return 1 if corrected (or delivered a signal). | 
 | 305 |  * Return 0 if there is nothing we can do. | 
 | 306 |  */ | 
 | 307 | static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) | 
 | 308 | { | 
 | 309 | 	int nonfatal = 0; | 
 | 310 |  | 
 | 311 | 	if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { | 
 | 312 | 		/* Platform corrected itself */ | 
 | 313 | 		nonfatal = 1; | 
 | 314 | 	} else if ((regs->msr & MSR_RI) && | 
 | 315 | 		   user_mode(regs) && | 
 | 316 | 		   err->severity == RTAS_SEVERITY_ERROR_SYNC && | 
 | 317 | 		   err->disposition == RTAS_DISP_NOT_RECOVERED && | 
 | 318 | 		   err->target == RTAS_TARGET_MEMORY && | 
 | 319 | 		   err->type == RTAS_TYPE_ECC_UNCORR && | 
 | 320 | 		   !(current->pid == 0 || current->pid == 1)) { | 
 | 321 | 		/* Kill off a user process with an ECC error */ | 
 | 322 | 		printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", | 
 | 323 | 		       current->pid); | 
 | 324 | 		/* XXX something better for ECC error? */ | 
 | 325 | 		_exception(SIGBUS, regs, BUS_ADRERR, regs->nip); | 
 | 326 | 		nonfatal = 1; | 
 | 327 | 	} | 
 | 328 |  | 
 | 329 |  	log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal); | 
 | 330 |  | 
 | 331 | 	return nonfatal; | 
 | 332 | } | 
 | 333 |  | 
 | 334 | /* | 
 | 335 |  * Handle a machine check. | 
 | 336 |  * | 
 | 337 |  * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) | 
 | 338 |  * should be present.  If so the handler which called us tells us if the | 
 | 339 |  * error was recovered (never true if RI=0). | 
 | 340 |  * | 
 | 341 |  * On hardware prior to Power 4 these exceptions were asynchronous which | 
 | 342 |  * means we can't tell exactly where it occurred and so we can't recover. | 
 | 343 |  */ | 
 | 344 | int pSeries_machine_check_exception(struct pt_regs *regs) | 
 | 345 | { | 
 | 346 | 	struct rtas_error_log *errp; | 
 | 347 |  | 
 | 348 | 	if (fwnmi_active) { | 
 | 349 | 		errp = fwnmi_get_errinfo(regs); | 
 | 350 | 		fwnmi_release_errinfo(); | 
 | 351 | 		if (errp && recover_mce(regs, errp)) | 
 | 352 | 			return 1; | 
 | 353 | 	} | 
 | 354 |  | 
 | 355 | 	return 0; | 
 | 356 | } |