Blame - arch/ppc64/kernel/eeh.c - android_kernel_oneplus_msm8996

blob: bb11569d2b4be10b351957ded3224f9731825f69 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* eeh.c
				3	* Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	4	*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	5	* This program is free software; you can redistribute it and/or modify
				6	* it under the terms of the GNU General Public License as published by
				7	* the Free Software Foundation; either version 2 of the License, or
				8	* (at your option) any later version.
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	9	*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	10	* This program is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	* GNU General Public License for more details.
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	14	*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	15	* You should have received a copy of the GNU General Public License
				16	* along with this program; if not, write to the Free Software
				17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				18	*/
				19
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include <linux/init.h>
				21	#include <linux/list.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22	#include <linux/notifier.h>
				23	#include <linux/pci.h>
				24	#include <linux/proc_fs.h>
				25	#include <linux/rbtree.h>
				26	#include <linux/seq_file.h>
				27	#include <linux/spinlock.h>
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	28	#include <asm/atomic.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	29	#include <asm/eeh.h>
				30	#include <asm/io.h>
				31	#include <asm/machdep.h>
				32	#include <asm/rtas.h>
				33	#include <asm/atomic.h>
				34	#include <asm/systemcfg.h>
Stephen Rothwell	d387899	2005-09-28 02:50:25 +1000	[diff] [blame]	35	#include <asm/ppc-pci.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	36
				37	#undef DEBUG
				38
				39	/** Overview:
				40	* EEH, or "Extended Error Handling" is a PCI bridge technology for
				41	* dealing with PCI bus errors that can't be dealt with within the
				42	* usual PCI framework, except by check-stopping the CPU. Systems
				43	* that are designed for high-availability/reliability cannot afford
				44	* to crash due to a "mere" PCI error, thus the need for EEH.
				45	* An EEH-capable bridge operates by converting a detected error
				46	* into a "slot freeze", taking the PCI adapter off-line, making
				47	* the slot behave, from the OS'es point of view, as if the slot
				48	* were "empty": all reads return 0xff's and all writes are silently
				49	* ignored. EEH slot isolation events can be triggered by parity
				50	* errors on the address or data busses (e.g. during posted writes),
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	51	* which in turn might be caused by low voltage on the bus, dust,
				52	* vibration, humidity, radioactivity or plain-old failed hardware.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53	*
				54	* Note, however, that one of the leading causes of EEH slot
				55	* freeze events are buggy device drivers, buggy device microcode,
				56	* or buggy device hardware. This is because any attempt by the
				57	* device to bus-master data to a memory address that is not
				58	* assigned to the device will trigger a slot freeze. (The idea
				59	* is to prevent devices-gone-wild from corrupting system memory).
				60	* Buggy hardware/drivers will have a miserable time co-existing
				61	* with EEH.
				62	*
				63	* Ideally, a PCI device driver, when suspecting that an isolation
				64	* event has occurred (e.g. by reading 0xff's), will then ask EEH
				65	* whether this is the case, and then take appropriate steps to
				66	* reset the PCI slot, the PCI device, and then resume operations.
				67	* However, until that day, the checking is done here, with the
				68	* eeh_check_failure() routine embedded in the MMIO macros. If
				69	* the slot is found to be isolated, an "EEH Event" is synthesized
				70	* and sent out for processing.
				71	*/
				72
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	73	/* EEH event workqueue setup. */
				74	static DEFINE_SPINLOCK(eeh_eventlist_lock);
				75	LIST_HEAD(eeh_eventlist);
				76	static void eeh_event_handler(void *);
				77	DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL);
				78
				79	static struct notifier_block *eeh_notifier_chain;
				80
				81	/*
				82	* If a device driver keeps reading an MMIO register in an interrupt
				83	* handler after a slot isolation event has occurred, we assume it
				84	* is broken and panic. This sets the threshold for how many read
				85	* attempts we allow before panicking.
				86	*/
				87	#define EEH_MAX_FAILS 1000
				88	static atomic_t eeh_fail_count;
				89
				90	/* RTAS tokens */
				91	static int ibm_set_eeh_option;
				92	static int ibm_set_slot_reset;
				93	static int ibm_read_slot_reset_state;
				94	static int ibm_read_slot_reset_state2;
				95	static int ibm_slot_error_detail;
				96
				97	static int eeh_subsystem_enabled;
				98
				99	/* Buffer for reporting slot-error-detail rtas calls */
				100	static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
				101	static DEFINE_SPINLOCK(slot_errbuf_lock);
				102	static int eeh_error_buf_size;
				103
				104	/* System monitoring statistics */
				105	static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
				106	static DEFINE_PER_CPU(unsigned long, false_positives);
				107	static DEFINE_PER_CPU(unsigned long, ignored_failures);
				108	static DEFINE_PER_CPU(unsigned long, slot_resets);
				109
				110	/**
				111	* The pci address cache subsystem. This subsystem places
				112	* PCI device address resources into a red-black tree, sorted
				113	* according to the address range, so that given only an i/o
				114	* address, the corresponding PCI device can be quickly
				115	* found. It is safe to perform an address lookup in an interrupt
				116	* context; this ability is an important feature.
				117	*
				118	* Currently, the only customer of this code is the EEH subsystem;
				119	* thus, this code has been somewhat tailored to suit EEH better.
				120	* In particular, the cache does not hold the addresses of devices
				121	* for which EEH is not enabled.
				122	*
				123	* (Implementation Note: The RB tree seems to be better/faster
				124	* than any hash algo I could think of for this problem, even
				125	* with the penalty of slow pointer chases for d-cache misses).
				126	*/
				127	struct pci_io_addr_range
				128	{
				129	struct rb_node rb_node;
				130	unsigned long addr_lo;
				131	unsigned long addr_hi;
				132	struct pci_dev *pcidev;
				133	unsigned int flags;
				134	};
				135
				136	static struct pci_io_addr_cache
				137	{
				138	struct rb_root rb_root;
				139	spinlock_t piar_lock;
				140	} pci_io_addr_cache_root;
				141
				142	static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
				143	{
				144	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
				145
				146	while (n) {
				147	struct pci_io_addr_range *piar;
				148	piar = rb_entry(n, struct pci_io_addr_range, rb_node);
				149
				150	if (addr < piar->addr_lo) {
				151	n = n->rb_left;
				152	} else {
				153	if (addr > piar->addr_hi) {
				154	n = n->rb_right;
				155	} else {
				156	pci_dev_get(piar->pcidev);
				157	return piar->pcidev;
				158	}
				159	}
				160	}
				161
				162	return NULL;
				163	}
				164
				165	/**
				166	* pci_get_device_by_addr - Get device, given only address
				167	* @addr: mmio (PIO) phys address or i/o port number
				168	*
				169	* Given an mmio phys address, or a port number, find a pci device
				170	* that implements this address. Be sure to pci_dev_put the device
				171	* when finished. I/O port numbers are assumed to be offset
				172	* from zero (that is, they do not have pci_io_addr added in).
				173	* It is safe to call this function within an interrupt.
				174	*/
				175	static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
				176	{
				177	struct pci_dev *dev;
				178	unsigned long flags;
				179
				180	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
				181	dev = __pci_get_device_by_addr(addr);
				182	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
				183	return dev;
				184	}
				185
				186	#ifdef DEBUG
				187	/*
				188	* Handy-dandy debug print routine, does nothing more
				189	* than print out the contents of our addr cache.
				190	*/
				191	static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
				192	{
				193	struct rb_node *n;
				194	int cnt = 0;
				195
				196	n = rb_first(&cache->rb_root);
				197	while (n) {
				198	struct pci_io_addr_range *piar;
				199	piar = rb_entry(n, struct pci_io_addr_range, rb_node);
Adrian Bunk	982245f	2005-07-17 04:22:20 +0200	[diff] [blame]	200	printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201	(piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
Adrian Bunk	982245f	2005-07-17 04:22:20 +0200	[diff] [blame]	202	piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	203	cnt++;
				204	n = rb_next(n);
				205	}
				206	}
				207	#endif
				208
				209	/* Insert address range into the rb tree. */
				210	static struct pci_io_addr_range *
				211	pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
				212	unsigned long ahi, unsigned int flags)
				213	{
				214	struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
				215	struct rb_node *parent = NULL;
				216	struct pci_io_addr_range *piar;
				217
				218	/* Walk tree, find a place to insert into tree */
				219	while (*p) {
				220	parent = *p;
				221	piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
				222	if (alo < piar->addr_lo) {
				223	p = &parent->rb_left;
				224	} else if (ahi > piar->addr_hi) {
				225	p = &parent->rb_right;
				226	} else {
				227	if (dev != piar->pcidev \|\|
				228	alo != piar->addr_lo \|\| ahi != piar->addr_hi) {
				229	printk(KERN_WARNING "PIAR: overlapping address range\n");
				230	}
				231	return piar;
				232	}
				233	}
				234	piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
				235	if (!piar)
				236	return NULL;
				237
				238	piar->addr_lo = alo;
				239	piar->addr_hi = ahi;
				240	piar->pcidev = dev;
				241	piar->flags = flags;
				242
				243	rb_link_node(&piar->rb_node, parent, p);
				244	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
				245
				246	return piar;
				247	}
				248
				249	static void __pci_addr_cache_insert_device(struct pci_dev *dev)
				250	{
				251	struct device_node *dn;
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	252	struct pci_dn *pdn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	253	int i;
				254	int inserted = 0;
				255
				256	dn = pci_device_to_OF_node(dev);
				257	if (!dn) {
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	258	printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	259	return;
				260	}
				261
				262	/* Skip any devices for which EEH is not enabled. */
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	263	pdn = PCI_DN(dn);
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	264	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) \|\|
				265	pdn->eeh_mode & EEH_MODE_NOCHECK) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	266	#ifdef DEBUG
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	267	printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
				268	pci_name(dev), pdn->node->full_name);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	269	#endif
				270	return;
				271	}
				272
				273	/* The cache holds a reference to the device... */
				274	pci_dev_get(dev);
				275
				276	/* Walk resources on this device, poke them into the tree */
				277	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
				278	unsigned long start = pci_resource_start(dev,i);
				279	unsigned long end = pci_resource_end(dev,i);
				280	unsigned int flags = pci_resource_flags(dev,i);
				281
				282	/* We are interested only bus addresses, not dma or other stuff */
				283	if (0 == (flags & (IORESOURCE_IO \| IORESOURCE_MEM)))
				284	continue;
				285	if (start == 0 \|\| ~start == 0 \|\| end == 0 \|\| ~end == 0)
				286	continue;
				287	pci_addr_cache_insert(dev, start, end, flags);
				288	inserted = 1;
				289	}
				290
				291	/* If there was nothing to add, the cache has no reference... */
				292	if (!inserted)
				293	pci_dev_put(dev);
				294	}
				295
				296	/**
				297	* pci_addr_cache_insert_device - Add a device to the address cache
				298	* @dev: PCI device whose I/O addresses we are interested in.
				299	*
				300	* In order to support the fast lookup of devices based on addresses,
				301	* we maintain a cache of devices that can be quickly searched.
				302	* This routine adds a device to that cache.
				303	*/
				304	void pci_addr_cache_insert_device(struct pci_dev *dev)
				305	{
				306	unsigned long flags;
				307
				308	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
				309	__pci_addr_cache_insert_device(dev);
				310	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
				311	}
				312
				313	static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
				314	{
				315	struct rb_node *n;
				316	int removed = 0;
				317
				318	restart:
				319	n = rb_first(&pci_io_addr_cache_root.rb_root);
				320	while (n) {
				321	struct pci_io_addr_range *piar;
				322	piar = rb_entry(n, struct pci_io_addr_range, rb_node);
				323
				324	if (piar->pcidev == dev) {
				325	rb_erase(n, &pci_io_addr_cache_root.rb_root);
				326	removed = 1;
				327	kfree(piar);
				328	goto restart;
				329	}
				330	n = rb_next(n);
				331	}
				332
				333	/* The cache no longer holds its reference to this device... */
				334	if (removed)
				335	pci_dev_put(dev);
				336	}
				337
				338	/**
				339	* pci_addr_cache_remove_device - remove pci device from addr cache
				340	* @dev: device to remove
				341	*
				342	* Remove a device from the addr-cache tree.
				343	* This is potentially expensive, since it will walk
				344	* the tree multiple times (once per resource).
				345	* But so what; device removal doesn't need to be that fast.
				346	*/
				347	void pci_addr_cache_remove_device(struct pci_dev *dev)
				348	{
				349	unsigned long flags;
				350
				351	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
				352	__pci_addr_cache_remove_device(dev);
				353	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
				354	}
				355
				356	/**
				357	* pci_addr_cache_build - Build a cache of I/O addresses
				358	*
				359	* Build a cache of pci i/o addresses. This cache will be used to
				360	* find the pci device that corresponds to a given address.
				361	* This routine scans all pci busses to build the cache.
				362	* Must be run late in boot process, after the pci controllers
				363	* have been scaned for devices (after all device resources are known).
				364	*/
				365	void __init pci_addr_cache_build(void)
				366	{
				367	struct pci_dev *dev = NULL;
				368
				369	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
				370
				371	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
				372	/* Ignore PCI bridges ( XXX why ??) */
				373	if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
				374	continue;
				375	}
				376	pci_addr_cache_insert_device(dev);
				377	}
				378
				379	#ifdef DEBUG
				380	/* Verify tree built up above, echo back the list of addrs. */
				381	pci_addr_cache_print(&pci_io_addr_cache_root);
				382	#endif
				383	}
				384
				385	/* --------------------------------------------------------------- */
				386	/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
				387
				388	/**
				389	* eeh_register_notifier - Register to find out about EEH events.
				390	* @nb: notifier block to callback on events
				391	*/
				392	int eeh_register_notifier(struct notifier_block *nb)
				393	{
				394	return notifier_chain_register(&eeh_notifier_chain, nb);
				395	}
				396
				397	/**
				398	* eeh_unregister_notifier - Unregister to an EEH event notifier.
				399	* @nb: notifier block to callback on events
				400	*/
				401	int eeh_unregister_notifier(struct notifier_block *nb)
				402	{
				403	return notifier_chain_unregister(&eeh_notifier_chain, nb);
				404	}
				405
				406	/**
				407	* read_slot_reset_state - Read the reset state of a device node's slot
				408	* @dn: device node to read
				409	* @rets: array to return results in
				410	*/
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	411	static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	412	{
				413	int token, outputs;
				414
				415	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
				416	token = ibm_read_slot_reset_state2;
				417	outputs = 4;
				418	} else {
				419	token = ibm_read_slot_reset_state;
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	420	rets[2] = 0; /* fake PE Unavailable info */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	421	outputs = 3;
				422	}
				423
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	424	return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,
				425	BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	426	}
				427
				428	/**
				429	* eeh_panic - call panic() for an eeh event that cannot be handled.
				430	* The philosophy of this routine is that it is better to panic and
				431	* halt the OS than it is to risk possible data corruption by
				432	* oblivious device drivers that don't know better.
				433	*
				434	* @dev pci device that had an eeh event
				435	* @reset_state current reset state of the device slot
				436	*/
				437	static void eeh_panic(struct pci_dev *dev, int reset_state)
				438	{
				439	/*
				440	* XXX We should create a separate sysctl for this.
				441	*
				442	* Since the panic_on_oops sysctl is used to halt the system
				443	* in light of potential corruption, we can use it here.
				444	*/
				445	if (panic_on_oops)
Adrian Bunk	982245f	2005-07-17 04:22:20 +0200	[diff] [blame]	446	panic("EEH: MMIO failure (%d) on device:%s\n", reset_state,
				447	pci_name(dev));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	448	else {
				449	__get_cpu_var(ignored_failures)++;
Adrian Bunk	982245f	2005-07-17 04:22:20 +0200	[diff] [blame]	450	printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n",
				451	reset_state, pci_name(dev));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	452	}
				453	}
				454
				455	/**
				456	* eeh_event_handler - dispatch EEH events. The detection of a frozen
				457	* slot can occur inside an interrupt, where it can be hard to do
				458	* anything about it. The goal of this routine is to pull these
				459	* detection events out of the context of the interrupt handler, and
				460	* re-dispatch them for processing at a later time in a normal context.
				461	*
				462	* @dummy - unused
				463	*/
				464	static void eeh_event_handler(void *dummy)
				465	{
				466	unsigned long flags;
				467	struct eeh_event *event;
				468
				469	while (1) {
				470	spin_lock_irqsave(&eeh_eventlist_lock, flags);
				471	event = NULL;
				472	if (!list_empty(&eeh_eventlist)) {
				473	event = list_entry(eeh_eventlist.next, struct eeh_event, list);
				474	list_del(&event->list);
				475	}
				476	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
				477	if (event == NULL)
				478	break;
				479
				480	printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device "
Adrian Bunk	982245f	2005-07-17 04:22:20 +0200	[diff] [blame]	481	"%s\n", event->reset_state,
				482	pci_name(event->dev));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	483
				484	atomic_set(&eeh_fail_count, 0);
				485	notifier_call_chain (&eeh_notifier_chain,
				486	EEH_NOTIFY_FREEZE, event);
				487
				488	__get_cpu_var(slot_resets)++;
				489
				490	pci_dev_put(event->dev);
				491	kfree(event);
				492	}
				493	}
				494
				495	/**
				496	* eeh_token_to_phys - convert EEH address token to phys address
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	497	* @token i/o token, should be address in the form 0xA....
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	498	*/
				499	static inline unsigned long eeh_token_to_phys(unsigned long token)
				500	{
				501	pte_t *ptep;
				502	unsigned long pa;
				503
David Gibson	20cee16	2005-06-21 17:15:31 -0700	[diff] [blame]	504	ptep = find_linux_pte(init_mm.pgd, token);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	505	if (!ptep)
				506	return token;
				507	pa = pte_pfn(*ptep) << PAGE_SHIFT;
				508
				509	return pa \| (token & (PAGE_SIZE-1));
				510	}
				511
				512	/**
				513	* eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
				514	* @dn device node
				515	* @dev pci device, if known
				516	*
				517	* Check for an EEH failure for the given device node. Call this
				518	* routine if the result of a read was all 0xff's and you want to
				519	* find out if this is due to an EEH slot freeze. This routine
				520	* will query firmware for the EEH status.
				521	*
				522	* Returns 0 if there has not been an EEH error; otherwise returns
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	523	* a non-zero value and queues up a slot isolation event notification.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	524	*
				525	* It is safe to call this routine in an interrupt context.
				526	*/
				527	int eeh_dn_check_failure(struct device_node dn, struct pci_dev dev)
				528	{
				529	int ret;
				530	int rets[3];
				531	unsigned long flags;
				532	int rc, reset_state;
				533	struct eeh_event *event;
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	534	struct pci_dn *pdn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	535
				536	__get_cpu_var(total_mmio_ffs)++;
				537
				538	if (!eeh_subsystem_enabled)
				539	return 0;
				540
				541	if (!dn)
				542	return 0;
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	543	pdn = PCI_DN(dn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	544
				545	/* Access to IO BARs might get this far and still not want checking. */
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	546	if (!pdn->eeh_capable \|\| !(pdn->eeh_mode & EEH_MODE_SUPPORTED) \|\|
				547	pdn->eeh_mode & EEH_MODE_NOCHECK) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	548	return 0;
				549	}
				550
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	551	if (!pdn->eeh_config_addr) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	552	return 0;
				553	}
				554
				555	/*
				556	* If we already have a pending isolation event for this
				557	* slot, we know it's bad already, we don't need to check...
				558	*/
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	559	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	560	atomic_inc(&eeh_fail_count);
				561	if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) {
				562	/* re-read the slot reset state */
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	563	if (read_slot_reset_state(pdn, rets) != 0)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	564	rets[0] = -1; /* reset state unknown */
				565	eeh_panic(dev, rets[0]);
				566	}
				567	return 0;
				568	}
				569
				570	/*
				571	* Now test for an EEH failure. This is VERY expensive.
				572	* Note that the eeh_config_addr may be a parent device
				573	* in the case of a device behind a bridge, or it may be
				574	* function zero of a multi-function device.
				575	* In any case they must share a common PHB.
				576	*/
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	577	ret = read_slot_reset_state(pdn, rets);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	578	if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 \|\| rets[0] == 4))) {
				579	__get_cpu_var(false_positives)++;
				580	return 0;
				581	}
				582
				583	/* prevent repeated reports of this failure */
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	584	pdn->eeh_mode \|= EEH_MODE_ISOLATED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	585
				586	reset_state = rets[0];
				587
				588	spin_lock_irqsave(&slot_errbuf_lock, flags);
				589	memset(slot_errbuf, 0, eeh_error_buf_size);
				590
				591	rc = rtas_call(ibm_slot_error_detail,
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	592	8, 1, NULL, pdn->eeh_config_addr,
				593	BUID_HI(pdn->phb->buid),
				594	BUID_LO(pdn->phb->buid), NULL, 0,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	595	virt_to_phys(slot_errbuf),
				596	eeh_error_buf_size,
				597	1 /* Temporary Error */);
				598
				599	if (rc == 0)
				600	log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
				601	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
				602
				603	printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n",
				604	rets[0], dn->name, dn->full_name);
				605	event = kmalloc(sizeof(*event), GFP_ATOMIC);
				606	if (event == NULL) {
				607	eeh_panic(dev, reset_state);
				608	return 1;
				609	}
				610
				611	event->dev = dev;
				612	event->dn = dn;
				613	event->reset_state = reset_state;
				614
				615	/* We may or may not be called in an interrupt context */
				616	spin_lock_irqsave(&eeh_eventlist_lock, flags);
				617	list_add(&event->list, &eeh_eventlist);
				618	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
				619
				620	/* Most EEH events are due to device driver bugs. Having
				621	* a stack trace will help the device-driver authors figure
				622	* out what happened. So print that out. */
				623	dump_stack();
				624	schedule_work(&eeh_event_wq);
				625
				626	return 0;
				627	}
				628
				629	EXPORT_SYMBOL(eeh_dn_check_failure);
				630
				631	/**
				632	* eeh_check_failure - check if all 1's data is due to EEH slot freeze
				633	* @token i/o token, should be address in the form 0xA....
				634	* @val value, should be all 1's (XXX why do we need this arg??)
				635	*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	636	* Check for an EEH failure at the given token address. Call this
				637	* routine if the result of a read was all 0xff's and you want to
				638	* find out if this is due to an EEH slot freeze event. This routine
				639	* will query firmware for the EEH status.
				640	*
				641	* Note this routine is safe to call in an interrupt context.
				642	*/
				643	unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
				644	{
				645	unsigned long addr;
				646	struct pci_dev *dev;
				647	struct device_node *dn;
				648
				649	/* Finding the phys addr + pci device; this is pretty quick. */
				650	addr = eeh_token_to_phys((unsigned long __force) token);
				651	dev = pci_get_device_by_addr(addr);
				652	if (!dev)
				653	return val;
				654
				655	dn = pci_device_to_OF_node(dev);
				656	eeh_dn_check_failure (dn, dev);
				657
				658	pci_dev_put(dev);
				659	return val;
				660	}
				661
				662	EXPORT_SYMBOL(eeh_check_failure);
				663
				664	struct eeh_early_enable_info {
				665	unsigned int buid_hi;
				666	unsigned int buid_lo;
				667	};
				668
				669	/* Enable eeh for the given device node. */
				670	static void early_enable_eeh(struct device_node dn, void *data)
				671	{
				672	struct eeh_early_enable_info *info = data;
				673	int ret;
				674	char *status = get_property(dn, "status", NULL);
				675	u32 class_code = (u32 )get_property(dn, "class-code", NULL);
				676	u32 vendor_id = (u32 )get_property(dn, "vendor-id", NULL);
				677	u32 device_id = (u32 )get_property(dn, "device-id", NULL);
				678	u32 *regs;
				679	int enable;
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	680	struct pci_dn *pdn = PCI_DN(dn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	681
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	682	pdn->eeh_mode = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	683
				684	if (status && strcmp(status, "ok") != 0)
				685	return NULL; /* ignore devices with bad status */
				686
				687	/* Ignore bad nodes. */
				688	if (!class_code \|\| !vendor_id \|\| !device_id)
				689	return NULL;
				690
				691	/* There is nothing to check on PCI to ISA bridges */
				692	if (dn->type && !strcmp(dn->type, "isa")) {
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	693	pdn->eeh_mode \|= EEH_MODE_NOCHECK;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	694	return NULL;
				695	}
				696
				697	/*
				698	* Now decide if we are going to "Disable" EEH checking
				699	* for this device. We still run with the EEH hardware active,
				700	* but we won't be checking for ff's. This means a driver
				701	* could return bad data (very bad!), an interrupt handler could
				702	* hang waiting on status bits that won't change, etc.
				703	* But there are a few cases like display devices that make sense.
				704	*/
				705	enable = 1; /* i.e. we will do checking */
				706	if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
				707	enable = 0;
				708
				709	if (!enable)
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	710	pdn->eeh_mode \|= EEH_MODE_NOCHECK;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	711
				712	/* Ok... see if this device supports EEH. Some do, some don't,
				713	* and the only way to find out is to check each and every one. */
				714	regs = (u32 *)get_property(dn, "reg", NULL);
				715	if (regs) {
				716	/* First register entry is addr (00BBSS00) */
				717	/* Try to enable eeh */
				718	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
				719	regs[0], info->buid_hi, info->buid_lo,
				720	EEH_ENABLE);
				721	if (ret == 0) {
				722	eeh_subsystem_enabled = 1;
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	723	pdn->eeh_mode \|= EEH_MODE_SUPPORTED;
				724	pdn->eeh_config_addr = regs[0];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	725	#ifdef DEBUG
				726	printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
				727	#endif
				728	} else {
				729
				730	/* This device doesn't support EEH, but it may have an
				731	* EEH parent, in which case we mark it as supported. */
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	732	if (dn->parent && PCI_DN(dn->parent)
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	733	&& (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	734	/* Parent supports EEH. */
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	735	pdn->eeh_mode \|= EEH_MODE_SUPPORTED;
				736	pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	737	return NULL;
				738	}
				739	}
				740	} else {
				741	printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
				742	dn->full_name);
				743	}
				744
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	745	return NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	746	}
				747
				748	/*
				749	* Initialize EEH by trying to enable it for all of the adapters in the system.
				750	* As a side effect we can determine here if eeh is supported at all.
				751	* Note that we leave EEH on so failed config cycles won't cause a machine
				752	* check. If a user turns off EEH for a particular adapter they are really
				753	* telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
				754	* grant access to a slot if EEH isn't enabled, and so we always enable
				755	* EEH for all slots/all devices.
				756	*
				757	* The eeh-force-off option disables EEH checking globally, for all slots.
				758	* Even if force-off is set, the EEH hardware is still enabled, so that
				759	* newer systems can boot.
				760	*/
				761	void __init eeh_init(void)
				762	{
				763	struct device_node phb, np;
				764	struct eeh_early_enable_info info;
				765
				766	np = of_find_node_by_path("/rtas");
				767	if (np == NULL)
				768	return;
				769
				770	ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
				771	ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
				772	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
				773	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
				774	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
				775
				776	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
				777	return;
				778
				779	eeh_error_buf_size = rtas_token("rtas-error-log-max");
				780	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
				781	eeh_error_buf_size = 1024;
				782	}
				783	if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
				784	printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
				785	"buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
				786	eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
				787	}
				788
				789	/* Enable EEH for all adapters. Note that eeh requires buid's */
				790	for (phb = of_find_node_by_name(NULL, "pci"); phb;
				791	phb = of_find_node_by_name(phb, "pci")) {
				792	unsigned long buid;
				793
				794	buid = get_phb_buid(phb);
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	795	if (buid == 0 \|\| PCI_DN(phb) == NULL)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	796	continue;
				797
				798	info.buid_lo = BUID_LO(buid);
				799	info.buid_hi = BUID_HI(buid);
				800	traverse_pci_devices(phb, early_enable_eeh, &info);
				801	}
				802
				803	if (eeh_subsystem_enabled)
				804	printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
				805	else
				806	printk(KERN_WARNING "EEH: No capable adapters found\n");
				807	}
				808
				809	/**
				810	* eeh_add_device_early - enable EEH for the indicated device_node
				811	* @dn: device node for which to set up EEH
				812	*
				813	* This routine must be used to perform EEH initialization for PCI
				814	* devices that were added after system boot (e.g. hotplug, dlpar).
				815	* This routine must be called before any i/o is performed to the
				816	* adapter (inluding any config-space i/o).
				817	* Whether this actually enables EEH or not for this device depends
				818	* on the CEC architecture, type of the device, on earlier boot
				819	* command-line arguments & etc.
				820	*/
				821	void eeh_add_device_early(struct device_node *dn)
				822	{
				823	struct pci_controller *phb;
				824	struct eeh_early_enable_info info;
				825
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	826	if (!dn \|\| !PCI_DN(dn))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	827	return;
Paul Mackerras	1635317	2005-09-06 13:17:54 +1000	[diff] [blame]	828	phb = PCI_DN(dn)->phb;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	829	if (NULL == phb \|\| 0 == phb->buid) {
Linas Vepstas	6937650	2005-11-03 18:47:50 -0600	[diff] [blame^]	830	printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",
				831	dn->full_name);
				832	dump_stack();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	833	return;
				834	}
				835
				836	info.buid_hi = BUID_HI(phb->buid);
				837	info.buid_lo = BUID_LO(phb->buid);
				838	early_enable_eeh(dn, &info);
				839	}
				840	EXPORT_SYMBOL(eeh_add_device_early);
				841
				842	/**
				843	* eeh_add_device_late - perform EEH initialization for the indicated pci device
				844	* @dev: pci device for which to set up EEH
				845	*
				846	* This routine must be used to complete EEH initialization for PCI
				847	* devices that were added after system boot (e.g. hotplug, dlpar).
				848	*/
				849	void eeh_add_device_late(struct pci_dev *dev)
				850	{
				851	if (!dev \|\| !eeh_subsystem_enabled)
				852	return;
				853
				854	#ifdef DEBUG
Adrian Bunk	982245f	2005-07-17 04:22:20 +0200	[diff] [blame]	855	printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	856	#endif
				857
				858	pci_addr_cache_insert_device (dev);
				859	}
				860	EXPORT_SYMBOL(eeh_add_device_late);
				861
				862	/**
				863	* eeh_remove_device - undo EEH setup for the indicated pci device
				864	* @dev: pci device to be removed
				865	*
				866	* This routine should be when a device is removed from a running
				867	* system (e.g. by hotplug or dlpar).
				868	*/
				869	void eeh_remove_device(struct pci_dev *dev)
				870	{
				871	if (!dev \|\| !eeh_subsystem_enabled)
				872	return;
				873
				874	/* Unregister the device with the EEH/PCI address search system */
				875	#ifdef DEBUG
Adrian Bunk	982245f	2005-07-17 04:22:20 +0200	[diff] [blame]	876	printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	877	#endif
				878	pci_addr_cache_remove_device(dev);
				879	}
				880	EXPORT_SYMBOL(eeh_remove_device);
				881
				882	static int proc_eeh_show(struct seq_file m, void v)
				883	{
				884	unsigned int cpu;
				885	unsigned long ffs = 0, positives = 0, failures = 0;
				886	unsigned long resets = 0;
				887
				888	for_each_cpu(cpu) {
				889	ffs += per_cpu(total_mmio_ffs, cpu);
				890	positives += per_cpu(false_positives, cpu);
				891	failures += per_cpu(ignored_failures, cpu);
				892	resets += per_cpu(slot_resets, cpu);
				893	}
				894
				895	if (0 == eeh_subsystem_enabled) {
				896	seq_printf(m, "EEH Subsystem is globally disabled\n");
				897	seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
				898	} else {
				899	seq_printf(m, "EEH Subsystem is enabled\n");
				900	seq_printf(m, "eeh_total_mmio_ffs=%ld\n"
				901	"eeh_false_positives=%ld\n"
				902	"eeh_ignored_failures=%ld\n"
				903	"eeh_slot_resets=%ld\n"
				904	"eeh_fail_count=%d\n",
				905	ffs, positives, failures, resets,
				906	eeh_fail_count.counter);
				907	}
				908
				909	return 0;
				910	}
				911
				912	static int proc_eeh_open(struct inode inode, struct file file)
				913	{
				914	return single_open(file, proc_eeh_show, NULL);
				915	}
				916
				917	static struct file_operations proc_eeh_operations = {
				918	.open = proc_eeh_open,
				919	.read = seq_read,
				920	.llseek = seq_lseek,
				921	.release = single_release,
				922	};
				923
				924	static int __init eeh_init_proc(void)
				925	{
				926	struct proc_dir_entry *e;
				927
				928	if (systemcfg->platform & PLATFORM_PSERIES) {
				929	e = create_proc_entry("ppc64/eeh", 0, NULL);
				930	if (e)
				931	e->proc_fops = &proc_eeh_operations;
				932	}
				933
				934	return 0;
				935	}
				936	__initcall(eeh_init_proc);