[PATCH] powerpc: Move more ppc64 files with no ppc32 equivalent to powerpc

This patch moves a bunch more files from arch/ppc64 and
include/asm-ppc64 which have no equivalents in ppc32 code into
arch/powerpc and include/asm-powerpc.  The file affected are:
	hvcall.h
	proc_ppc64.c
	sysfs.c
	lparcfg.c
	rtas_pci.c

The only changes apart from the move and corresponding Makefile
changes are:
	- #ifndef/#define in includes updated to _ASM_POWERPC_ form
	- trailing whitespace removed
	- comments giving full paths removed

Built and booted on POWER5 LPAR (ARCH=powerpc and ARCH=ppc64), built
for 32-bit powermac (ARCH=powerpc).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 92cfabf..c04bbd3 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -16,13 +16,17 @@
 obj-$(CONFIG_PPC64)		+= setup_64.o binfmt_elf32.o sys_ppc32.o \
 				   signal_64.o ptrace32.o systbl.o \
 				   paca.o ioctl32.o cpu_setup_power4.o \
-				   firmware.o
+				   firmware.o sysfs.o
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o
 obj-$(CONFIG_POWER4)		+= idle_power4.o
 obj-$(CONFIG_PPC_OF)		+= of_device.o
-obj-$(CONFIG_PPC_RTAS)		+= rtas.o
+procfs-$(CONFIG_PPC64)		:= proc_ppc64.o
+obj-$(CONFIG_PROC_FS)		+= $(procfs-y)
+rtaspci-$(CONFIG_PPC64)		:= rtas_pci.o
+obj-$(CONFIG_PPC_RTAS)		+= rtas.o $(rtaspci-y)
 obj-$(CONFIG_RTAS_FLASH)	+= rtas_flash.o
 obj-$(CONFIG_RTAS_PROC)		+= rtas-proc.o
+obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 obj-$(CONFIG_IBMVIO)		+= vio.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 0000000..5e954fa
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,612 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ *    Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ *    SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ *    seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ *    Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/iseries/it_exp_vpd_panel.h>
+#include <asm/prom.h>
+#include <asm/systemcfg.h>
+
+#define MODULE_VERS "1.6"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+/* find a better place for this function... */
+void log_plpar_hcall_return(unsigned long rc, char *tag)
+{
+	if (rc == 0)		/* success, return */
+		return;
+/* check for null tag ? */
+	if (rc == H_Hardware)
+		printk(KERN_INFO
+		       "plpar-hcall (%s) failed with hardware fault\n", tag);
+	else if (rc == H_Function)
+		printk(KERN_INFO
+		       "plpar-hcall (%s) failed; function not allowed\n", tag);
+	else if (rc == H_Authority)
+		printk(KERN_INFO
+		       "plpar-hcall (%s) failed; not authorized to this function\n",
+		       tag);
+	else if (rc == H_Parameter)
+		printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
+		       tag);
+	else
+		printk(KERN_INFO
+		       "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
+		       tag, rc);
+
+}
+
+static struct proc_dir_entry *proc_ppc64_lparcfg;
+#define LPARCFG_BUFF_SIZE 4096
+
+#ifdef CONFIG_PPC_ISERIES
+
+/*
+ * For iSeries legacy systems, the PPA purr function is available from the
+ * emulated_time_base field in the paca.
+ */
+static unsigned long get_purr(void)
+{
+	unsigned long sum_purr = 0;
+	int cpu;
+	struct paca_struct *lpaca;
+
+	for_each_cpu(cpu) {
+		lpaca = paca + cpu;
+		sum_purr += lpaca->lppaca.emulated_time_base;
+
+#ifdef PURR_DEBUG
+		printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
+			cpu, lpaca->lppaca.emulated_time_base);
+#endif
+	}
+	return sum_purr;
+}
+
+#define lparcfg_write NULL
+
+/*
+ * Methods used to fetch LPAR data when running on an iSeries platform.
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+	unsigned long pool_id, lp_index;
+	int shared, entitled_capacity, max_entitled_capacity;
+	int processors, max_processors;
+	struct paca_struct *lpaca = get_paca();
+	unsigned long purr = get_purr();
+
+	seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+	shared = (int)(lpaca->lppaca_ptr->shared_proc);
+	seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
+		   e2a(xItExtVpdPanel.mfgID[2]),
+		   e2a(xItExtVpdPanel.mfgID[3]),
+		   e2a(xItExtVpdPanel.systemSerial[1]),
+		   e2a(xItExtVpdPanel.systemSerial[2]),
+		   e2a(xItExtVpdPanel.systemSerial[3]),
+		   e2a(xItExtVpdPanel.systemSerial[4]),
+		   e2a(xItExtVpdPanel.systemSerial[5]));
+
+	seq_printf(m, "system_type=%c%c%c%c\n",
+		   e2a(xItExtVpdPanel.machineType[0]),
+		   e2a(xItExtVpdPanel.machineType[1]),
+		   e2a(xItExtVpdPanel.machineType[2]),
+		   e2a(xItExtVpdPanel.machineType[3]));
+
+	lp_index = HvLpConfig_getLpIndex();
+	seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+	seq_printf(m, "system_active_processors=%d\n",
+		   (int)HvLpConfig_getSystemPhysicalProcessors());
+
+	seq_printf(m, "system_potential_processors=%d\n",
+		   (int)HvLpConfig_getSystemPhysicalProcessors());
+
+	processors = (int)HvLpConfig_getPhysicalProcessors();
+	seq_printf(m, "partition_active_processors=%d\n", processors);
+
+	max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
+	seq_printf(m, "partition_potential_processors=%d\n", max_processors);
+
+	if (shared) {
+		entitled_capacity = HvLpConfig_getSharedProcUnits();
+		max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
+	} else {
+		entitled_capacity = processors * 100;
+		max_entitled_capacity = max_processors * 100;
+	}
+	seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
+
+	seq_printf(m, "partition_max_entitled_capacity=%d\n",
+		   max_entitled_capacity);
+
+	if (shared) {
+		pool_id = HvLpConfig_getSharedPoolIndex();
+		seq_printf(m, "pool=%d\n", (int)pool_id);
+		seq_printf(m, "pool_capacity=%d\n",
+			   (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
+				 100));
+		seq_printf(m, "purr=%ld\n", purr);
+	}
+
+	seq_printf(m, "shared_processor_mode=%d\n", shared);
+
+	return 0;
+}
+#endif				/* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ *  entitled_capacity,unallocated_capacity,
+ *  aggregation, resource_capability).
+ *
+ *  R4 = Entitled Processor Capacity Percentage.
+ *  R5 = Unallocated Processor Capacity Percentage.
+ *  R6 (AABBCCDDEEFFGGHH).
+ *      XXXX - reserved (0)
+ *          XXXX - reserved (0)
+ *              XXXX - Group Number
+ *                  XXXX - Pool Number.
+ *  R7 (IIJJKKLLMMNNOOPP).
+ *      XX - reserved. (0)
+ *        XX - bit 0-6 reserved (0).   bit 7 is Capped indicator.
+ *          XX - variable processor Capacity Weight
+ *            XX - Unallocated Variable Processor Capacity Weight.
+ *              XXXX - Active processors in Physical Processor Pool.
+ *                  XXXX  - Processors active on platform.
+ */
+static unsigned int h_get_ppp(unsigned long *entitled,
+			      unsigned long *unallocated,
+			      unsigned long *aggregation,
+			      unsigned long *resource)
+{
+	unsigned long rc;
+	rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
+			      aggregation, resource);
+
+	log_plpar_hcall_return(rc, "H_GET_PPP");
+
+	return rc;
+}
+
+static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+{
+	unsigned long rc;
+	unsigned long dummy;
+	rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+
+	log_plpar_hcall_return(rc, "H_PIC");
+}
+
+static unsigned long get_purr(void);
+
+/* Track sum of all purrs across all processors. This is used to further */
+/* calculate usage values by different applications                       */
+
+static unsigned long get_purr(void)
+{
+	unsigned long sum_purr = 0;
+	int cpu;
+	struct cpu_usage *cu;
+
+	for_each_cpu(cpu) {
+		cu = &per_cpu(cpu_usage_array, cpu);
+		sum_purr += cu->current_tb;
+	}
+	return sum_purr;
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call.  Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+	int call_status;
+
+	char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+	if (!local_buffer) {
+		printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+		       __FILE__, __FUNCTION__, __LINE__);
+		return;
+	}
+
+	spin_lock(&rtas_data_buf_lock);
+	memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				SPLPAR_CHARACTERISTICS_TOKEN,
+				__pa(rtas_data_buf));
+	memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+	spin_unlock(&rtas_data_buf_lock);
+
+	if (call_status != 0) {
+		printk(KERN_INFO
+		       "%s %s Error calling get-system-parameter (0x%x)\n",
+		       __FILE__, __FUNCTION__, call_status);
+	} else {
+		int splpar_strlen;
+		int idx, w_idx;
+		char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+		if (!workbuffer) {
+			printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+			       __FILE__, __FUNCTION__, __LINE__);
+			kfree(local_buffer);
+			return;
+		}
+#ifdef LPARCFG_DEBUG
+		printk(KERN_INFO "success calling get-system-parameter \n");
+#endif
+		splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
+		local_buffer += 2;	/* step over strlen value */
+
+		memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+		w_idx = 0;
+		idx = 0;
+		while ((*local_buffer) && (idx < splpar_strlen)) {
+			workbuffer[w_idx++] = local_buffer[idx++];
+			if ((local_buffer[idx] == ',')
+			    || (local_buffer[idx] == '\0')) {
+				workbuffer[w_idx] = '\0';
+				if (w_idx) {
+					/* avoid the empty string */
+					seq_printf(m, "%s\n", workbuffer);
+				}
+				memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+				idx++;	/* skip the comma */
+				w_idx = 0;
+			} else if (local_buffer[idx] == '=') {
+				/* code here to replace workbuffer contents
+				   with different keyword strings */
+				if (0 == strcmp(workbuffer, "MaxEntCap")) {
+					strcpy(workbuffer,
+					       "partition_max_entitled_capacity");
+					w_idx = strlen(workbuffer);
+				}
+				if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+					strcpy(workbuffer,
+					       "system_potential_processors");
+					w_idx = strlen(workbuffer);
+				}
+			}
+		}
+		kfree(workbuffer);
+		local_buffer -= 2;	/* back up over strlen value */
+	}
+	kfree(local_buffer);
+}
+
+static int lparcfg_count_active_processors(void);
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors, this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+	struct device_node *cpus_dn = NULL;
+	int count = 0;
+
+	while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+		printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
+#endif
+		count++;
+	}
+	return count;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+	int partition_potential_processors;
+	int partition_active_processors;
+	struct device_node *rootdn;
+	const char *model = "";
+	const char *system_id = "";
+	unsigned int *lp_index_ptr, lp_index = 0;
+	struct device_node *rtas_node;
+	int *lrdrp;
+
+	rootdn = find_path_device("/");
+	if (rootdn) {
+		model = get_property(rootdn, "model", NULL);
+		system_id = get_property(rootdn, "system-id", NULL);
+		lp_index_ptr = (unsigned int *)
+		    get_property(rootdn, "ibm,partition-no", NULL);
+		if (lp_index_ptr)
+			lp_index = *lp_index_ptr;
+	}
+
+	seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+	seq_printf(m, "serial_number=%s\n", system_id);
+
+	seq_printf(m, "system_type=%s\n", model);
+
+	seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+	rtas_node = find_path_device("/rtas");
+	lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+	if (lrdrp == NULL) {
+		partition_potential_processors = _systemcfg->processorCount;
+	} else {
+		partition_potential_processors = *(lrdrp + 4);
+	}
+
+	partition_active_processors = lparcfg_count_active_processors();
+
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		unsigned long h_entitled, h_unallocated;
+		unsigned long h_aggregation, h_resource;
+		unsigned long pool_idle_time, pool_procs;
+		unsigned long purr;
+
+		h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
+			  &h_resource);
+
+		seq_printf(m, "R4=0x%lx\n", h_entitled);
+		seq_printf(m, "R5=0x%lx\n", h_unallocated);
+		seq_printf(m, "R6=0x%lx\n", h_aggregation);
+		seq_printf(m, "R7=0x%lx\n", h_resource);
+
+		purr = get_purr();
+
+		/* this call handles the ibm,get-system-parameter contents */
+		parse_system_parameter_string(m);
+
+		seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
+
+		seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
+
+		seq_printf(m, "system_active_processors=%ld\n",
+			   (h_resource >> 0 * 8) & 0xffff);
+
+		/* pool related entries are apropriate for shared configs */
+		if (paca[0].lppaca.shared_proc) {
+
+			h_pic(&pool_idle_time, &pool_procs);
+
+			seq_printf(m, "pool=%ld\n",
+				   (h_aggregation >> 0 * 8) & 0xffff);
+
+			/* report pool_capacity in percentage */
+			seq_printf(m, "pool_capacity=%ld\n",
+				   ((h_resource >> 2 * 8) & 0xffff) * 100);
+
+			seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+
+			seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+		}
+
+		seq_printf(m, "unallocated_capacity_weight=%ld\n",
+			   (h_resource >> 4 * 8) & 0xFF);
+
+		seq_printf(m, "capacity_weight=%ld\n",
+			   (h_resource >> 5 * 8) & 0xFF);
+
+		seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
+
+		seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
+
+		seq_printf(m, "purr=%ld\n", purr);
+
+	} else {		/* non SPLPAR case */
+
+		seq_printf(m, "system_active_processors=%d\n",
+			   partition_potential_processors);
+
+		seq_printf(m, "system_potential_processors=%d\n",
+			   partition_potential_processors);
+
+		seq_printf(m, "partition_max_entitled_capacity=%d\n",
+			   partition_potential_processors * 100);
+
+		seq_printf(m, "partition_entitled_capacity=%d\n",
+			   partition_active_processors * 100);
+	}
+
+	seq_printf(m, "partition_active_processors=%d\n",
+		   partition_active_processors);
+
+	seq_printf(m, "partition_potential_processors=%d\n",
+		   partition_potential_processors);
+
+	seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
+
+	return 0;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity).  Format of input is "param_name=value";
+ * anything after value is ignored.  Valid parameters at this time are
+ * "partition_entitled_capacity" and "capacity_weight".  We use
+ * H_SET_PPP to alter parameters.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+			     size_t count, loff_t * off)
+{
+	char *kbuf;
+	char *tmp;
+	u64 new_entitled, *new_entitled_ptr = &new_entitled;
+	u8 new_weight, *new_weight_ptr = &new_weight;
+
+	unsigned long current_entitled;	/* parameters for h_get_ppp */
+	unsigned long dummy;
+	unsigned long resource;
+	u8 current_weight;
+
+	ssize_t retval = -ENOMEM;
+
+	kbuf = kmalloc(count, GFP_KERNEL);
+	if (!kbuf)
+		goto out;
+
+	retval = -EFAULT;
+	if (copy_from_user(kbuf, buf, count))
+		goto out;
+
+	retval = -EINVAL;
+	kbuf[count - 1] = '\0';
+	tmp = strchr(kbuf, '=');
+	if (!tmp)
+		goto out;
+
+	*tmp++ = '\0';
+
+	if (!strcmp(kbuf, "partition_entitled_capacity")) {
+		char *endp;
+		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			goto out;
+		new_weight_ptr = &current_weight;
+	} else if (!strcmp(kbuf, "capacity_weight")) {
+		char *endp;
+		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			goto out;
+		new_entitled_ptr = &current_entitled;
+	} else
+		goto out;
+
+	/* Get our current parameters */
+	retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+	if (retval) {
+		retval = -EIO;
+		goto out;
+	}
+
+	current_weight = (resource >> 5 * 8) & 0xFF;
+
+	pr_debug("%s: current_entitled = %lu, current_weight = %lu\n",
+		 __FUNCTION__, current_entitled, current_weight);
+
+	pr_debug("%s: new_entitled = %lu, new_weight = %lu\n",
+		 __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
+
+	retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
+				    *new_weight_ptr);
+
+	if (retval == H_Success || retval == H_Constrained) {
+		retval = count;
+	} else if (retval == H_Busy) {
+		retval = -EBUSY;
+	} else if (retval == H_Hardware) {
+		retval = -EIO;
+	} else if (retval == H_Parameter) {
+		retval = -EINVAL;
+	} else {
+		printk(KERN_WARNING "%s: received unknown hv return code %ld",
+		       __FUNCTION__, retval);
+		retval = -EIO;
+	}
+
+      out:
+	kfree(kbuf);
+	return retval;
+}
+
+#endif				/* CONFIG_PPC_PSERIES */
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, lparcfg_data, NULL);
+}
+
+struct file_operations lparcfg_fops = {
+      .owner	= THIS_MODULE,
+      .read	= seq_read,
+      .open	= lparcfg_open,
+      .release	= single_release,
+};
+
+int __init lparcfg_init(void)
+{
+	struct proc_dir_entry *ent;
+	mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+	/* Allow writing if we have FW_FEATURE_SPLPAR */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		lparcfg_fops.write = lparcfg_write;
+		mode |= S_IWUSR;
+	}
+
+	ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
+	if (ent) {
+		ent->proc_fops = &lparcfg_fops;
+		ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
+		if (!ent->data) {
+			printk(KERN_ERR
+			       "Failed to allocate buffer for lparcfg\n");
+			remove_proc_entry("lparcfg", ent->parent);
+			return -ENOMEM;
+		}
+	} else {
+		printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
+		return -EIO;
+	}
+
+	proc_ppc64_lparcfg = ent;
+	return 0;
+}
+
+void __exit lparcfg_cleanup(void)
+{
+	if (proc_ppc64_lparcfg) {
+		kfree(proc_ppc64_lparcfg->data);
+		remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
+	}
+}
+
+module_init(lparcfg_init);
+module_exit(lparcfg_cleanup);
+MODULE_DESCRIPTION("Interface for LPAR configuration data");
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
new file mode 100644
index 0000000..a1c1950
--- /dev/null
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#include <asm/systemcfg.h>
+#include <asm/rtas.h>
+#include <asm/uaccess.h>
+#include <asm/prom.h>
+
+static loff_t  page_map_seek( struct file *file, loff_t off, int whence);
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+			      loff_t *ppos);
+static int     page_map_mmap( struct file *file, struct vm_area_struct *vma );
+
+static struct file_operations page_map_fops = {
+	.llseek	= page_map_seek,
+	.read	= page_map_read,
+	.mmap	= page_map_mmap
+};
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+	struct proc_dir_entry *root;
+
+	root = proc_mkdir("ppc64", NULL);
+	if (!root)
+		return 1;
+
+	if (!(platform_is_pseries() || _machine == PLATFORM_CELL))
+		return 0;
+
+	if (!proc_mkdir("rtas", root))
+		return 1;
+
+	if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
+		return 1;
+
+	return 0;
+}
+core_initcall(proc_ppc64_create);
+
+static int __init proc_ppc64_init(void)
+{
+	struct proc_dir_entry *pde;
+
+	pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
+	if (!pde)
+		return 1;
+	pde->nlink = 1;
+	pde->data = _systemcfg;
+	pde->size = PAGE_SIZE;
+	pde->proc_fops = &page_map_fops;
+
+	return 0;
+}
+__initcall(proc_ppc64_init);
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+{
+	loff_t new;
+	struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+	switch(whence) {
+	case 0:
+		new = off;
+		break;
+	case 1:
+		new = file->f_pos + off;
+		break;
+	case 2:
+		new = dp->size + off;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if ( new < 0 || new > dp->size )
+		return -EINVAL;
+	return (file->f_pos = new);
+}
+
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+			      loff_t *ppos)
+{
+	struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+	return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
+}
+
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+	struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+	vma->vm_flags |= VM_SHM | VM_LOCKED;
+
+	if ((vma->vm_end - vma->vm_start) > dp->size)
+		return -EINVAL;
+
+	remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
+						dp->size, vma->vm_page_prot);
+	return 0;
+}
+
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 0000000..0e5a8e1
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,513 @@
+/*
+ * arch/ppc64/kernel/rtas_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+	if (where < 256)
+		return 1;
+	if (where < 4096 && dn->pci_ext_config_space)
+		return 1;
+
+	return 0;
+}
+
+static int of_device_available(struct device_node * dn)
+{
+        char * status;
+
+        status = get_property(dn, "status", NULL);
+
+        if (!status)
+                return 1;
+
+        if (!strcmp(status, "okay"))
+                return 1;
+
+        return 0;
+}
+
+static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+	int returnval = -1;
+	unsigned long buid, addr;
+	int ret;
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!config_access_valid(pdn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+		(pdn->devfn << 8) | (where & 0xff);
+	buid = pdn->phb->buid;
+	if (buid) {
+		ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+				addr, BUID_HI(buid), BUID_LO(buid), size);
+	} else {
+		ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+	}
+	*val = returnval;
+
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (returnval == EEH_IO_ERROR_VALUE(size) &&
+	    eeh_dn_check_failure (pdn->node, NULL))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+				unsigned int devfn,
+				int where, int size, u32 *val)
+{
+	struct device_node *busdn, *dn;
+
+	if (bus->self)
+		busdn = pci_device_to_OF_node(bus->self);
+	else
+		busdn = bus->sysdata;	/* must be a phb */
+
+	/* Search only direct children of the bus */
+	for (dn = busdn->child; dn; dn = dn->sibling) {
+		struct pci_dn *pdn = PCI_DN(dn);
+		if (pdn && pdn->devfn == devfn
+		    && of_device_available(dn))
+			return rtas_read_config(pdn, where, size, val);
+	}
+
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+	unsigned long buid, addr;
+	int ret;
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!config_access_valid(pdn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+		(pdn->devfn << 8) | (where & 0xff);
+	buid = pdn->phb->buid;
+	if (buid) {
+		ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+			BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+	} else {
+		ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+	}
+
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+				 unsigned int devfn,
+				 int where, int size, u32 val)
+{
+	struct device_node *busdn, *dn;
+
+	if (bus->self)
+		busdn = pci_device_to_OF_node(bus->self);
+	else
+		busdn = bus->sysdata;	/* must be a phb */
+
+	/* Search only direct children of the bus */
+	for (dn = busdn->child; dn; dn = dn->sibling) {
+		struct pci_dn *pdn = PCI_DN(dn);
+		if (pdn && pdn->devfn == devfn
+		    && of_device_available(dn))
+			return rtas_write_config(pdn, where, size, val);
+	}
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+struct pci_ops rtas_pci_ops = {
+	rtas_pci_read_config,
+	rtas_pci_write_config
+};
+
+int is_python(struct device_node *dev)
+{
+	char *model = (char *)get_property(dev, "model", NULL);
+
+	if (model && strstr(model, "Python"))
+		return 1;
+
+	return 0;
+}
+
+static int get_phb_reg_prop(struct device_node *dev,
+			    unsigned int addr_size_words,
+			    struct reg_property64 *reg)
+{
+	unsigned int *ui_ptr = NULL, len;
+
+	/* Found a PHB, now figure out where his registers are mapped. */
+	ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
+	if (ui_ptr == NULL)
+		return 1;
+
+	if (addr_size_words == 1) {
+		reg->address = ((struct reg_property32 *)ui_ptr)->address;
+		reg->size    = ((struct reg_property32 *)ui_ptr)->size;
+	} else {
+		*reg = *((struct reg_property64 *)ui_ptr);
+	}
+
+	return 0;
+}
+
+static void python_countermeasures(struct device_node *dev,
+				   unsigned int addr_size_words)
+{
+	struct reg_property64 reg_struct;
+	void __iomem *chip_regs;
+	volatile u32 val;
+
+	if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
+		return;
+
+	/* Python's register file is 1 MB in size. */
+	chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
+
+	/*
+	 * Firmware doesn't always clear this bit which is critical
+	 * for good performance - Anton
+	 */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+	val = in_be32(chip_regs + 0xf6030);
+	if (val & PRG_CL_RESET_VALID) {
+		printk(KERN_INFO "Python workaround: ");
+		val &= ~PRG_CL_RESET_VALID;
+		out_be32(chip_regs + 0xf6030, val);
+		/*
+		 * We must read it back for changes to
+		 * take effect
+		 */
+		val = in_be32(chip_regs + 0xf6030);
+		printk("reg0: %x\n", val);
+	}
+
+	iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+	read_pci_config = rtas_token("read-pci-config");
+	write_pci_config = rtas_token("write-pci-config");
+	ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+	ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+	int addr_cells;
+	unsigned int *buid_vals;
+	unsigned int len;
+	unsigned long buid;
+
+	if (ibm_read_pci_config == -1) return 0;
+
+	/* PHB's will always be children of the root node,
+	 * or so it is promised by the current firmware. */
+	if (phb->parent == NULL)
+		return 0;
+	if (phb->parent->parent)
+		return 0;
+
+	buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+	if (buid_vals == NULL)
+		return 0;
+
+	addr_cells = prom_n_addr_cells(phb);
+	if (addr_cells == 1) {
+		buid = (unsigned long) buid_vals[0];
+	} else {
+		buid = (((unsigned long)buid_vals[0]) << 32UL) |
+			(((unsigned long)buid_vals[1]) & 0xffffffff);
+	}
+	return buid;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+			      struct pci_controller *phb)
+{
+	int *bus_range;
+	unsigned int len;
+
+	bus_range = (int *) get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		return 1;
+ 	}
+
+	phb->first_busno =  bus_range[0];
+	phb->last_busno  =  bus_range[1];
+
+	return 0;
+}
+
+static int __devinit setup_phb(struct device_node *dev,
+			       struct pci_controller *phb,
+			       unsigned int addr_size_words)
+{
+	pci_setup_pci_controller(phb);
+
+	if (is_python(dev))
+		python_countermeasures(dev, addr_size_words);
+
+	if (phb_set_bus_ranges(dev, phb))
+		return 1;
+
+	phb->arch_data = dev;
+	phb->ops = &rtas_pci_ops;
+	phb->buid = get_phb_buid(dev);
+
+	return 0;
+}
+
+static void __devinit add_linux_pci_domain(struct device_node *dev,
+					   struct pci_controller *phb,
+					   struct property *of_prop)
+{
+	memset(of_prop, 0, sizeof(struct property));
+	of_prop->name = "linux,pci-domain";
+	of_prop->length = sizeof(phb->global_number);
+	of_prop->value = (unsigned char *)&of_prop[1];
+	memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
+	prom_add_property(dev, of_prop);
+}
+
+static struct pci_controller * __init alloc_phb(struct device_node *dev,
+						unsigned int addr_size_words)
+{
+	struct pci_controller *phb;
+	struct property *of_prop;
+
+	phb = alloc_bootmem(sizeof(struct pci_controller));
+	if (phb == NULL)
+		return NULL;
+
+	of_prop = alloc_bootmem(sizeof(struct property) +
+				sizeof(phb->global_number));
+	if (!of_prop)
+		return NULL;
+
+	if (setup_phb(dev, phb, addr_size_words))
+		return NULL;
+
+	add_linux_pci_domain(dev, phb, of_prop);
+
+	return phb;
+}
+
+static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
+{
+	struct pci_controller *phb;
+
+	phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
+					       GFP_KERNEL);
+	if (phb == NULL)
+		return NULL;
+
+	if (setup_phb(dev, phb, addr_size_words))
+		return NULL;
+
+	phb->is_dynamic = 1;
+
+	/* TODO: linux,pci-domain? */
+
+ 	return phb;
+}
+
+unsigned long __init find_and_init_phbs(void)
+{
+	struct device_node *node;
+	struct pci_controller *phb;
+	unsigned int root_size_cells = 0;
+	unsigned int index;
+	unsigned int *opprop = NULL;
+	struct device_node *root = of_find_node_by_path("/");
+
+	if (ppc64_interrupt_controller == IC_OPEN_PIC) {
+		opprop = (unsigned int *)get_property(root,
+				"platform-open-pic", NULL);
+	}
+
+	root_size_cells = prom_n_size_cells(root);
+
+	index = 0;
+
+	for (node = of_get_next_child(root, NULL);
+	     node != NULL;
+	     node = of_get_next_child(root, node)) {
+		if (node->type == NULL || strcmp(node->type, "pci") != 0)
+			continue;
+
+		phb = alloc_phb(node, root_size_cells);
+		if (!phb)
+			continue;
+
+		pci_process_bridge_OF_ranges(phb, node, 0);
+		pci_setup_phb_io(phb, index == 0);
+#ifdef CONFIG_PPC_PSERIES
+		if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
+			int addr = root_size_cells * (index + 2) - 1;
+			mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
+		}
+#endif
+		index++;
+	}
+
+	of_node_put(root);
+	pci_devs_phb_init();
+
+	/*
+	 * pci_probe_only and pci_assign_all_buses can be set via properties
+	 * in chosen.
+	 */
+	if (of_chosen) {
+		int *prop;
+
+		prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
+					   NULL);
+		if (prop)
+			pci_probe_only = *prop;
+
+		prop = (int *)get_property(of_chosen,
+					   "linux,pci-assign-all-buses", NULL);
+		if (prop)
+			pci_assign_all_buses = *prop;
+	}
+
+	return 0;
+}
+
+struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	unsigned int root_size_cells = 0;
+	struct pci_controller *phb;
+	int primary;
+
+	root_size_cells = prom_n_size_cells(root);
+
+	primary = list_empty(&hose_list);
+	phb = alloc_phb_dynamic(dn, root_size_cells);
+	if (!phb)
+		return NULL;
+
+	pci_process_bridge_OF_ranges(phb, dn, primary);
+
+	pci_setup_phb_io_dynamic(phb, primary);
+	of_node_put(root);
+
+	pci_devs_phb_init_dynamic(phb);
+	scan_phb(phb);
+
+	return phb;
+}
+EXPORT_SYMBOL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int pcibios_remove_root_bus(struct pci_controller *phb)
+{
+	struct pci_bus *b = phb->bus;
+	struct resource *res;
+	int rc, i;
+
+	res = b->resource[0];
+	if (!res->flags) {
+		printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
+				b->name);
+		return 1;
+	}
+
+	rc = unmap_bus_range(b);
+	if (rc) {
+		printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+			__FUNCTION__, b->name);
+		return 1;
+	}
+
+	if (release_resource(res)) {
+		printk(KERN_ERR "%s: failed to release IO on bus %s\n",
+				__FUNCTION__, b->name);
+		return 1;
+	}
+
+	for (i = 1; i < 3; ++i) {
+		res = b->resource[i];
+		if (!res->flags && i == 0) {
+			printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
+				__FUNCTION__, b->name);
+			return 1;
+		}
+		if (res->flags && release_resource(res)) {
+			printk(KERN_ERR
+			       "%s: failed to release IO %d on bus %s\n",
+				__FUNCTION__, i, b->name);
+			return 1;
+		}
+	}
+
+	list_del(&phb->list_node);
+	if (phb->is_dynamic)
+		kfree(phb);
+
+	return 0;
+}
+EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 0000000..850af19
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,384 @@
+#include <linux/config.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/prom.h>
+#include <asm/systemcfg.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+/* SMT stuff */
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/* default to snooze disabled */
+DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
+				      size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	ssize_t ret;
+	unsigned long snooze;
+
+	ret = sscanf(buf, "%lu", &snooze);
+	if (ret != 1)
+		return -EINVAL;
+
+	per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+
+	return count;
+}
+
+static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+	return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+}
+
+static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+		   store_smt_snooze_delay);
+
+/* Only parse OF options if the matching cmdline option was not specified */
+static int smt_snooze_cmdline;
+
+static int __init smt_setup(void)
+{
+	struct device_node *options;
+	unsigned int *val;
+	unsigned int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return 1;
+
+	options = find_path_device("/options");
+	if (!options)
+		return 1;
+
+	val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
+					   NULL);
+	if (!smt_snooze_cmdline && val) {
+		for_each_cpu(cpu)
+			per_cpu(smt_snooze_delay, cpu) = *val;
+	}
+
+	return 1;
+}
+__initcall(smt_setup);
+
+static int __init setup_smt_snooze_delay(char *str)
+{
+	unsigned int cpu;
+	int snooze;
+
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return 1;
+
+	smt_snooze_cmdline = 1;
+
+	if (get_option(&str, &snooze)) {
+		for_each_cpu(cpu)
+			per_cpu(smt_snooze_delay, cpu) = snooze;
+	}
+
+	return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+void ppc64_enable_pmcs(void)
+{
+	/* Only need to enable them once */
+	if (__get_cpu_var(pmcs_enabled))
+		return;
+
+	__get_cpu_var(pmcs_enabled) = 1;
+
+	if (ppc_md.enable_pmcs)
+		ppc_md.enable_pmcs();
+}
+EXPORT_SYMBOL(ppc64_enable_pmcs);
+
+/* XXX convert to rusty's on_one_cpu */
+static unsigned long run_on_cpu(unsigned long cpu,
+			        unsigned long (*func)(unsigned long),
+				unsigned long arg)
+{
+	cpumask_t old_affinity = current->cpus_allowed;
+	unsigned long ret;
+
+	/* should return -EINVAL to userspace */
+	if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
+		return 0;
+
+	ret = func(arg);
+
+	set_cpus_allowed(current, old_affinity);
+
+	return ret;
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+static unsigned long read_##NAME(unsigned long junk) \
+{ \
+	return mfspr(ADDRESS); \
+} \
+static unsigned long write_##NAME(unsigned long val) \
+{ \
+	ppc64_enable_pmcs(); \
+	mtspr(ADDRESS, val); \
+	return 0; \
+} \
+static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
+{ \
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+	unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
+	return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __attribute_used__ \
+	store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
+{ \
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+	unsigned long val; \
+	int ret = sscanf(buf, "%lx", &val); \
+	if (ret != 1) \
+		return -EINVAL; \
+	run_on_cpu(cpu->sysdev.id, write_##NAME, val); \
+	return count; \
+}
+
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+SYSFS_PMCSETUP(purr, SPRN_PURR);
+
+static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
+static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
+static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
+static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
+static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
+static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
+static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
+static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
+static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
+static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
+static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
+
+static void register_cpu_online(unsigned int cpu)
+{
+	struct cpu *c = &per_cpu(cpu_devices, cpu);
+	struct sys_device *s = &c->sysdev;
+
+#ifndef CONFIG_PPC_ISERIES
+	if (cpu_has_feature(CPU_FTR_SMT))
+		sysdev_create_file(s, &attr_smt_snooze_delay);
+#endif
+
+	/* PMC stuff */
+
+	sysdev_create_file(s, &attr_mmcr0);
+	sysdev_create_file(s, &attr_mmcr1);
+
+	if (cpu_has_feature(CPU_FTR_MMCRA))
+		sysdev_create_file(s, &attr_mmcra);
+
+	if (cur_cpu_spec->num_pmcs >= 1)
+		sysdev_create_file(s, &attr_pmc1);
+	if (cur_cpu_spec->num_pmcs >= 2)
+		sysdev_create_file(s, &attr_pmc2);
+	if (cur_cpu_spec->num_pmcs >= 3)
+		sysdev_create_file(s, &attr_pmc3);
+	if (cur_cpu_spec->num_pmcs >= 4)
+		sysdev_create_file(s, &attr_pmc4);
+	if (cur_cpu_spec->num_pmcs >= 5)
+		sysdev_create_file(s, &attr_pmc5);
+	if (cur_cpu_spec->num_pmcs >= 6)
+		sysdev_create_file(s, &attr_pmc6);
+	if (cur_cpu_spec->num_pmcs >= 7)
+		sysdev_create_file(s, &attr_pmc7);
+	if (cur_cpu_spec->num_pmcs >= 8)
+		sysdev_create_file(s, &attr_pmc8);
+
+	if (cpu_has_feature(CPU_FTR_SMT))
+		sysdev_create_file(s, &attr_purr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_cpu_online(unsigned int cpu)
+{
+	struct cpu *c = &per_cpu(cpu_devices, cpu);
+	struct sys_device *s = &c->sysdev;
+
+	BUG_ON(c->no_control);
+
+#ifndef CONFIG_PPC_ISERIES
+	if (cpu_has_feature(CPU_FTR_SMT))
+		sysdev_remove_file(s, &attr_smt_snooze_delay);
+#endif
+
+	/* PMC stuff */
+
+	sysdev_remove_file(s, &attr_mmcr0);
+	sysdev_remove_file(s, &attr_mmcr1);
+
+	if (cpu_has_feature(CPU_FTR_MMCRA))
+		sysdev_remove_file(s, &attr_mmcra);
+
+	if (cur_cpu_spec->num_pmcs >= 1)
+		sysdev_remove_file(s, &attr_pmc1);
+	if (cur_cpu_spec->num_pmcs >= 2)
+		sysdev_remove_file(s, &attr_pmc2);
+	if (cur_cpu_spec->num_pmcs >= 3)
+		sysdev_remove_file(s, &attr_pmc3);
+	if (cur_cpu_spec->num_pmcs >= 4)
+		sysdev_remove_file(s, &attr_pmc4);
+	if (cur_cpu_spec->num_pmcs >= 5)
+		sysdev_remove_file(s, &attr_pmc5);
+	if (cur_cpu_spec->num_pmcs >= 6)
+		sysdev_remove_file(s, &attr_pmc6);
+	if (cur_cpu_spec->num_pmcs >= 7)
+		sysdev_remove_file(s, &attr_pmc7);
+	if (cur_cpu_spec->num_pmcs >= 8)
+		sysdev_remove_file(s, &attr_pmc8);
+
+	if (cpu_has_feature(CPU_FTR_SMT))
+		sysdev_remove_file(s, &attr_purr);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __devinit sysfs_cpu_notify(struct notifier_block *self,
+				      unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned int)(long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		register_cpu_online(cpu);
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DEAD:
+		unregister_cpu_online(cpu);
+		break;
+#endif
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata sysfs_cpu_nb = {
+	.notifier_call	= sysfs_cpu_notify,
+};
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+static struct node node_devices[MAX_NUMNODES];
+
+static void register_nodes(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		if (node_online(i)) {
+			int p_node = parent_node(i);
+			struct node *parent = NULL;
+
+			if (p_node != i)
+				parent = &node_devices[p_node];
+
+			register_node(&node_devices[i], i, parent);
+		}
+	}
+}
+#else
+static void register_nodes(void)
+{
+	return;
+}
+#endif
+
+/* Only valid if CPU is present. */
+static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+	return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
+}
+static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+static int __init topology_init(void)
+{
+	int cpu;
+	struct node *parent = NULL;
+
+	register_nodes();
+
+	register_cpu_notifier(&sysfs_cpu_nb);
+
+	for_each_cpu(cpu) {
+		struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+#ifdef CONFIG_NUMA
+		/* The node to which a cpu belongs can't be known
+		 * until the cpu is made present.
+		 */
+		parent = NULL;
+		if (cpu_present(cpu))
+			parent = &node_devices[cpu_to_node(cpu)];
+#endif
+		/*
+		 * For now, we just see if the system supports making
+		 * the RTAS calls for CPU hotplug.  But, there may be a
+		 * more comprehensive way to do this for an individual
+		 * CPU.  For instance, the boot cpu might never be valid
+		 * for hotplugging.
+		 */
+		if (!ppc_md.cpu_die)
+			c->no_control = 1;
+
+		if (cpu_online(cpu) || (c->no_control == 0)) {
+			register_cpu(c, cpu, parent);
+
+			sysdev_create_file(&c->sysdev, &attr_physical_id);
+		}
+
+		if (cpu_online(cpu))
+			register_cpu_online(cpu);
+	}
+
+	return 0;
+}
+__initcall(topology_init);