powerpc/47x: Base ppc476 support

This patch adds the base support for the 476 processor.  The code was
primarily written by Ben Herrenschmidt and Torez Smith, but I've been
maintaining it for a while.

The goal is to have a single binary that will run on 44x and 47x, but
we still have some details to work out.  The biggest is that the L1 cache
line size differs on the two platforms, but it's currently a compile-time
option.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Torez Smith  <lnxtorez@linux.vnet.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 8af4949..a1d8458 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1701,6 +1701,19 @@
 		.machine_check		= machine_check_440A,
 		.platform		= "ppc440",
 	},
+	{ /* 476 core */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x11a50000,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE |
+			PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x |
+			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.platform		= "ppc470",
+	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 1175a85..ed4aeb9 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -373,11 +373,13 @@
 	bnel-	load_dbcr0
 #endif
 #ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
 	lis	r4,icache_44x_need_flush@ha
 	lwz	r5,icache_44x_need_flush@l(r4)
 	cmplwi	cr0,r5,0
 	bne-	2f
 1:
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
 #endif /* CONFIG_44x */
 BEGIN_FTR_SECTION
 	lwarx	r7,0,r1
@@ -848,6 +850,9 @@
 	/* interrupts are hard-disabled at this point */
 restore:
 #ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
+	b	1f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
 	lis	r4,icache_44x_need_flush@ha
 	lwz	r5,icache_44x_need_flush@l(r4)
 	cmplwi	cr0,r5,0
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 39be049..1acd175 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -37,6 +37,7 @@
 #include <asm/thread_info.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/synch.h>
 #include "head_booke.h"
 
 
@@ -191,7 +192,7 @@
 #endif
 
 	/* Data TLB Error Interrupt */
-	START_EXCEPTION(DataTLBError)
+	START_EXCEPTION(DataTLBError44x)
 	mtspr	SPRN_SPRG_WSCRATCH0, r10		/* Save some working registers */
 	mtspr	SPRN_SPRG_WSCRATCH1, r11
 	mtspr	SPRN_SPRG_WSCRATCH2, r12
@@ -282,7 +283,7 @@
 	mfspr	r10,SPRN_DEAR
 
 	 /* Jump to common tlb load */
-	b	finish_tlb_load
+	b	finish_tlb_load_44x
 
 2:
 	/* The bailout.  Restore registers to pre-exception conditions
@@ -302,7 +303,7 @@
 	 * information from different registers and bailout
 	 * to a different point.
 	 */
-	START_EXCEPTION(InstructionTLBError)
+	START_EXCEPTION(InstructionTLBError44x)
 	mtspr	SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
 	mtspr	SPRN_SPRG_WSCRATCH1, r11
 	mtspr	SPRN_SPRG_WSCRATCH2, r12
@@ -378,7 +379,7 @@
 	mfspr	r10,SPRN_SRR0
 
 	/* Jump to common TLB load point */
-	b	finish_tlb_load
+	b	finish_tlb_load_44x
 
 2:
 	/* The bailout.  Restore registers to pre-exception conditions
@@ -392,15 +393,7 @@
 	mfspr	r10, SPRN_SPRG_RSCRATCH0
 	b	InstructionStorage
 
-	/* Debug Interrupt */
-	DEBUG_CRIT_EXCEPTION
-
 /*
- * Local functions
-  */
-
-/*
-
  * Both the instruction and data TLB miss get to this
  * point to load the TLB.
  * 	r10 - EA of fault
@@ -410,7 +403,7 @@
  *	MMUCR - loaded with proper value when we get here
  *	Upon exit, we reload everything and RFI.
  */
-finish_tlb_load:
+finish_tlb_load_44x:
 	/* Combine RPN & ERPN an write WS 0 */
 	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
 	tlbwe	r11,r13,PPC44x_TLB_XLAT
@@ -443,6 +436,227 @@
 	mfspr	r10, SPRN_SPRG_RSCRATCH0
 	rfi					/* Force context change */
 
+/* TLB error interrupts for 476
+ */
+#ifdef CONFIG_PPC_47x
+	START_EXCEPTION(DataTLBError47x)
+	mtspr	SPRN_SPRG_WSCRATCH0,r10	/* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1,r11
+	mtspr	SPRN_SPRG_WSCRATCH2,r12
+	mtspr	SPRN_SPRG_WSCRATCH3,r13
+	mfcr	r11
+	mtspr	SPRN_SPRG_WSCRATCH4,r11
+	mfspr	r10,SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11,PAGE_OFFSET@h
+	cmplw	cr0,r10,r11
+	blt+	3f
+	lis	r11,swapper_pg_dir@h
+	ori	r11,r11, swapper_pg_dir@l
+	li	r12,0			/* MMUCR = 0 */
+	b	4f
+
+	/* Get the PGD for the current thread and setup MMUCR */
+3:	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+	mfspr   r12,SPRN_PID		/* Get PID */
+4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */
+
+	/* Mask of required permission bits. Note that while we
+	 * do copy ESR:ST to _PAGE_RW position as trying to write
+	 * to an RO page is pretty common, we don't do it with
+	 * _PAGE_DIRTY. We could do it, but it's a fairly rare
+	 * event so I'd rather take the overhead when it happens
+	 * rather than adding an instruction here. We should measure
+	 * whether the whole thing is worth it in the first place
+	 * as we could avoid loading SPRN_ESR completely in the first
+	 * place...
+	 *
+	 * TODO: Is it worth doing that mfspr & rlwimi in the first
+	 *       place or can we save a couple of instructions here ?
+	 */
+	mfspr	r12,SPRN_ESR
+	li	r13,_PAGE_PRESENT|_PAGE_ACCESSED
+	rlwimi	r13,r12,10,30,30
+
+	/* Load the PTE */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+	lwzx	r11,r12,r11		/* Get pgd/pmd entry */
+
+	/* Word 0 is EPN,V,TS,DSIZ */
+	li	r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+	rlwimi	r10,r12,0,32-PAGE_SHIFT,31	/* Insert valid and page size*/
+	li	r12,0
+	tlbwe	r10,r12,0
+
+	/* XXX can we do better ? Need to make sure tlbwe has established
+	 * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+	isync
+#endif
+
+	rlwinm.	r12,r11,0,0,20		/* Extract pt base address */
+	/* Compute pte address */
+	rlwimi  r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+	beq	2f			/* Bail if no table */
+	lwz	r11,0(r12)		/* Get high word of pte entry */
+
+	/* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+	 * bottom of r12 to create a data dependency... We can also use r10
+	 * as destination nowadays
+	 */
+#ifdef CONFIG_SMP
+	lwsync
+#endif
+	lwz	r12,4(r12)		/* Get low word of pte entry */
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	 /* Jump to common tlb load */
+	beq	finish_tlb_load_47x
+
+2:	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11,SPRN_SPRG_RSCRATCH4
+	mtcr	r11
+	mfspr	r13,SPRN_SPRG_RSCRATCH3
+	mfspr	r12,SPRN_SPRG_RSCRATCH2
+	mfspr	r11,SPRN_SPRG_RSCRATCH1
+	mfspr	r10,SPRN_SPRG_RSCRATCH0
+	b	DataStorage
+
+	/* Instruction TLB Error Interrupt */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError47x)
+	mtspr	SPRN_SPRG_WSCRATCH0,r10	/* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1,r11
+	mtspr	SPRN_SPRG_WSCRATCH2,r12
+	mtspr	SPRN_SPRG_WSCRATCH3,r13
+	mfcr	r11
+	mtspr	SPRN_SPRG_WSCRATCH4,r11
+	mfspr	r10,SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11,PAGE_OFFSET@h
+	cmplw	cr0,r10,r11
+	blt+	3f
+	lis	r11,swapper_pg_dir@h
+	ori	r11,r11, swapper_pg_dir@l
+	li	r12,0			/* MMUCR = 0 */
+	b	4f
+
+	/* Get the PGD for the current thread and setup MMUCR */
+3:	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+	mfspr   r12,SPRN_PID		/* Get PID */
+4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */
+
+	/* Make up the required permissions */
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+	/* Load PTE */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+	lwzx	r11,r12,r11		/* Get pgd/pmd entry */
+
+	/* Word 0 is EPN,V,TS,DSIZ */
+	li	r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+	rlwimi	r10,r12,0,32-PAGE_SHIFT,31	/* Insert valid and page size*/
+	li	r12,0
+	tlbwe	r10,r12,0
+
+	/* XXX can we do better ? Need to make sure tlbwe has established
+	 * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+	isync
+#endif
+
+	rlwinm.	r12,r11,0,0,20		/* Extract pt base address */
+	/* Compute pte address */
+	rlwimi  r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+	beq	2f			/* Bail if no table */
+
+	lwz	r11,0(r12)		/* Get high word of pte entry */
+	/* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+	 * bottom of r12 to create a data dependency... We can also use r10
+	 * as destination nowadays
+	 */
+#ifdef CONFIG_SMP
+	lwsync
+#endif
+	lwz	r12,4(r12)		/* Get low word of pte entry */
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	/* Jump to common TLB load point */
+	beq	finish_tlb_load_47x
+
+2:	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	InstructionStorage
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * 	r10 - free to use
+ * 	r11 - PTE high word value
+ *	r12 - PTE low word value
+ *      r13 - free to use
+ *	MMUCR - loaded with proper value when we get here
+ *	Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load_47x:
+	/* Combine RPN & ERPN an write WS 1 */
+	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
+	tlbwe	r11,r13,1
+
+	/* And make up word 2 */
+	li	r10,0xf85			/* Mask to apply from PTE */
+	rlwimi	r10,r12,29,30,30		/* DIRTY -> SW position */
+	and	r11,r12,r10			/* Mask PTE bits to keep */
+	andi.	r10,r12,_PAGE_USER		/* User page ? */
+	beq	1f				/* nope, leave U bits empty */
+	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
+1:	tlbwe	r11,r13,2
+
+	/* Done...restore registers and get out of here.
+	*/
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	rfi
+
+#endif /* CONFIG_PPC_47x */
+
+	/* Debug Interrupt */
+	/*
+	 * This statement needs to exist at the end of the IVPR
+	 * definition just in case you end up taking a debug
+	 * exception within another exception.
+	 */
+	DEBUG_CRIT_EXCEPTION
+
 /*
  * Global functions
  */
@@ -491,9 +705,18 @@
 /*
  * Init CPU state. This is called at boot time or for secondary CPUs
  * to setup initial TLB entries, setup IVORs, etc...
+ *
  */
 _GLOBAL(init_cpu_state)
 	mflr	r22
+#ifdef CONFIG_PPC_47x
+	/* We use the PVR to differenciate 44x cores from 476 */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,PVR_476@h
+	beq	head_start_47x
+#endif /* CONFIG_PPC_47x */
+
 /*
  * In case the firmware didn't do it, we apply some workarounds
  * that are good for all 440 core variants here
@@ -506,7 +729,7 @@
 	sync
 
 /*
- * Set up the initial MMU state
+ * Set up the initial MMU state for 44x
  *
  * We are still executing code at the virtual address
  * mappings set by the firmware for the base of RAM.
@@ -646,16 +869,257 @@
 	SET_IVOR(10, Decrementer);
 	SET_IVOR(11, FixedIntervalTimer);
 	SET_IVOR(12, WatchdogTimer);
-	SET_IVOR(13, DataTLBError);
-	SET_IVOR(14, InstructionTLBError);
+	SET_IVOR(13, DataTLBError44x);
+	SET_IVOR(14, InstructionTLBError44x);
 	SET_IVOR(15, DebugCrit);
 
+	b	head_start_common
+
+
+#ifdef CONFIG_PPC_47x
+
+#ifdef CONFIG_SMP
+
+/* Entry point for secondary 47x processors */
+_GLOBAL(start_secondary_47x)
+        mr      r24,r3          /* CPU number */
+
+	bl	init_cpu_state
+
+	/* Now we need to bolt the rest of kernel memory which
+	 * is done in C code. We must be careful because our task
+	 * struct or our stack can (and will probably) be out
+	 * of reach of the initial 256M TLB entry, so we use a
+	 * small temporary stack in .bss for that. This works
+	 * because only one CPU at a time can be in this code
+	 */
+	lis	r1,temp_boot_stack@h
+	ori	r1,r1,temp_boot_stack@l
+	addi	r1,r1,1024-STACK_FRAME_OVERHEAD
+	li	r0,0
+	stw	r0,0(r1)
+	bl	mmu_init_secondary
+
+	/* Now we can get our task struct and real stack pointer */
+
+	/* Get current_thread_info and current */
+	lis	r1,secondary_ti@ha
+	lwz	r1,secondary_ti@l(r1)
+	lwz	r2,TI_TASK(r1)
+
+	/* Current stack pointer */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	li	r0,0
+	stw	r0,0(r1)
+
+	/* Kernel stack for exception entry in SPRG3 */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r4
+
+	b	start_secondary
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ */
+
+head_start_47x:
+	/* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+	mfspr	r3,SPRN_PID			/* Get PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4,r4,MSR_IS@l			/* TS=1? */
+	beq	1f				/* If not, leave STS=0 */
+	oris	r3,r3,PPC47x_MMUCR_STS@h	/* Set STS=1 */
+1:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
+	sync
+
+	/* Find the entry we are running from */
+	bl	1f
+1:	mflr	r23
+	tlbsx	r23,0,r23
+	tlbre	r24,r23,0
+	tlbre	r25,r23,1
+	tlbre	r26,r23,2
+
+/*
+ * Cleanup time
+ */
+
+	/* Initialize MMUCR */
+	li	r5,0
+	mtspr	SPRN_MMUCR,r5
+	sync
+
+clear_all_utlb_entries:
+
+	#; Set initial values.
+
+	addis		r3,0,0x8000
+	addi		r4,0,0
+	addi		r5,0,0
+	b		clear_utlb_entry
+
+	#; Align the loop to speed things up.
+
+	.align		6
+
+clear_utlb_entry:
+
+	tlbwe		r4,r3,0
+	tlbwe		r5,r3,1
+	tlbwe		r5,r3,2
+	addis		r3,r3,0x2000
+	cmpwi		r3,0
+	bne		clear_utlb_entry
+	addis		r3,0,0x8000
+	addis		r4,r4,0x100
+	cmpwi		r4,0
+	bne		clear_utlb_entry
+
+	#; Restore original entry.
+
+	oris	r23,r23,0x8000  /* specify the way */
+	tlbwe		r24,r23,0
+	tlbwe		r25,r23,1
+	tlbwe		r26,r23,2
+
+/*
+ * Configure and load pinned entry into TLB for the kernel core
+ */
+
+	lis	r3,PAGE_OFFSET@h
+	ori	r3,r3,PAGE_OFFSET@l
+
+	/* Kernel is at the base of RAM */
+	li r4, 0			/* Load the kernel physical address */
+
+	/* Load the kernel PID = 0 */
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Word 0 */
+	clrrwi	r3,r3,12		/* Mask off the effective page number */
+	ori	r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M
+
+	/* Word 1 */
+	clrrwi	r4,r4,12		/* Mask off the real page number */
+					/* ERPN is 0 for first 4GB page */
+	/* Word 2 */
+	li	r5,0
+	ori	r5,r5,PPC47x_TLB2_S_RWX
+#ifdef CONFIG_SMP
+	ori	r5,r5,PPC47x_TLB2_M
+#endif
+
+	/* We write to way 0 and bolted 0 */
+	lis	r0,0x8800
+	tlbwe	r3,r0,0
+	tlbwe	r4,r0,1
+	tlbwe	r5,r0,2
+
+/*
+ * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix
+ * them up later
+ */
+	LOAD_REG_IMMEDIATE(r3, 0x9abcdef0)
+	mtspr	SPRN_SSPCR,r3
+	mtspr	SPRN_USPCR,r3
+	LOAD_REG_IMMEDIATE(r3, 0x12345670)
+	mtspr	SPRN_ISPCR,r3
+
+	/* Force context change */
+	mfmsr	r0
+	mtspr	SPRN_SRR1, r0
+	lis	r0,3f@h
+	ori	r0,r0,3f@l
+	mtspr	SPRN_SRR0,r0
+	sync
+	rfi
+
+	/* Invalidate original entry we used */
+3:
+	rlwinm	r24,r24,0,21,19 /* clear the "valid" bit */
+	tlbwe	r24,r23,0
+	addi	r24,0,0
+	tlbwe	r24,r23,1
+	tlbwe	r24,r23,2
+	isync                   /* Clear out the shadow TLB entries */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+	/* Add UART mapping for early debug. */
+
+	/* Word 0 */
+	lis	r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+	ori	r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M
+
+	/* Word 1 */
+	lis	r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+	ori	r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+	/* Word 2 */
+	li	r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG)
+
+	/* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same
+	 * congruence class as the kernel, we need to make sure of it at
+	 * some point
+	 */
+        lis	r0,0x8d00
+	tlbwe	r3,r0,0
+	tlbwe	r4,r0,1
+	tlbwe	r5,r0,2
+
+	/* Force context change */
+	isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheckA);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError47x);
+	SET_IVOR(14, InstructionTLBError47x);
+	SET_IVOR(15, DebugCrit);
+
+	/* We configure icbi to invalidate 128 bytes at a time since the
+	 * current 32-bit kernel code isn't too happy with icache != dcache
+	 * block size
+	 */
+	mfspr	r3,SPRN_CCR0
+	oris	r3,r3,0x0020
+	mtspr	SPRN_CCR0,r3
+	isync
+
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Here we are back to code that is common between 44x and 47x
+ *
+ * We proceed to further kernel initialization and return to the
+ * main kernel entry
+ */
+head_start_common:
 	/* Establish the interrupt vector base */
 	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
 	mtspr	SPRN_IVPR,r4
 
 	addis	r22,r22,KERNELBASE@h
 	mtlr	r22
+	isync
 	blr
 
 /*
@@ -683,3 +1147,9 @@
  */
 abatron_pteptrs:
 	.space	8
+
+#ifdef CONFIG_SMP
+	.align	12
+temp_boot_stack:
+	.space	1024
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 8649f53..8043d1b 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -441,7 +441,7 @@
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	0b
 	sync
-#ifndef CONFIG_44x
+#ifdef CONFIG_44x
 	/* We don't flush the icache on 44x. Those have a virtual icache
 	 * and we don't have access to the virtual address here (it's
 	 * not the page vaddr but where it's mapped in user space). The
@@ -449,15 +449,19 @@
 	 * a change in the address space occurs, before returning to
 	 * user space
 	 */
+BEGIN_MMU_FTR_SECTION
+	blr
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
+#endif /* CONFIG_44x */
 	mtctr	r4
 1:	icbi	0,r6
 	addi	r6,r6,L1_CACHE_BYTES
 	bdnz	1b
 	sync
 	isync
-#endif /* CONFIG_44x */
 	blr
 
+#ifndef CONFIG_BOOKE
 /*
  * Flush a particular page from the data cache to RAM, identified
  * by its physical address.  We turn off the MMU so we can just use
@@ -490,6 +494,7 @@
 	mtmsr	r10				/* restore DR */
 	isync
 	blr
+#endif /* CONFIG_BOOKE */
 
 /*
  * Clear pages using the dcbz instruction, which doesn't cause any
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e36f94f..3fe4de2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -495,6 +495,14 @@
 	current->active_mm = &init_mm;
 
 	smp_store_cpu_info(cpu);
+
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+	/* Clear any pending timer interrupts */
+	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+
+	/* Enable decrementer interrupt */
+	mtspr(SPRN_TCR, TCR_DIE);
+#endif
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
 	cpu_callin_map[cpu] = 1;