ARM: tegra30: cpuidle: add powered-down state for CPU0

This is a power gating idle mode. It support power gating vdd_cpu rail
after all cpu cores in "powered-down" status. For Tegra30, the CPU0 can
enter this state only when all secondary CPU is offline. We need to take
care and make sure whole secondary CPUs were offline and checking the
CPU power gate status. After that, the CPU0 can go into "powered-down"
state safely. Then shut off the CPU rail.

Be aware of that, you may see the legacy power state "LP2" in the code
which is exactly the same meaning of "CPU power down".

Base on the work by:
Scott Williams <scwilliams@nvidia.com>

Signed-off-by: Joseph Lo <josephl@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S
index 91548a7..88f4de9 100644
--- a/arch/arm/mach-tegra/sleep.S
+++ b/arch/arm/mach-tegra/sleep.S
@@ -25,6 +25,7 @@
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cp15.h>
 
 #include "iomap.h"
@@ -59,4 +60,45 @@
 	ldmfd	sp!, {r0, r4-r5, r7, r9-r11, pc}
 ENDPROC(tegra_disable_clean_inv_dcache)
 
+/*
+ * tegra_sleep_cpu_finish(unsigned long v2p)
+ *
+ * enters suspend in LP2 by turning off the mmu and jumping to
+ * tegra?_tear_down_cpu
+ */
+ENTRY(tegra_sleep_cpu_finish)
+	/* Flush and disable the L1 data cache */
+	bl	tegra_disable_clean_inv_dcache
+
+	mov32	r6, tegra_tear_down_cpu
+	ldr	r1, [r6]
+	add	r1, r1, r0
+
+	mov32	r3, tegra_shut_off_mmu
+	add	r3, r3, r0
+	mov	r0, r1
+
+	mov	pc, r3
+ENDPROC(tegra_sleep_cpu_finish)
+
+/*
+ * tegra_shut_off_mmu
+ *
+ * r0 = physical address to jump to with mmu off
+ *
+ * called with VA=PA mapping
+ * turns off MMU, icache, dcache and branch prediction
+ */
+	.align	L1_CACHE_SHIFT
+	.pushsection	.idmap.text, "ax"
+ENTRY(tegra_shut_off_mmu)
+	mrc	p15, 0, r3, c1, c0, 0
+	movw	r2, #CR_I | CR_Z | CR_C | CR_M
+	bic	r3, r3, r2
+	dsb
+	mcr	p15, 0, r3, c1, c0, 0
+	isb
+	mov	pc, r0
+ENDPROC(tegra_shut_off_mmu)
+	.popsection
 #endif