ARM: pm: add generic CPU suspend/resume support

This adds core support for saving and restoring CPU coprocessor
registers across a suspend/resume cycle.  It includes suspend support
for ARM920, ARM926, SA11x0, PXA25x, PXA27x, PXA3xx, V6 and V7 CPUs.
Tested on Assabet and Tegra 2.
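
As a rough illustration (not part of this patch), a platform's sleep
code is expected to call cpu_suspend from assembly with r1 set to the
v:p offset (PHYS_OFFSET - PAGE_OFFSET) and r3 pointing at the virtual
function to run once the CPU state has been restored; cpu_suspend
returns to its caller after saving state and flushing the D-cache, and
the caller then enters the SoC low-power state.  On wakeup, the boot
code must jump to cpu_resume by its physical address.  A hypothetical
caller (the mysoc_* names are illustrative only) might look like:

	ENTRY(mysoc_cpu_suspend)		@ r0 = PHYS_OFFSET - PAGE_OFFSET from C
		stmfd	sp!, {r4 - r12, lr}	@ preserve callee-saved registers
		mov	r1, r0			@ v:p offset for cpu_suspend
		ldr	r3, =mysoc_resumed	@ virtual return function
		bl	cpu_suspend		@ saves CPU state, flushes D-cache
		@ ... enter the SoC-specific low-power state here; on wakeup
		@ the boot code jumps to cpu_resume (physical address) ...
	mysoc_resumed:
		ldmfd	sp!, {r4 - r12, pc}	@ back to the C caller
	ENDPROC(mysoc_cpu_suspend)

cpu_resume restores the registers saved on the stack (whose physical
address is kept in sleep_save_sp), re-enables the MMU via
cpu_resume_mmu, and finally branches to the virtual return function
with the original stack pointer restored.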

Tested-by: Colin Cross <ccross@android.com>
Tested-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 185ee82..74554f1 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -29,6 +29,7 @@
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
+obj-$(CONFIG_PM)		+= sleep.o
 obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 5302a91..927522c 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
+#include <asm/cacheflush.h>
 #include <asm/glue-df.h>
 #include <asm/glue-pf.h>
 #include <asm/mach/arch.h>
@@ -116,6 +117,14 @@
 #ifdef MULTI_PABORT
   DEFINE(PROCESSOR_PABT_FUNC,	offsetof(struct processor, _prefetch_abort));
 #endif
+#ifdef MULTI_CPU
+  DEFINE(CPU_SLEEP_SIZE,	offsetof(struct processor, suspend_size));
+  DEFINE(CPU_DO_SUSPEND,	offsetof(struct processor, do_suspend));
+  DEFINE(CPU_DO_RESUME,		offsetof(struct processor, do_resume));
+#endif
+#ifdef MULTI_CACHE
+  DEFINE(CACHE_FLUSH_KERN_ALL,	offsetof(struct cpu_cache_fns, flush_kern_all));
+#endif
   BLANK();
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
new file mode 100644
index 0000000..2ba1794
--- /dev/null
+++ b/arch/arm/kernel/sleep.S
@@ -0,0 +1,109 @@
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/glue-cache.h>
+#include <asm/glue-proc.h>
+#include <asm/system.h>
+	.text
+
+/*
+ * Save CPU state for a suspend
+ *  r1 = v:p offset
+ *  r3 = virtual return function
+ * Note: sp is decremented to allocate space for CPU state on stack
+ * r0-r3,r9,r10,lr corrupted
+ */
+ENTRY(cpu_suspend)
+	mov	r9, lr
+#ifdef MULTI_CPU
+	ldr	r10, =processor
+	mov	r2, sp			@ current virtual SP
+	ldr	r0, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
+	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
+	sub	sp, sp, r0		@ allocate CPU state on stack
+	mov	r0, sp			@ save pointer
+	add	ip, ip, r1		@ convert resume fn to phys
+	stmfd	sp!, {r1, r2, r3, ip}	@ save v:p, virt SP, retfn, phys resume fn
+	ldr	r3, =sleep_save_sp
+	add	r2, sp, r1		@ convert SP to phys
+	str	r2, [r3]		@ save phys SP
+	mov	lr, pc
+	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
+#else
+	mov	r2, sp			@ current virtual SP
+	ldr	r0, =cpu_suspend_size
+	sub	sp, sp, r0		@ allocate CPU state on stack
+	mov	r0, sp			@ save pointer
+	stmfd	sp!, {r1, r2, r3}	@ save v:p, virt SP, return fn
+	ldr	r3, =sleep_save_sp
+	add	r2, sp, r1		@ convert SP to phys
+	str	r2, [r3]		@ save phys SP
+	bl	cpu_do_suspend
+#endif
+
+	@ flush data cache
+#ifdef MULTI_CACHE
+	ldr	r10, =cpu_cache
+	mov	lr, r9
+	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
+#else
+	mov	lr, r9
+	b	__cpuc_flush_kern_all
+#endif
+ENDPROC(cpu_suspend)
+	.ltorg
+
+/*
+ * r0 = control register value
+ * r1 = v:p offset (preserved by cpu_do_resume)
+ * r2 = phys page table base
+ * r3 = L1 section flags
+ */
+ENTRY(cpu_resume_mmu)
+	adr	r4, cpu_resume_turn_mmu_on
+	mov	r4, r4, lsr #20
+	orr	r3, r3, r4, lsl #20
+	ldr	r5, [r2, r4, lsl #2]	@ save old mapping
+	str	r3, [r2, r4, lsl #2]	@ setup 1:1 mapping for mmu code
+	sub	r2, r2, r1
+	ldr	r3, =cpu_resume_after_mmu
+	bic	r1, r0, #CR_C		@ ensure D-cache is disabled
+	b	cpu_resume_turn_mmu_on
+ENDPROC(cpu_resume_mmu)
+	.ltorg
+	.align	5
+cpu_resume_turn_mmu_on:
+	mcr	p15, 0, r1, c1, c0, 0	@ turn on MMU, I-cache, etc
+	mrc	p15, 0, r1, c0, c0, 0	@ read id reg
+	mov	r1, r1			@ nop; give the MMU enable time to
+	mov	r1, r1			@ take effect before changing the pc
+	mov	pc, r3			@ jump to virtual address
+ENDPROC(cpu_resume_turn_mmu_on)
+cpu_resume_after_mmu:
+	str	r5, [r2, r4, lsl #2]	@ restore old mapping
+	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
+	mov	pc, lr
+ENDPROC(cpu_resume_after_mmu)
+
+/*
+ * Note: Yes, part of the following code is located in the .data section.
+ *       This is to allow sleep_save_sp to be accessed with a relative load
+ *       while we can't rely on any MMU translation.  We could have put
+ *       sleep_save_sp in the .text section as well, but some setups might
+ *       insist on it being truly read-only.
+ */
+	.data
+	.align
+ENTRY(cpu_resume)
+	ldr	r0, sleep_save_sp	@ stack phys addr
+	msr	cpsr_c, #PSR_I_BIT | PSR_F_BIT | SVC_MODE @ set SVC, irqs off
+#ifdef MULTI_CPU
+	ldmia	r0!, {r1, sp, lr, pc}	@ load v:p, stack, return fn, resume fn
+#else
+	ldmia	r0!, {r1, sp, lr}	@ load v:p, stack, return fn
+	b	cpu_do_resume
+#endif
+ENDPROC(cpu_resume)
+
+sleep_save_sp:
+	.word	0				@ preserve stack phys ptr here