[ARM] 3377/2: add support for intel xsc3 core

Patch from Lennert Buytenhek

This patch adds support for the new XScale v3 core.  This is an
ARMv5 ISA core with the following additions:

- L2 cache
- I/O coherency support (on select chipsets)
- Low-Locality Reference cache attributes (replaces mini-cache)
- Supersections (v6 compatible)
- 36-bit addressing (v6 compatible)
- Single instruction cache line clean/invalidate
- LRU cache replacement (vs round-robin)

I attempted to merge the XSC3 support into proc-xscale.S, but XSC3
cores have their own errata and need extra handling (the L2 cache,
for example), so it is simpler to keep the support separate.

L2 cache support is currently a build option because the L2 enable
bit must be set before we enable the MMU, and there is no easy way to
parse command line parameters that early.
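
To illustrate why this ends up as a compile-time decision, a minimal
sketch of the setup path is below.  The config symbol is invented for
the sketch, and bit 26 is the L2 unified cache enable bit as I read
the XSC3/CP15 layout, so treat it as illustrative rather than the
exact hunk from proc-xsc3.S.  The per-CPU setup routine runs with the
MMU still off and returns the CP15 control register value that head.S
writes to turn the MMU on, so the L2 enable bit has to be folded in
there:

	@ sketch only, not the actual proc-xsc3.S code
	mrc	p15, 0, r0, c1, c0, 0		@ read CP15 control register
#ifdef CONFIG_XSC3_L2				@ illustrative option name
	orr	r0, r0, #(1 << 26)		@ fold in the L2 cache enable bit
#endif
	mov	pc, lr				@ caller writes r0 back when enabling the MMU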

There are still optimizations that can be done, such as using LLR for
copypage (in theory reusing the existing mini-cache code), but those
can be addressed down the road.

Signed-off-by: Deepak Saxena <dsaxena@plexity.net>
Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/mm/copypage-xsc3.S b/arch/arm/mm/copypage-xsc3.S
new file mode 100644
index 0000000..9a2cb43
--- /dev/null
+++ b/arch/arm/mm/copypage-xsc3.S
@@ -0,0 +1,97 @@
+/*
+ *  linux/arch/arm/mm/copypage-xsc3.S
+ *
+ *  Copyright (C) 2004 Intel Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Adapted for the 3rd generation XScale core, which has no mini-dcache
+ * Author: Matt Gilbert (matthew.m.gilbert@intel.com)
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * General note:
+ *  We don't really want write-allocate cache behaviour for these functions
+ *  since that will just eat through 8K of the cache.
+ */
+
+	.text
+	.align	5
+/*
+ * XSC3 optimised copy_user_page
+ *  r0 = destination
+ *  r1 = source
+ *  r2 = virtual user address of ultimate destination page
+ *
+ * The source page may have some clean entries in the cache already, but we
+ * can safely ignore them - break_cow() will flush them out of the cache
+ * if we eventually end up using our copied page.
+ *
+ */
+ENTRY(xsc3_mc_copy_user_page)
+	stmfd	sp!, {r4, r5, lr}
+	mov	lr, #PAGE_SZ/64-1		@ 64 bytes copied per loop pass
+
+	pld	[r1, #0]
+	pld	[r1, #32]
+1:	pld	[r1, #64]
+	pld	[r1, #96]
+
+2:	ldrd	r2, [r1], #8
+	mov	ip, r0
+	ldrd	r4, [r1], #8
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate dest D line
+	strd	r2, [r0], #8
+	ldrd	r2, [r1], #8
+	strd	r4, [r0], #8
+	ldrd	r4, [r1], #8
+	strd	r2, [r0], #8
+	strd	r4, [r0], #8
+	ldrd	r2, [r1], #8
+	mov	ip, r0
+	ldrd	r4, [r1], #8
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate dest D line
+	strd	r2, [r0], #8
+	ldrd	r2, [r1], #8
+	subs	lr, lr, #1
+	strd	r4, [r0], #8
+	ldrd	r4, [r1], #8
+	strd	r2, [r0], #8
+	strd	r4, [r0], #8
+	bgt	1b
+	beq	2b
+
+	ldmfd	sp!, {r4, r5, pc}
+
+	.align	5
+/*
+ * XSC3 optimised clear_user_page
+ *  r0 = destination
+ *  r1 = virtual user address of ultimate destination page
+ */
+ENTRY(xsc3_mc_clear_user_page)
+	mov	r1, #PAGE_SZ/32			@ 32 bytes cleared per loop pass
+	mov	r2, #0
+	mov	r3, #0
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate line
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	subs	r1, r1, #1
+	bne	1b
+	mov	pc, lr
+
+	__INITDATA
+
+	.type	xsc3_mc_user_fns, #object
+ENTRY(xsc3_mc_user_fns)
+	.long	xsc3_mc_clear_user_page
+	.long	xsc3_mc_copy_user_page
+	.size	xsc3_mc_user_fns, . - xsc3_mc_user_fns