sh: Support for extended ASIDs on PTEAEX-capable SH-X3 cores.

This adds support for extended ASIDs (up to 16-bits) on newer SH-X3 cores
that implement the PTAEX register and respective functionality. Presently
only the 65nm SH7786 (90nm only supports legacy 8-bit ASIDs).

The main change is in how the PTE is written out when loading the entry
in to the TLB, as well as in how the TLB entry is selectively flushed.

While SH-X2 extended mode splits out the memory-mapped U and I-TLB data
arrays for extra bits, extended ASID mode splits out the address arrays.
While we don't use the memory-mapped data array access, the address
array accesses are necessary for selective TLB flushes, so these are
implemented newly and replace the generic SH-4 implementation.

With this, TLB flushes in switch_mm() are almost non-existent on newer
parts.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/include/asm/cpu-features.h b/arch/sh/include/asm/cpu-features.h
index 86308aa..694abe4 100644
--- a/arch/sh/include/asm/cpu-features.h
+++ b/arch/sh/include/asm/cpu-features.h
@@ -21,5 +21,6 @@
 #define CPU_HAS_LLSC		0x0040	/* movli.l/movco.l */
 #define CPU_HAS_L2_CACHE	0x0080	/* Secondary cache / URAM */
 #define CPU_HAS_OP32		0x0100	/* 32-bit instruction support */
+#define CPU_HAS_PTEAEX		0x0200	/* PTE ASID Extension support */
 
 #endif /* __ASM_SH_CPU_FEATURES_H */
diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h
index 5d9157b..2a9c55f 100644
--- a/arch/sh/include/asm/mmu_context.h
+++ b/arch/sh/include/asm/mmu_context.h
@@ -19,13 +19,18 @@
  *    (a) TLB cache version (or round, cycle whatever expression you like)
  *    (b) ASID (Address Space IDentifier)
  */
+#ifdef CONFIG_CPU_HAS_PTEAEX
+#define MMU_CONTEXT_ASID_MASK		0x0000ffff
+#else
 #define MMU_CONTEXT_ASID_MASK		0x000000ff
-#define MMU_CONTEXT_VERSION_MASK	0xffffff00
-#define MMU_CONTEXT_FIRST_VERSION	0x00000100
-#define NO_CONTEXT			0UL
+#endif
 
-/* ASID is 8-bit value, so it can't be 0x100 */
-#define MMU_NO_ASID			0x100
+#define MMU_CONTEXT_VERSION_MASK	(~0UL & ~MMU_CONTEXT_ASID_MASK)
+#define MMU_CONTEXT_FIRST_VERSION	(MMU_CONTEXT_ASID_MASK + 1)
+
+/* Impossible ASID value, to differentiate from NO_CONTEXT. */
+#define MMU_NO_ASID			MMU_CONTEXT_FIRST_VERSION
+#define NO_CONTEXT			0UL
 
 #define asid_cache(cpu)		(cpu_data[cpu].asid_cache)
 
diff --git a/arch/sh/include/asm/mmu_context_32.h b/arch/sh/include/asm/mmu_context_32.h
index f4f9aeb..8ef800c 100644
--- a/arch/sh/include/asm/mmu_context_32.h
+++ b/arch/sh/include/asm/mmu_context_32.h
@@ -10,6 +10,17 @@
 	/* Do nothing */
 }
 
+#ifdef CONFIG_CPU_HAS_PTEAEX
+static inline void set_asid(unsigned long asid)
+{
+	__raw_writel(asid, MMU_PTEAEX);
+}
+
+static inline unsigned long get_asid(void)
+{
+	return __raw_readl(MMU_PTEAEX) & MMU_CONTEXT_ASID_MASK;
+}
+#else
 static inline void set_asid(unsigned long asid)
 {
 	unsigned long __dummy;
@@ -33,6 +44,7 @@
 	asid &= MMU_CONTEXT_ASID_MASK;
 	return asid;
 }
+#endif /* CONFIG_CPU_HAS_PTEAEX */
 
 /* MMU_TTB is used for optimizing the fault handling. */
 static inline void set_TTB(pgd_t *pgd)