Merge branch 'v6v7' into devel

Conflicts:
	arch/arm/include/asm/cacheflush.h
	arch/arm/include/asm/proc-fns.h
	arch/arm/mm/Kconfig
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index 7b1bb2b..af54ed1 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -149,14 +149,18 @@
  */
 
 /*
+ * Native endian assembly bitops.  nr = 0 -> word 0 bit 0.
+ */
+extern void _set_bit(int nr, volatile unsigned long * p);
+extern void _clear_bit(int nr, volatile unsigned long * p);
+extern void _change_bit(int nr, volatile unsigned long * p);
+extern int _test_and_set_bit(int nr, volatile unsigned long * p);
+extern int _test_and_clear_bit(int nr, volatile unsigned long * p);
+extern int _test_and_change_bit(int nr, volatile unsigned long * p);
+
+/*
  * Little endian assembly bitops.  nr = 0 -> byte 0 bit 0.
  */
-extern void _set_bit_le(int nr, volatile unsigned long * p);
-extern void _clear_bit_le(int nr, volatile unsigned long * p);
-extern void _change_bit_le(int nr, volatile unsigned long * p);
-extern int _test_and_set_bit_le(int nr, volatile unsigned long * p);
-extern int _test_and_clear_bit_le(int nr, volatile unsigned long * p);
-extern int _test_and_change_bit_le(int nr, volatile unsigned long * p);
 extern int _find_first_zero_bit_le(const void * p, unsigned size);
 extern int _find_next_zero_bit_le(const void * p, int size, int offset);
 extern int _find_first_bit_le(const unsigned long *p, unsigned size);
@@ -165,12 +169,6 @@
 /*
  * Big endian assembly bitops.  nr = 0 -> byte 3 bit 0.
  */
-extern void _set_bit_be(int nr, volatile unsigned long * p);
-extern void _clear_bit_be(int nr, volatile unsigned long * p);
-extern void _change_bit_be(int nr, volatile unsigned long * p);
-extern int _test_and_set_bit_be(int nr, volatile unsigned long * p);
-extern int _test_and_clear_bit_be(int nr, volatile unsigned long * p);
-extern int _test_and_change_bit_be(int nr, volatile unsigned long * p);
 extern int _find_first_zero_bit_be(const void * p, unsigned size);
 extern int _find_next_zero_bit_be(const void * p, int size, int offset);
 extern int _find_first_bit_be(const unsigned long *p, unsigned size);
@@ -180,33 +178,26 @@
 /*
  * The __* form of bitops are non-atomic and may be reordered.
  */
-#define	ATOMIC_BITOP_LE(name,nr,p)		\
-	(__builtin_constant_p(nr) ?		\
-	 ____atomic_##name(nr, p) :		\
-	 _##name##_le(nr,p))
-
-#define	ATOMIC_BITOP_BE(name,nr,p)		\
-	(__builtin_constant_p(nr) ?		\
-	 ____atomic_##name(nr, p) :		\
-	 _##name##_be(nr,p))
+#define ATOMIC_BITOP(name,nr,p)			\
+	(__builtin_constant_p(nr) ? ____atomic_##name(nr, p) : _##name(nr,p))
 #else
-#define ATOMIC_BITOP_LE(name,nr,p)	_##name##_le(nr,p)
-#define ATOMIC_BITOP_BE(name,nr,p)	_##name##_be(nr,p)
+#define ATOMIC_BITOP(name,nr,p)		_##name(nr,p)
 #endif
 
-#define NONATOMIC_BITOP(name,nr,p)		\
-	(____nonatomic_##name(nr, p))
+/*
+ * Native endian atomic definitions.
+ */
+#define set_bit(nr,p)			ATOMIC_BITOP(set_bit,nr,p)
+#define clear_bit(nr,p)			ATOMIC_BITOP(clear_bit,nr,p)
+#define change_bit(nr,p)		ATOMIC_BITOP(change_bit,nr,p)
+#define test_and_set_bit(nr,p)		ATOMIC_BITOP(test_and_set_bit,nr,p)
+#define test_and_clear_bit(nr,p)	ATOMIC_BITOP(test_and_clear_bit,nr,p)
+#define test_and_change_bit(nr,p)	ATOMIC_BITOP(test_and_change_bit,nr,p)
 
 #ifndef __ARMEB__
 /*
  * These are the little endian, atomic definitions.
  */
-#define set_bit(nr,p)			ATOMIC_BITOP_LE(set_bit,nr,p)
-#define clear_bit(nr,p)			ATOMIC_BITOP_LE(clear_bit,nr,p)
-#define change_bit(nr,p)		ATOMIC_BITOP_LE(change_bit,nr,p)
-#define test_and_set_bit(nr,p)		ATOMIC_BITOP_LE(test_and_set_bit,nr,p)
-#define test_and_clear_bit(nr,p)	ATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
-#define test_and_change_bit(nr,p)	ATOMIC_BITOP_LE(test_and_change_bit,nr,p)
 #define find_first_zero_bit(p,sz)	_find_first_zero_bit_le(p,sz)
 #define find_next_zero_bit(p,sz,off)	_find_next_zero_bit_le(p,sz,off)
 #define find_first_bit(p,sz)		_find_first_bit_le(p,sz)
@@ -215,16 +206,9 @@
 #define WORD_BITOFF_TO_LE(x)		((x))
 
 #else
-
 /*
  * These are the big endian, atomic definitions.
  */
-#define set_bit(nr,p)			ATOMIC_BITOP_BE(set_bit,nr,p)
-#define clear_bit(nr,p)			ATOMIC_BITOP_BE(clear_bit,nr,p)
-#define change_bit(nr,p)		ATOMIC_BITOP_BE(change_bit,nr,p)
-#define test_and_set_bit(nr,p)		ATOMIC_BITOP_BE(test_and_set_bit,nr,p)
-#define test_and_clear_bit(nr,p)	ATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
-#define test_and_change_bit(nr,p)	ATOMIC_BITOP_BE(test_and_change_bit,nr,p)
 #define find_first_zero_bit(p,sz)	_find_first_zero_bit_be(p,sz)
 #define find_next_zero_bit(p,sz,off)	_find_next_zero_bit_be(p,sz,off)
 #define find_first_bit(p,sz)		_find_first_bit_be(p,sz)
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 18a5664..d5d8d5c 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -187,7 +187,8 @@
  * Optimized __flush_icache_all for the common cases. Note that UP ARMv7
  * will fall through to use __flush_icache_all_generic.
  */
-#if (defined(CONFIG_CPU_V7) && defined(CONFIG_CPU_V6)) ||		\
+#if (defined(CONFIG_CPU_V7) && \
+     (defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K))) || \
 	defined(CONFIG_SMP_ON_UP)
 #define __flush_icache_preferred	__cpuc_flush_icache_all
 #elif __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index 0591d35..c7afbc5 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -109,7 +109,7 @@
 # define MULTI_CACHE 1
 #endif
 
-#if defined(CONFIG_CPU_V6)
+#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
 //# ifdef _CACHE
 #  define MULTI_CACHE 1
 //# else
diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h
index 6469521..e2be7f1 100644
--- a/arch/arm/include/asm/glue-proc.h
+++ b/arch/arm/include/asm/glue-proc.h
@@ -230,7 +230,7 @@
 # endif
 #endif
 
-#ifdef CONFIG_CPU_V6
+#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
 # ifdef CPU_NAME
 #  undef  MULTI_CPU
 #  define MULTI_CPU
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 17eb355..fdd3820 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -5,17 +5,52 @@
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
+/*
+ * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
+ * extensions, so when running on UP, we have to patch these instructions away.
+ */
+#define ALT_SMP(smp, up)					\
+	"9998:	" smp "\n"					\
+	"	.pushsection \".alt.smp.init\", \"a\"\n"	\
+	"	.long	9998b\n"				\
+	"	" up "\n"					\
+	"	.popsection\n"
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define SEV		ALT_SMP("sev.w", "nop.w")
+/*
+ * For Thumb-2, special care is needed to ensure that the conditional WFE
+ * instruction really does assemble to exactly 4 bytes (as required by
+ * the SMP_ON_UP fixup code).   By itself "wfene" might cause the
+ * assembler to insert a extra (16-bit) IT instruction, depending on the
+ * presence or absence of neighbouring conditional instructions.
+ *
+ * To avoid this unpredictableness, an approprite IT is inserted explicitly:
+ * the assembler won't change IT instructions which are explicitly present
+ * in the input.
+ */
+#define WFE(cond)	ALT_SMP(		\
+	"it " cond "\n\t"			\
+	"wfe" cond ".n",			\
+						\
+	"nop.w"					\
+)
+#else
+#define SEV		ALT_SMP("sev", "nop")
+#define WFE(cond)	ALT_SMP("wfe" cond, "nop")
+#endif
+
 static inline void dsb_sev(void)
 {
 #if __LINUX_ARM_ARCH__ >= 7
 	__asm__ __volatile__ (
 		"dsb\n"
-		"sev"
+		SEV
 	);
-#elif defined(CONFIG_CPU_32v6K)
+#else
 	__asm__ __volatile__ (
 		"mcr p15, 0, %0, c7, c10, 4\n"
-		"sev"
+		SEV
 		: : "r" (0)
 	);
 #endif
@@ -46,9 +81,7 @@
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
-#ifdef CONFIG_CPU_32v6K
-"	wfene\n"
-#endif
+	WFE("ne")
 "	strexeq	%0, %2, [%1]\n"
 "	teqeq	%0, #0\n"
 "	bne	1b"
@@ -107,9 +140,7 @@
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
-#ifdef CONFIG_CPU_32v6K
-"	wfene\n"
-#endif
+	WFE("ne")
 "	strexeq	%0, %2, [%1]\n"
 "	teq	%0, #0\n"
 "	bne	1b"
@@ -176,9 +207,7 @@
 "1:	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
 "	strexpl	%1, %0, [%2]\n"
-#ifdef CONFIG_CPU_32v6K
-"	wfemi\n"
-#endif
+	WFE("mi")
 "	rsbpls	%0, %1, #0\n"
 "	bmi	1b"
 	: "=&r" (tmp), "=&r" (tmp2)
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 97f6d60..9a87823 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -347,6 +347,7 @@
 #include <asm-generic/cmpxchg-local.h>
 
 #if __LINUX_ARM_ARCH__ < 6
+/* min ARCH < ARMv6 */
 
 #ifdef CONFIG_SMP
 #error "SMP is not supported on this platform"
@@ -365,7 +366,7 @@
 #include <asm-generic/cmpxchg.h>
 #endif
 
-#else	/* __LINUX_ARM_ARCH__ >= 6 */
+#else	/* min ARCH >= ARMv6 */
 
 extern void __bad_cmpxchg(volatile void *ptr, int size);
 
@@ -379,7 +380,7 @@
 	unsigned long oldval, res;
 
 	switch (size) {
-#ifdef CONFIG_CPU_32v6K
+#ifndef CONFIG_CPU_V6	/* min ARCH >= ARMv6K */
 	case 1:
 		do {
 			asm volatile("@ __cmpxchg1\n"
@@ -404,7 +405,7 @@
 				: "memory", "cc");
 		} while (res);
 		break;
-#endif /* CONFIG_CPU_32v6K */
+#endif
 	case 4:
 		do {
 			asm volatile("@ __cmpxchg4\n"
@@ -450,12 +451,12 @@
 	unsigned long ret;
 
 	switch (size) {
-#ifndef CONFIG_CPU_32v6K
+#ifdef CONFIG_CPU_V6	/* min ARCH == ARMv6 */
 	case 1:
 	case 2:
 		ret = __cmpxchg_local_generic(ptr, old, new, size);
 		break;
-#endif	/* !CONFIG_CPU_32v6K */
+#endif
 	default:
 		ret = __cmpxchg(ptr, old, new, size);
 	}
@@ -469,7 +470,7 @@
 				       (unsigned long)(n),		\
 				       sizeof(*(ptr))))
 
-#ifdef CONFIG_CPU_32v6K
+#ifndef CONFIG_CPU_V6	/* min ARCH >= ARMv6K */
 
 /*
  * Note : ARMv7-M (currently unsupported by Linux) does not support
@@ -524,11 +525,11 @@
 					 (unsigned long long)(o),	\
 					 (unsigned long long)(n)))
 
-#else	/* !CONFIG_CPU_32v6K */
+#else /* min ARCH = ARMv6 */
 
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 
-#endif	/* CONFIG_CPU_32v6K */
+#endif
 
 #endif	/* __LINUX_ARM_ARCH__ >= 6 */
 
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index e71d6ff..60843eb 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -28,15 +28,14 @@
 #define tls_emu		1
 #define has_tls_reg		1
 #define set_tls		set_tls_none
-#elif __LINUX_ARM_ARCH__ >= 7 ||					\
-	(__LINUX_ARM_ARCH__ == 6 && defined(CONFIG_CPU_32v6K))
-#define tls_emu		0
-#define has_tls_reg		1
-#define set_tls		set_tls_v6k
-#elif __LINUX_ARM_ARCH__ == 6
+#elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
 #define set_tls		set_tls_v6
+#elif defined(CONFIG_CPU_32v6K)
+#define tls_emu		0
+#define has_tls_reg		1
+#define set_tls		set_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0