Merge branch 'for-rmk' of git://linux-arm.org/linux-2.6 into devel

Conflicts:
	arch/arm/Kconfig
	arch/arm/kernel/smp.c
	arch/arm/mach-realview/Makefile
	arch/arm/mach-realview/platsmp.c
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 83b1da6..fc8af43 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -482,6 +482,9 @@
 	subeq	r4, r2, #4			@ ARM instr at LR - 4
 	subne	r4, r2, #2			@ Thumb instr at LR - 2
 1:	ldreqt	r0, [r4]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	reveq	r0, r0				@ little endian instruction
+#endif
 	beq	call_fpe
 	@ Thumb instruction
 #if __LINUX_ARM_ARCH__ >= 7
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index b55cb03..366e509 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -210,6 +210,9 @@
   A710(	teq	ip, #0x0f000000						)
   A710(	bne	.Larm710bug						)
 #endif
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	r10, r10			@ little endian instruction
+#endif
 
 #elif defined(CONFIG_AEABI)
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index c3265a2..1585423 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -365,7 +365,7 @@
 	regs.ARM_r2 = (unsigned long)fn;
 	regs.ARM_r3 = (unsigned long)do_exit;
 	regs.ARM_pc = (unsigned long)kernel_thread_helper;
-	regs.ARM_cpsr = SVC_MODE;
+	regs.ARM_cpsr = SVC_MODE | PSR_ENDSTATE;
 
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
 }
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 80b8b5c..442b874 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -426,9 +426,13 @@
 		 */
 		thumb = handler & 1;
 
-		if (thumb)
+		if (thumb) {
 			cpsr |= PSR_T_BIT;
-		else
+#if __LINUX_ARM_ARCH__ >= 7
+			/* clear the If-Then Thumb-2 execution state */
+			cpsr &= ~PSR_IT_MASK;
+#endif
+		} else
 			cpsr &= ~PSR_T_BIT;
 	}
 #endif
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 0d8097f..de885fd 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -28,6 +28,7 @@
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
 #include <asm/cpu.h>
+#include <asm/cputype.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -585,6 +586,12 @@
 	unsigned long ta_end;
 };
 
+/* all SMP configurations have the extended CPUID registers */
+static inline int tlb_ops_need_broadcast(void)
+{
+	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2;
+}
+
 static inline void ipi_flush_tlb_all(void *ignored)
 {
 	local_flush_tlb_all();
@@ -627,51 +634,61 @@
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+	else
+		local_flush_tlb_all();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
+	if (tlb_ops_need_broadcast())
+		on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
+	else
+		local_flush_tlb_mm(mm);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 {
-	struct tlb_args ta;
-
-	ta.ta_vma = vma;
-	ta.ta_start = uaddr;
-
-	on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = uaddr;
+		on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	} else
+		local_flush_tlb_page(vma, uaddr);
 }
 
 void flush_tlb_kernel_page(unsigned long kaddr)
 {
-	struct tlb_args ta;
-
-	ta.ta_start = kaddr;
-
-	on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = kaddr;
+		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
+	} else
+		local_flush_tlb_kernel_page(kaddr);
 }
 
 void flush_tlb_range(struct vm_area_struct *vma,
                      unsigned long start, unsigned long end)
 {
-	struct tlb_args ta;
-
-	ta.ta_vma = vma;
-	ta.ta_start = start;
-	ta.ta_end = end;
-
-	on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	} else
+		local_flush_tlb_range(vma, start, end);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	struct tlb_args ta;
-
-	ta.ta_start = start;
-	ta.ta_end = end;
-
-	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+	} else
+		local_flush_tlb_kernel_range(start, end);
 }
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index c90f272..6c07797 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -141,6 +141,7 @@
 
 	.data : AT(__data_loc) {
 		_data = .;		/* address in memory */
+		_sdata = .;
 
 		/*
 		 * first, the init task union, aligned
@@ -192,6 +193,7 @@
 		__bss_start = .;	/* BSS				*/
 		*(.bss)
 		*(COMMON)
+		__bss_stop = .;
 		_end = .;
 	}
 					/* Stabs debugging sections.	*/