Merge branch 'devel-stable' into devel

Conflicts:
	arch/arm/Kconfig
	arch/arm/include/asm/system.h
	arch/arm/mm/Kconfig
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 3302730..346ae14 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -572,6 +572,8 @@
 config CPU_TLB_V7
 	bool
 
+config VERIFY_PERMISSION_FAULT
+	bool
 endif
 
 config CPU_HAS_ASID
@@ -736,6 +738,12 @@
 config OUTER_CACHE
 	bool
 
+config OUTER_CACHE_SYNC
+	bool
+	help
+	  The outer cache has an outer_cache_fns.sync function pointer
+	  that can be used to drain the write buffer of the outer cache.
+
 config CACHE_FEROCEON_L2
 	bool "Enable the Feroceon L2 cache controller"
 	depends on ARCH_KIRKWOOD || ARCH_MV78XX0
@@ -758,12 +766,13 @@
 		   ARCH_NOMADIK || ARCH_OMAP4 || ARCH_U8500 || ARCH_VEXPRESS_CA9X4
 	default y
 	select OUTER_CACHE
+	select OUTER_CACHE_SYNC
 	help
 	  This option enables the L2x0 PrimeCell.
 
 config CACHE_TAUROS2
 	bool "Enable the Tauros2 L2 cache controller"
-	depends on ARCH_DOVE
+	depends on (ARCH_DOVE || ARCH_MMP)
 	default y
 	select OUTER_CACHE
 	help
@@ -801,3 +810,9 @@
 	  behaviour.  Therefore, we offer this as an option.
 
 	  You are recommended say 'Y' here and debug any affected drivers.
+
+config ARCH_HAS_BARRIERS
+	bool
+	help
+	  This option allows the use of custom mandatory barriers
+	  included via the mach/barriers.h file.
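
Taken together, the two new symbols above feed the barrier rework: OUTER_CACHE_SYNC advertises a sync hook that the mandatory barriers can use to drain the outer cache's write buffer, and ARCH_HAS_BARRIERS lets a machine supply its own barriers via mach/barriers.h instead. A minimal sketch of the dispatch, modeled on the outer_cache_fns interface (simplified, not the full header):

struct outer_cache_fns {
	void (*inv_range)(unsigned long start, unsigned long end);
	void (*clean_range)(unsigned long start, unsigned long end);
	void (*flush_range)(unsigned long start, unsigned long end);
#ifdef CONFIG_OUTER_CACHE_SYNC
	void (*sync)(void);	/* drain the outer cache's write buffer */
#endif
};

extern struct outer_cache_fns outer_cache;

static inline void outer_sync(void)
{
	if (outer_cache.sync)	/* no-op until an L2 driver installs a hook */
		outer_cache.sync();
}

/* mb() can then expand to: dsb(); outer_sync(); */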
diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S
index 2e6dc04..ec88b15 100644
--- a/arch/arm/mm/abort-ev7.S
+++ b/arch/arm/mm/abort-ev7.S
@@ -29,5 +29,26 @@
 	 * V6 code adjusts the returned DFSR.
 	 * New designs should not need to patch up faults.
 	 */
+
+#if defined(CONFIG_VERIFY_PERMISSION_FAULT)
+	/*
+	 * Detect erroneous permission failures and fix them up
+	 */
+	ldr	r3, =0x40d			@ On permission fault
+	and	r3, r1, r3
+	cmp	r3, #0x0d
+	movne	pc, lr
+
+	mcr	p15, 0, r0, c7, c8, 0   	@ Retranslate FAR
+	isb
+	mrc	p15, 0, r2, c7, c4, 0   	@ Read the PAR
+	and	r3, r2, #0x7b   		@ On translation fault
+	cmp	r3, #0x0b
+	movne	pc, lr
+	bic	r1, r1, #0xf			@ Fix up FSR FS[5:0]
+	and	r2, r2, #0x7e
+	orr	r1, r1, r2, LSR #1
+#endif
+
 	mov	pc, lr
 ENDPROC(v7_early_abort)
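
The new #ifdef block is dense; as a reading aid, here is a hedged C model of the same fixup. The helpers write_ats1cpr(), read_par() and isb() are hypothetical stand-ins for the three coprocessor instructions in the diff; the bit masks are copied from it. The idea: on a reported section permission fault, retranslate the faulting address and, if the Physical Address Register shows the walk actually took a translation fault, copy the PAR fault status back into the FSR before the generic fault handler sees it.

/* hypothetical C model of the VERIFY_PERMISSION_FAULT assembly above */
static unsigned long v7_check_permission_fault(unsigned long far,
					       unsigned long fsr)
{
	unsigned long par;

	if ((fsr & 0x40d) != 0x00d)	/* not a section permission fault */
		return fsr;

	write_ats1cpr(far);		/* mcr p15, 0, far, c7, c8, 0: retranslate */
	isb();
	par = read_par();		/* mrc p15, 0, par, c7, c4, 0 */

	if ((par & 0x7b) != 0x0b)	/* PAR does not report a translation fault */
		return fsr;

	fsr &= ~0xf;			/* fix up FSR FS bits ...	    */
	fsr |= (par & 0x7e) >> 1;	/* ... from the PAR fault status    */
	return fsr;
}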
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 28b7c27..6f98c35 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -167,15 +167,15 @@
  THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
  THUMB(	"	add	%2, %2, #1\n"	)		\
 	"2:\n"						\
-	"	.section .fixup,\"ax\"\n"		\
+	"	.pushsection .fixup,\"ax\"\n"		\
 	"	.align	2\n"				\
 	"3:	mov	%0, #1\n"			\
 	"	b	2b\n"				\
-	"	.previous\n"				\
-	"	.section __ex_table,\"a\"\n"		\
+	"	.popsection\n"				\
+	"	.pushsection __ex_table,\"a\"\n"	\
 	"	.align	3\n"				\
 	"	.long	1b, 3b\n"			\
-	"	.previous\n"				\
+	"	.popsection\n"				\
 	: "=r" (err), "=&r" (val), "=r" (addr)		\
 	: "0" (err), "2" (addr))
 
@@ -227,16 +227,16 @@
 		"	mov	%1, %1, "NEXT_BYTE"\n"		\
 		"2:	"ins"	%1, [%2]\n"			\
 		"3:\n"						\
-		"	.section .fixup,\"ax\"\n"		\
+		"	.pushsection .fixup,\"ax\"\n"		\
 		"	.align	2\n"				\
 		"4:	mov	%0, #1\n"			\
 		"	b	3b\n"				\
-		"	.previous\n"				\
-		"	.section __ex_table,\"a\"\n"		\
+		"	.popsection\n"				\
+		"	.pushsection __ex_table,\"a\"\n"	\
 		"	.align	3\n"				\
 		"	.long	1b, 4b\n"			\
 		"	.long	2b, 4b\n"			\
-		"	.previous\n"				\
+		"	.popsection\n"				\
 		: "=r" (err), "=&r" (v), "=&r" (a)		\
 		: "0" (err), "1" (v), "2" (a));			\
 		if (err)					\
@@ -267,18 +267,18 @@
 		"	mov	%1, %1, "NEXT_BYTE"\n"		\
 		"4:	"ins"	%1, [%2]\n"			\
 		"5:\n"						\
-		"	.section .fixup,\"ax\"\n"		\
+		"	.pushsection .fixup,\"ax\"\n"		\
 		"	.align	2\n"				\
 		"6:	mov	%0, #1\n"			\
 		"	b	5b\n"				\
-		"	.previous\n"				\
-		"	.section __ex_table,\"a\"\n"		\
+		"	.popsection\n"				\
+		"	.pushsection __ex_table,\"a\"\n"	\
 		"	.align	3\n"				\
 		"	.long	1b, 6b\n"			\
 		"	.long	2b, 6b\n"			\
 		"	.long	3b, 6b\n"			\
 		"	.long	4b, 6b\n"			\
-		"	.previous\n"				\
+		"	.popsection\n"				\
 		: "=r" (err), "=&r" (v), "=&r" (a)		\
 		: "0" (err), "1" (v), "2" (a));			\
 		if (err)					\
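
The motivation for this mechanical change: ".previous" only swaps back to the immediately preceding section, which breaks if one of these macros is expanded while the assembler is already inside a pushed section; ".pushsection"/".popsection" maintain a proper section stack. A stripped-down sketch of the resulting pattern, using a hypothetical single-byte load rather than one of the macros above:

#define load_byte_with_fixup(err, val, addr)			\
	__asm__ __volatile__(					\
	"1:	ldrb	%1, [%2]\n"				\
	"2:\n"							\
	"	.pushsection .fixup,\"ax\"\n"			\
	"	.align	2\n"					\
	"3:	mov	%0, #1\n"	/* flag the fault */	\
	"	b	2b\n"					\
	"	.popsection\n"					\
	"	.pushsection __ex_table,\"a\"\n"		\
	"	.align	3\n"					\
	"	.long	1b, 3b\n"	/* fault at 1b -> 3b */	\
	"	.popsection\n"					\
	: "=r" (err), "=&r" (val)				\
	: "r" (addr), "0" (err))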
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 78f0fc8..9819869 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -94,6 +94,15 @@
 }
 #endif
 
+static void l2x0_cache_sync(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&l2x0_lock, flags);
+	cache_sync();
+	spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
 static inline void l2x0_inv_all(void)
 {
 	unsigned long flags;
@@ -252,6 +261,7 @@
 	outer_cache.inv_range = l2x0_inv_range;
 	outer_cache.clean_range = l2x0_clean_range;
 	outer_cache.flush_range = l2x0_flush_range;
+	outer_cache.sync = l2x0_cache_sync;
 
 	printk(KERN_INFO "%s cache controller enabled\n", type);
 	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
index b2a6008..d2852e1 100644
--- a/arch/arm/mm/copypage-fa.c
+++ b/arch/arm/mm/copypage-fa.c
@@ -40,7 +40,7 @@
 }
 
 void fa_copy_user_highpage(struct page *to, struct page *from,
-	unsigned long vaddr)
+	unsigned long vaddr, struct vm_area_struct *vma)
 {
 	void *kto, *kfrom;
 
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 8bca4de..f55fa10 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -41,14 +41,7 @@
 	kfrom = kmap_atomic(from, KM_USER0);
 	kto = kmap_atomic(to, KM_USER1);
 	copy_page(kto, kfrom);
-#ifdef CONFIG_HIGHMEM
-	/*
-	 * kmap_atomic() doesn't set the page virtual address, and
-	 * kunmap_atomic() takes care of cache flushing already.
-	 */
-	if (page_address(to) != NULL)
-#endif
-		__cpuc_flush_dcache_area(kto, PAGE_SIZE);
+	__cpuc_flush_dcache_area(kto, PAGE_SIZE);
 	kunmap_atomic(kto, KM_USER1);
 	kunmap_atomic(kfrom, KM_USER0);
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 0da7ecc..13fa536 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -11,7 +11,7 @@
  */
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
+#include <linux/gfp.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/init.h>
@@ -464,6 +464,11 @@
 				vaddr += offset;
 				op(vaddr, len, dir);
 				kunmap_high(page);
+			} else if (cache_is_vipt()) {
+				pte_t saved_pte;
+				vaddr = kmap_high_l1_vipt(page, &saved_pte);
+				op(vaddr + offset, len, dir);
+				kunmap_high_l1_vipt(page, saved_pte);
 			}
 		} else {
 			vaddr = page_address(page) + offset;
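
Redrawn as a self-contained sketch (hypothetical helper name; 'op' stands for one of the dmac_* range operations), the page-maintenance dispatch after this hunk looks as follows. The new branch covers highmem pages that currently have no kernel mapping but may still hold stale lines in a VIPT cache:

static void dma_maint_one_page(struct page *page, unsigned long offset,
			       size_t len, int dir,
			       void (*op)(const void *, size_t, int))
{
	void *vaddr;

	if (!PageHighMem(page)) {
		op(page_address(page) + offset, len, dir);
	} else if ((vaddr = kmap_high_get(page)) != NULL) {
		/* the page already has a pinned kmap: use it */
		op(vaddr + offset, len, dir);
		kunmap_high(page);
	} else if (cache_is_vipt()) {
		/* unmapped highmem + VIPT: borrow a per-CPU fixmap slot */
		pte_t saved_pte;
		vaddr = kmap_high_l1_vipt(page, &saved_pte);
		op(vaddr + offset, len, dir);
		kunmap_high_l1_vipt(page, saved_pte);
	}
	/* unmapped highmem + VIVT: nothing can be cached, nothing to do */
}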
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 82df01a..9b906de 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -16,6 +16,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
+#include <linux/gfp.h>
 
 #include <asm/bugs.h>
 #include <asm/cacheflush.h>
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index e34f095..c6844cb 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -13,6 +13,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
+#include <asm/highmem.h>
 #include <asm/smp_plat.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
@@ -152,21 +153,25 @@
 
 void __flush_dcache_page(struct address_space *mapping, struct page *page)
 {
-	void *addr = page_address(page);
-
 	/*
 	 * Writeback any data associated with the kernel mapping of this
 	 * page.  This ensures that data in the physical page is mutually
 	 * coherent with the kernels mapping.
 	 */
-#ifdef CONFIG_HIGHMEM
-	/*
-	 * kmap_atomic() doesn't set the page virtual address, and
-	 * kunmap_atomic() takes care of cache flushing already.
-	 */
-	if (addr)
-#endif
-		__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+	if (!PageHighMem(page)) {
+		__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
+	} else {
+		void *addr = kmap_high_get(page);
+		if (addr) {
+			__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+			kunmap_high(page);
+		} else if (cache_is_vipt()) {
+			pte_t saved_pte;
+			addr = kmap_high_l1_vipt(page, &saved_pte);
+			__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+			kunmap_high_l1_vipt(page, saved_pte);
+		}
+	}
 
 	/*
 	 * If this is a page cache page, and we have an aliasing VIPT cache,
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index 2be1ec7..77b030f 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -79,7 +79,8 @@
 	unsigned int idx = type + KM_TYPE_NR * smp_processor_id();
 
 	if (kvaddr >= (void *)FIXADDR_START) {
-		__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+		if (cache_is_vivt())
+			__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
 #ifdef CONFIG_DEBUG_HIGHMEM
 		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
 		set_pte_ext(TOP_PTE(vaddr), __pte(0), 0);
@@ -124,3 +125,87 @@
 	pte = TOP_PTE(vaddr);
 	return pte_page(*pte);
 }
+
+#ifdef CONFIG_CPU_CACHE_VIPT
+
+#include <linux/percpu.h>
+
+/*
+ * The VIVT cache of a highmem page is always flushed before the page
+ * is unmapped. Hence unmapped highmem pages need no cache maintenance
+ * in that case.
+ *
+ * However unmapped pages may still be cached with a VIPT cache, and
+ * it is not possible to perform cache maintenance on them using physical
+ * addresses unfortunately.  So we have no choice but to set up a temporary
+ * virtual mapping for that purpose.
+ *
+ * Yet this VIPT cache maintenance may be triggered from DMA support
+ * functions which are possibly called from interrupt context. As we don't
+ * want to keep interrupts disabled all the time when such maintenance is
+ * taking place, we therefore allow for some reentrancy by preserving and
+ * restoring the previous fixmap entry before the interrupted context is
+ * resumed.  If the reentrancy depth is 0 then there is no need to restore
+ * the previous fixmap, and leaving the current one in place allows it to
+ * be reused the next time without a TLB flush (common with DMA).
+ */
+
+static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth);
+
+void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte)
+{
+	unsigned int idx, cpu = smp_processor_id();
+	int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
+	unsigned long vaddr, flags;
+	pte_t pte, *ptep;
+
+	idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	ptep = TOP_PTE(vaddr);
+	pte = mk_pte(page, kmap_prot);
+
+	if (!in_interrupt())
+		preempt_disable();
+
+	raw_local_irq_save(flags);
+	(*depth)++;
+	if (pte_val(*ptep) == pte_val(pte)) {
+		*saved_pte = pte;
+	} else {
+		*saved_pte = *ptep;
+		set_pte_ext(ptep, pte, 0);
+		local_flush_tlb_kernel_page(vaddr);
+	}
+	raw_local_irq_restore(flags);
+
+	return (void *)vaddr;
+}
+
+void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte)
+{
+	unsigned int idx, cpu = smp_processor_id();
+	int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
+	unsigned long vaddr, flags;
+	pte_t pte, *ptep;
+
+	idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	ptep = TOP_PTE(vaddr);
+	pte = mk_pte(page, kmap_prot);
+
+	BUG_ON(pte_val(*ptep) != pte_val(pte));
+	BUG_ON(*depth <= 0);
+
+	raw_local_irq_save(flags);
+	(*depth)--;
+	if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) {
+		set_pte_ext(ptep, saved_pte, 0);
+		local_flush_tlb_kernel_page(vaddr);
+	}
+	raw_local_irq_restore(flags);
+
+	if (!in_interrupt())
+		preempt_enable();
+}
+
+#endif  /* CONFIG_CPU_CACHE_VIPT */
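
A usage sketch for the new pair (this is essentially what the flush.c and dma-mapping.c hunks in this merge do): the caller parks the previous fixmap pte on its own stack, so the sequence is safe even if an interrupt handler re-enters it on the same CPU.

static void flush_unmapped_highmem_page(struct page *page)
{
	pte_t saved_pte;
	void *vaddr;

	vaddr = kmap_high_l1_vipt(page, &saved_pte);	/* may nest */
	__cpuc_flush_dcache_area(vaddr, PAGE_SIZE);
	kunmap_high_l1_vipt(page, saved_pte);		/* undoes one level */
}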
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 105d1d4..1ba6cf5 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -16,6 +16,7 @@
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
 #include <linux/highmem.h>
+#include <linux/gfp.h>
 
 #include <asm/mach-types.h>
 #include <asm/sections.h>
@@ -84,9 +85,6 @@
 	printk("Mem-info:\n");
 	show_free_areas();
 	for_each_online_node(node) {
-		pg_data_t *n = NODE_DATA(node);
-		struct page *map = pgdat_page_nr(n, 0) - n->node_start_pfn;
-
 		for_each_nodebank (i,mi,node) {
 			struct membank *bank = &mi->bank[i];
 			unsigned int pfn1, pfn2;
@@ -95,8 +93,8 @@
 			pfn1 = bank_pfn_start(bank);
 			pfn2 = bank_pfn_end(bank);
 
-			page = map + pfn1;
-			end  = map + pfn2;
+			page = pfn_to_page(pfn1);
+			end  = pfn_to_page(pfn2 - 1) + 1;
 
 			do {
 				total++;
@@ -568,9 +566,6 @@
 	reserved_pages = free_pages = 0;
 
 	for_each_online_node(node) {
-		pg_data_t *n = NODE_DATA(node);
-		struct page *map = pgdat_page_nr(n, 0) - n->node_start_pfn;
-
 		for_each_nodebank(i, &meminfo, node) {
 			struct membank *bank = &meminfo.bank[i];
 			unsigned int pfn1, pfn2;
@@ -579,8 +574,8 @@
 			pfn1 = bank_pfn_start(bank);
 			pfn2 = bank_pfn_end(bank);
 
-			page = map + pfn1;
-			end  = map + pfn2;
+			page = pfn_to_page(pfn1);
+			end  = pfn_to_page(pfn2 - 1) + 1;
 
 			do {
 				if (PageReserved(page))
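
Both hunks replace the same idiom; the point is that computing a node-wide memmap base and indexing it with raw pfns assumes one flat struct page array, which does not hold for all memory models. A sketch of the safe pattern now used in both loops (hypothetical helper; note that the end pointer comes from the last valid pfn, so pfn_to_page() is never asked about a pfn outside the bank):

static unsigned int count_bank_pages(unsigned long pfn1, unsigned long pfn2)
{
	struct page *page = pfn_to_page(pfn1);		/* first page of bank */
	struct page *end  = pfn_to_page(pfn2 - 1) + 1;	/* one past the last */
	unsigned int total = 0;

	do {
		total++;	/* ... classify *page as the loops above do ... */
		page++;
	} while (page < end);

	return total;
}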
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6985200..e7113d0 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -421,6 +421,10 @@
 		user_pgprot |= L_PTE_SHARED;
 		kern_pgprot |= L_PTE_SHARED;
 		vecs_pgprot |= L_PTE_SHARED;
+		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
+		mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
+		mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
+		mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
 		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
 #endif
@@ -1087,10 +1091,12 @@
 	pgd_t *pgd;
 	int i;
 
-	if (current->mm && current->mm->pgd)
-		pgd = current->mm->pgd;
-	else
-		pgd = init_mm.pgd;
+	/*
+	 * We need access to user-mode page tables here. For kernel threads
+	 * we don't have any user-mode mappings so we use the context that we
+	 * "borrowed".
+	 */
+	pgd = current->active_mm->pgd;
 
 	base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
 	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
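
The comment added above is the whole story; as a one-line illustration (hypothetical helper): for a kernel thread current->mm is NULL, but current->active_mm always names the mm whose tables are live, which for a kernel thread is exactly the borrowed one that the old code approximated by falling back to init_mm.

static inline pgd_t *live_pgd(void)
{
	/* valid in any context: kernel threads borrow an mm,
	 * so active_mm is never NULL */
	return current->active_mm->pgd;
}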
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 2690146..be5f58e 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/mm.h>
+#include <linux/gfp.h>
 #include <linux/highmem.h>
 
 #include <asm/pgalloc.h>
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index ee77002..5c47760 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -45,7 +45,7 @@
 	mcr	p15, 0, r0, c9, c0, 5		@ Allow read-buffer operations from userland
 	mov	pc, lr
 
-	.previous
+	.section .text
 
 /*
  * cpu_sa1100_proc_fin()