[XTENSA] Add support for cache-aliasing

Add support for processors that have cache-aliasing issues, such as
the Stretch S5000 processor. Cache-aliasing means that the size of
the cache (for one way) is larger than the page size, thus, a page
can end up in several places in cache depending on the virtual to
physical translation. The method used here is to map a user page
temporarily through the auto-refill way 0 and of of the DTLB.
We probably will want to revisit this issue and use a better
approach with kmap/kunmap.

Signed-off-by: Chris Zankel <chris@zankel.net>
diff --git a/include/asm-xtensa/cache.h b/include/asm-xtensa/cache.h
index 1c4a78f..3bba2a5 100644
--- a/include/asm-xtensa/cache.h
+++ b/include/asm-xtensa/cache.h
@@ -19,6 +19,15 @@
 
 #define DCACHE_WAY_SIZE	(XCHAL_DCACHE_SIZE/XCHAL_DCACHE_WAYS)
 #define ICACHE_WAY_SIZE	(XCHAL_ICACHE_SIZE/XCHAL_ICACHE_WAYS)
+#define DCACHE_WAY_SHIFT (XCHAL_DCACHE_SETWIDTH + XCHAL_DCACHE_LINEWIDTH)
+#define ICACHE_WAY_SHIFT (XCHAL_ICACHE_SETWIDTH + XCHAL_ICACHE_LINEWIDTH)
+
+/* Maximum cache size per way. */
+#if DCACHE_WAY_SIZE >= ICACHE_WAY_SIZE
+# define CACHE_WAY_SIZE DCACHE_WAY_SIZE
+#else
+# define CACHE_WAY_SIZE ICACHE_WAY_SIZE
+#endif
 
 
 #endif	/* _XTENSA_CACHE_H */
diff --git a/include/asm-xtensa/cacheflush.h b/include/asm-xtensa/cacheflush.h
index 22ef901..b773c57 100644
--- a/include/asm-xtensa/cacheflush.h
+++ b/include/asm-xtensa/cacheflush.h
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * (C) 2001 - 2006 Tensilica Inc.
+ * (C) 2001 - 2007 Tensilica Inc.
  */
 
 #ifndef _XTENSA_CACHEFLUSH_H
@@ -18,10 +18,7 @@
 #include <asm/page.h>
 
 /*
- * flush and invalidate data cache, invalidate instruction cache:
- *
- * __flush_invalidate_cache_all()
- * __flush_invalidate_cache_range(from,sze)
+ * Lo-level routines for cache flushing.
  *
  * invalidate data or instruction cache:
  *
@@ -40,26 +37,39 @@
  * __flush_invalidate_dcache_all()
  * __flush_invalidate_dcache_page(adr)
  * __flush_invalidate_dcache_range(from,size)
+ *
+ * specials for cache aliasing:
+ *
+ * __flush_invalidate_dcache_page_alias(vaddr,paddr)
+ * __invalidate_icache_page_alias(vaddr,paddr)
  */
 
-extern void __flush_invalidate_cache_all(void);
-extern void __flush_invalidate_cache_range(unsigned long, unsigned long);
-extern void __flush_invalidate_dcache_all(void);
+extern void __invalidate_dcache_all(void);
 extern void __invalidate_icache_all(void);
-
 extern void __invalidate_dcache_page(unsigned long);
 extern void __invalidate_icache_page(unsigned long);
 extern void __invalidate_icache_range(unsigned long, unsigned long);
 extern void __invalidate_dcache_range(unsigned long, unsigned long);
 
+
 #if XCHAL_DCACHE_IS_WRITEBACK
+extern void __flush_invalidate_dcache_all(void);
 extern void __flush_dcache_page(unsigned long);
+extern void __flush_dcache_range(unsigned long, unsigned long);
 extern void __flush_invalidate_dcache_page(unsigned long);
 extern void __flush_invalidate_dcache_range(unsigned long, unsigned long);
 #else
-# define __flush_dcache_page(p)				do { } while(0)
-# define __flush_invalidate_dcache_page(p) 		do { } while(0)
-# define __flush_invalidate_dcache_range(p,s)		do { } while(0)
+# define __flush_dcache_range(p,s)		do { } while(0)
+# define __flush_dcache_page(p)			do { } while(0)
+# define __flush_invalidate_dcache_page(p) 	__invalidate_dcache_page(p)
+# define __flush_invalidate_dcache_range(p,s)	__invalidate_dcache_range(p,s)
+#endif
+
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
+extern void __flush_invalidate_dcache_page_alias(unsigned long, unsigned long);
+#endif
+#if (ICACHE_WAY_SIZE > PAGE_SIZE)
+extern void __invalidate_icache_page_alias(unsigned long, unsigned long);
 #endif
 
 /*
@@ -71,17 +81,21 @@
  * (see also Documentation/cachetlb.txt)
  */
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
-#define flush_cache_all()		__flush_invalidate_cache_all();
-#define flush_cache_mm(mm)		__flush_invalidate_cache_all();
-#define flush_cache_dup_mm(mm)		__flush_invalidate_cache_all();
+#define flush_cache_all()						\
+	do {								\
+		__flush_invalidate_dcache_all();			\
+		__invalidate_icache_all();				\
+	} while (0)
 
-#define flush_cache_vmap(start,end)	__flush_invalidate_cache_all();
-#define flush_cache_vunmap(start,end)	__flush_invalidate_cache_all();
+#define flush_cache_mm(mm)		flush_cache_all()
+#define flush_cache_dup_mm(mm)		flush_cache_mm(mm)
+
+#define flush_cache_vmap(start,end)	flush_cache_all()
+#define flush_cache_vunmap(start,end)	flush_cache_all()
 
 extern void flush_dcache_page(struct page*);
-
 extern void flush_cache_range(struct vm_area_struct*, ulong, ulong);
 extern void flush_cache_page(struct vm_area_struct*, unsigned long, unsigned long);
 
@@ -101,24 +115,39 @@
 
 #endif
 
+/* Ensure consistency between data and instruction cache. */
 #define flush_icache_range(start,end) 					\
-	__invalidate_icache_range(start,(end)-(start))
+	do {								\
+		__flush_dcache_range(start, (end) - (start));		\
+		__invalidate_icache_range(start,(end) - (start));	\
+	} while (0)
 
 /* This is not required, see Documentation/cachetlb.txt */
-
-#define	flush_icache_page(vma,page)			do { } while(0)
+#define	flush_icache_page(vma,page)			do { } while (0)
 
 #define flush_dcache_mmap_lock(mapping)			do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
 
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-	memcpy(dst, src, len)
+extern void copy_to_user_page(struct vm_area_struct*, struct page*,
+		unsigned long, void*, const void*, unsigned long);
+extern void copy_from_user_page(struct vm_area_struct*, struct page*,
+		unsigned long, void*, const void*, unsigned long);
+
+#else
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len)		\
+	do {								\
+		memcpy(dst, src, len);					\
+		__flush_dcache_range((unsigned long) dst, len);		\
+		__invalidate_icache_range((unsigned long) dst, len);	\
+	} while (0)
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
 
+#endif
+
 #endif /* __KERNEL__ */
-
 #endif /* _XTENSA_CACHEFLUSH_H */
-
diff --git a/include/asm-xtensa/io.h b/include/asm-xtensa/io.h
index 0faa614..47c3616 100644
--- a/include/asm-xtensa/io.h
+++ b/include/asm-xtensa/io.h
@@ -14,6 +14,7 @@
 #ifdef __KERNEL__
 #include <asm/byteorder.h>
 #include <asm/page.h>
+#include <linux/kernel.h>
 
 #include <linux/types.h>
 
diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h
index 2d6ac21..55ce2c9 100644
--- a/include/asm-xtensa/page.h
+++ b/include/asm-xtensa/page.h
@@ -15,6 +15,7 @@
 
 #include <asm/processor.h>
 #include <asm/types.h>
+#include <asm/cache.h>
 
 /*
  * Fixed TLB translations in the processor.
@@ -39,6 +40,53 @@
 #define MAX_MEM_PFN		XCHAL_KSEG_SIZE
 #define PGTABLE_START		0x80000000
 
+/*
+ * Cache aliasing:
+ *
+ * If the cache size for one way is greater than the page size, we have to
+ * deal with cache aliasing. The cache index is wider than the page size:
+ *
+ * |    |cache| cache index
+ * | pfn  |off|	virtual address
+ * |xxxx:X|zzz|
+ * |    : |   |
+ * | \  / |   |
+ * |trans.|   |
+ * | /  \ |   |
+ * |yyyy:Y|zzz|	physical address
+ *
+ * When the page number is translated to the physical page address, the lowest
+ * bit(s) (X) that are part of the cache index are also translated (Y).
+ * If this translation changes bit(s) (X), the cache index is also afected,
+ * thus resulting in a different cache line than before.
+ * The kernel does not provide a mechanism to ensure that the page color
+ * (represented by this bit) remains the same when allocated or when pages
+ * are remapped. When user pages are mapped into kernel space, the color of
+ * the page might also change.
+ *
+ * We use the address space VMALLOC_END ... VMALLOC_END + DCACHE_WAY_SIZE * 2
+ * to temporarily map a patch so we can match the color.
+ */
+
+#if DCACHE_WAY_SIZE > PAGE_SIZE
+# define DCACHE_ALIAS_ORDER	(DCACHE_WAY_SHIFT - PAGE_SHIFT)
+# define DCACHE_ALIAS_MASK	(PAGE_MASK & (DCACHE_WAY_SIZE - 1))
+# define DCACHE_ALIAS(a)	(((a) & DCACHE_ALIAS_MASK) >> PAGE_SHIFT)
+# define DCACHE_ALIAS_EQ(a,b)	((((a) ^ (b)) & DCACHE_ALIAS_MASK) == 0)
+#else
+# define DCACHE_ALIAS_ORDER	0
+#endif
+
+#if ICACHE_WAY_SIZE > PAGE_SIZE
+# define ICACHE_ALIAS_ORDER	(ICACHE_WAY_SHIFT - PAGE_SHIFT)
+# define ICACHE_ALIAS_MASK	(PAGE_MASK & (ICACHE_WAY_SIZE - 1))
+# define ICACHE_ALIAS(a)	(((a) & ICACHE_ALIAS_MASK) >> PAGE_SHIFT)
+# define ICACHE_ALIAS_EQ(a,b)	((((a) ^ (b)) & ICACHE_ALIAS_MASK) == 0)
+#else
+# define ICACHE_ALIAS_ORDER	0
+#endif
+
+
 #ifdef __ASSEMBLY__
 
 #define __pgprot(x)	(x)
@@ -90,11 +138,11 @@
  * some extra work
  */
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE)
-void clear_user_page(void *addr, unsigned long vaddr, struct page* page);
-void copy_user_page(void *to,void* from,unsigned long vaddr,struct page* page);
+#if DCACHE_WAY_SIZE > PAGE_SIZE
+extern void clear_user_page(void*, unsigned long, struct page*);
+extern void copy_user_page(void*, void*, unsigned long, struct page*);
 #else
-# define clear_user_page(page,vaddr,pg)		clear_page(page)
+# define clear_user_page(page, vaddr, pg)	clear_page(page)
 # define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 #endif
 
diff --git a/include/asm-xtensa/pgalloc.h b/include/asm-xtensa/pgalloc.h
index d56ddf2..3e5b565 100644
--- a/include/asm-xtensa/pgalloc.h
+++ b/include/asm-xtensa/pgalloc.h
@@ -1,11 +1,11 @@
 /*
- * linux/include/asm-xtensa/pgalloc.h
+ * include/asm-xtensa/pgalloc.h
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * Copyright (C) 2001-2005 Tensilica Inc.
+ * Copyright (C) 2001-2007 Tensilica Inc.
  */
 
 #ifndef _XTENSA_PGALLOC_H
@@ -13,103 +13,54 @@
 
 #ifdef __KERNEL__
 
-#include <linux/threads.h>
 #include <linux/highmem.h>
-#include <asm/processor.h>
-#include <asm/cacheflush.h>
-
-
-/* Cache aliasing:
- *
- * If the cache size for one way is greater than the page size, we have to
- * deal with cache aliasing. The cache index is wider than the page size:
- *
- *      |cache |
- * |pgnum |page|	virtual address
- * |xxxxxX|zzzz|
- * |      |    |
- *   \  / |    |
- *  trans.|    |
- *   /  \ |    |
- * |yyyyyY|zzzz|	physical address
- *
- * When the page number is translated to the physical page address, the lowest
- * bit(s) (X) that are also part of the cache index are also translated (Y).
- * If this translation changes this bit (X), the cache index is also afected,
- * thus resulting in a different cache line than before.
- * The kernel does not provide a mechanism to ensure that the page color
- * (represented by this bit) remains the same when allocated or when pages
- * are remapped. When user pages are mapped into kernel space, the color of
- * the page might also change.
- *
- * We use the address space VMALLOC_END ... VMALLOC_END + DCACHE_WAY_SIZE * 2
- * to temporarily map a patch so we can match the color.
- */
-
-#if (DCACHE_WAY_SIZE > PAGE_SIZE)
-# define PAGE_COLOR_MASK	(PAGE_MASK & (DCACHE_WAY_SIZE-1))
-# define PAGE_COLOR(a)		\
-	(((unsigned long)(a)&PAGE_COLOR_MASK) >> PAGE_SHIFT)
-# define PAGE_COLOR_EQ(a,b)	\
-	((((unsigned long)(a) ^ (unsigned long)(b)) & PAGE_COLOR_MASK) == 0)
-# define PAGE_COLOR_MAP0(v)	\
-	(VMALLOC_END + ((unsigned long)(v) & PAGE_COLOR_MASK))
-# define PAGE_COLOR_MAP1(v)	\
-	(VMALLOC_END + ((unsigned long)(v) & PAGE_COLOR_MASK) + DCACHE_WAY_SIZE)
-#endif
 
 /*
  * Allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
  */
 
-#define pgd_free(pgd)	free_page((unsigned long)(pgd))
-
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
-
-static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *pte)
-{
-	pmd_val(*(pmdp)) = (unsigned long)(pte);
-	__asm__ __volatile__ ("memw; dhwb %0, 0; dsync" :: "a" (pmdp));
-}
-
-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *page)
-{
-	pmd_val(*(pmdp)) = (unsigned long)page_to_virt(page);
-	__asm__ __volatile__ ("memw; dhwb %0, 0; dsync" :: "a" (pmdp));
-}
-
-
-
-#else
-
-# define pmd_populate_kernel(mm, pmdp, pte)				     \
-	(pmd_val(*(pmdp)) = (unsigned long)(pte))
-# define pmd_populate(mm, pmdp, page)					     \
-	(pmd_val(*(pmdp)) = (unsigned long)page_to_virt(page))
-
-#endif
+#define pmd_populate_kernel(mm, pmdp, ptep)				     \
+	(pmd_val(*(pmdp)) = ((unsigned long)ptep))
+#define pmd_populate(mm, pmdp, page)					     \
+	(pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page)))
 
 static inline pgd_t*
 pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd;
-
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGD_ORDER);
-
-	if (likely(pgd != NULL))
-		__flush_dcache_page((unsigned long)pgd);
-
-	return pgd;
+	return (pgd_t*) __get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
 }
 
-extern pte_t* pte_alloc_one_kernel(struct mm_struct* mm, unsigned long addr);
-extern struct page* pte_alloc_one(struct mm_struct* mm, unsigned long addr);
+static inline void pgd_free(pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
 
-#define pte_free_kernel(pte) free_page((unsigned long)pte)
-#define pte_free(pte) __free_page(pte)
+/* Use a slab cache for the pte pages (see also sparc64 implementation) */
+
+extern struct kmem_cache *pgtable_cache;
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 
+					 unsigned long address)
+{
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm, 
+					 unsigned long addr)
+{
+	return virt_to_page(pte_alloc_one_kernel(mm, addr));
+}
+
+static inline void pte_free_kernel(pte_t *pte)
+{
+	kmem_cache_free(pgtable_cache, pte);
+}
+
+static inline void pte_free(struct page *page)
+{
+	kmem_cache_free(pgtable_cache, page_address(page));
+}
 
 #endif /* __KERNEL__ */
 #endif /* _XTENSA_PGALLOC_H */
diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h
index 667a6c4..c0fcc1c 100644
--- a/include/asm-xtensa/pgtable.h
+++ b/include/asm-xtensa/pgtable.h
@@ -1,5 +1,5 @@
 /*
- * linux/include/asm-xtensa/pgtable.h
+ * include/asm-xtensa/pgtable.h
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -60,16 +60,20 @@
 #define FIRST_USER_ADDRESS	0
 #define FIRST_USER_PGD_NR	(FIRST_USER_ADDRESS >> PGDIR_SHIFT)
 
-/* virtual memory area. We keep a distance to other memory regions to be
+/*
+ * Virtual memory area. We keep a distance to other memory regions to be
  * on the safe side. We also use this area for cache aliasing.
  */
 
-// FIXME: virtual memory area must be configuration-dependent
-
 #define VMALLOC_START		0xC0000000
-#define VMALLOC_END		0xC7FF0000
+#define VMALLOC_END		0xC6FEFFFF
+#define TLBTEMP_BASE_1		0xC6FF0000
+#define TLBTEMP_BASE_2		0xC6FF8000
+#define MODULE_START		0xC7000000
+#define MODULE_END		0xC7FFFFFF
 
-/* Xtensa Linux config PTE layout (when present):
+/*
+ * Xtensa Linux config PTE layout (when present):
  *	31-12:	PPN
  *	11-6:	Software
  *	5-4:	RING
@@ -126,12 +130,13 @@
 #define PAGE_SHARED	   __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITABLE)
 #define PAGE_SHARED_EXEC \
 	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITABLE | _PAGE_HW_EXEC)
-#define PAGE_KERNEL	   __pgprot(_PAGE_PRESENT)
+#define PAGE_KERNEL	   __pgprot(_PAGE_PRESENT | _PAGE_HW_WRITE)
+#define PAGE_KERNEL_EXEC   __pgprot(_PAGE_PRESENT|_PAGE_HW_WRITE|_PAGE_HW_EXEC)
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
-# define _PAGE_DIRECTORY (_PAGE_VALID | _PAGE_ACCESSED | _PAGE_HW_WRITE)
+# define _PAGE_DIRECTORY (_PAGE_VALID | _PAGE_ACCESSED)
 #else
-# define _PAGE_DIRECTORY (_PAGE_VALID|_PAGE_ACCESSED|_PAGE_HW_WRITE|_PAGE_CA_WB)
+# define _PAGE_DIRECTORY (_PAGE_VALID | _PAGE_ACCESSED | _PAGE_CA_WB)
 #endif
 
 #else /* no mmu */
@@ -244,6 +249,10 @@
 static inline void update_pte(pte_t *ptep, pte_t pteval)
 {
 	*ptep = pteval;
+#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+	__asm__ __volatile__ ("dhwb %0, 0" :: "a" (ptep));
+#endif
+
 }
 
 struct mm_struct;
@@ -383,13 +392,12 @@
  * remap a physical page `pfn' of size `size' with page protection `prot'
  * into virtual address `from'
  */
+
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
                 remap_pfn_range(vma, from, pfn, size, prot)
 
 
-/* No page table caches to init */
-
-#define pgtable_cache_init()	do { } while (0)
+extern void pgtable_cache_init(void);
 
 typedef pte_t *pte_addr_t;
 
diff --git a/include/asm-xtensa/tlb.h b/include/asm-xtensa/tlb.h
index 4562b2d..4830232 100644
--- a/include/asm-xtensa/tlb.h
+++ b/include/asm-xtensa/tlb.h
@@ -11,14 +11,36 @@
 #ifndef _XTENSA_TLB_H
 #define _XTENSA_TLB_H
 
-#define tlb_start_vma(tlb,vma)			do { } while (0)
-#define tlb_end_vma(tlb,vma)			do { } while (0)
-#define __tlb_remove_tlb_entry(tlb,pte,addr)	do { } while (0)
+#include <asm/cache.h>
+#include <asm/page.h>
 
+#if (DCACHE_WAY_SIZE <= PAGE_SIZE)
+
+/* Note, read http://lkml.org/lkml/2004/1/15/6 */
+
+# define tlb_start_vma(tlb,vma)			do { } while (0)
+# define tlb_end_vma(tlb,vma)			do { } while (0)
+
+#else
+
+# define tlb_start_vma(tlb, vma)					      \
+	do {								      \
+		if (!tlb->fullmm)					      \
+			flush_cache_range(vma, vma->vm_start, vma->vm_end);   \
+	} while(0)
+
+# define tlb_end_vma(tlb, vma)						      \
+	do {								      \
+		if (!tlb->fullmm)					      \
+			flush_tlb_range(vma, vma->vm_start, vma->vm_end);     \
+	} while(0)
+
+#endif
+
+#define __tlb_remove_tlb_entry(tlb,pte,addr)	do { } while (0)
 #define tlb_flush(tlb)				flush_tlb_mm((tlb)->mm)
 
 #include <asm-generic/tlb.h>
-#include <asm/page.h>
 
 #define __pte_free_tlb(tlb,pte)			pte_free(pte)