powerpc: Use 64k pages without needing cache-inhibited large pages

Some POWER5+ machines can do 64k hardware pages for normal memory but
not for cache-inhibited pages.  This patch lets us use 64k hardware
pages for most user processes on such machines (assuming the kernel
has been configured with CONFIG_PPC_64K_PAGES=y).  User processes
start out using 64k pages and get switched to 4k pages if they use any
non-cacheable mappings.

With this, we use 64k pages for the vmalloc region and 4k pages for
the imalloc region.  If anything creates a non-cacheable mapping in
the vmalloc region, the vmalloc region will get switched to 4k pages.
I don't know of any driver other than the DRM that would do this,
though, and these machines don't have AGP.

When a region gets switched from 64k pages to 4k pages, we do not have
to clear out all the 64k HPTEs from the hash table immediately.  We
use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page
was hashed in as a 64k page or a set of 4k pages.  If hash_page is
trying to insert a 4k page for a Linux PTE and it sees that it has
already been inserted as a 64k page, it first invalidates the 64k HPTE
before inserting the 4k HPTE.  The hash invalidation routines also use
the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a
set of 4k HPTEs to remove.  With those two changes, we can tolerate a
mix of 4k and 64k HPTEs in the hash table, and they will all get
removed when the address space is torn down.

Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index abfaabf..8548dcf 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -59,10 +59,19 @@
 	li	r11,0
 	b	slb_finish_load
 
-1:	/* vmalloc/ioremap mapping encoding bits, the "li" instruction below
+1:	/* vmalloc/ioremap mapping encoding bits, the "li" instructions below
 	 * will be patched by the kernel at boot
 	 */
-_GLOBAL(slb_miss_kernel_load_virtual)
+BEGIN_FTR_SECTION
+	/* check whether this is in vmalloc or ioremap space */
+	clrldi	r11,r10,48
+	cmpldi	r11,(VMALLOC_SIZE >> 28) - 1
+	bgt	5f
+	lhz	r11,PACAVMALLOCSLLP(r13)
+	b	slb_finish_load
+5:
+END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
+_GLOBAL(slb_miss_kernel_load_io)
 	li	r11,0
 	b	slb_finish_load
 
@@ -96,9 +105,7 @@
 1:
 #endif /* CONFIG_HUGETLB_PAGE */
 
-_GLOBAL(slb_miss_user_load_normal)
-	li	r11,0
-
+	lhz	r11,PACACONTEXTSLLP(r13)
 2:
 	ld	r9,PACACONTEXTID(r13)
 	rldimi	r10,r9,USER_ESID_BITS,0