[PARISC] Further work for multiple page sizes

More work towards supporting multiple page sizes on 64-bit. Convert
some code that assumed 64-bit always uses 3-level page tables to test
PT_NLEVELS instead. Also some BUG() to BUG_ON() conversions and some
cleanups to the assembler.

Signed-off-by: Helge Deller <deller@parisc-linux.org>
Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index e23c4e1..c11a5bc 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -288,8 +288,11 @@
 	DEFINE(ASM_PGD_ENTRY_SIZE, PGD_ENTRY_SIZE);
 	DEFINE(ASM_PMD_ENTRY_SIZE, PMD_ENTRY_SIZE);
 	DEFINE(ASM_PTE_ENTRY_SIZE, PTE_ENTRY_SIZE);
+	DEFINE(ASM_PFN_PTE_SHIFT, PFN_PTE_SHIFT);
 	DEFINE(ASM_PT_INITIAL, PT_INITIAL);
 	DEFINE(ASM_PAGE_SIZE, PAGE_SIZE);
+	DEFINE(ASM_PAGE_SIZE_DIV64, PAGE_SIZE/64);
+	DEFINE(ASM_PAGE_SIZE_DIV128, PAGE_SIZE/128);
 	BLANK();
 	DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
 	DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 7c95d76..d9e53cf 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -502,18 +502,20 @@
 	 * all ILP32 processes and all the kernel for machines with
 	 * under 4GB of memory) */
 	.macro		L3_ptep pgd,pte,index,va,fault
+#if PT_NLEVELS == 3 /* we might have a 2-level scheme, e.g. with 16kB page size */
 	extrd,u		\va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
 	copy		%r0,\pte
-	extrd,u,*=	\va,31,32,%r0
+	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	ldw,s		\index(\pgd),\pgd
-	extrd,u,*=	\va,31,32,%r0
+	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	bb,>=,n		\pgd,_PxD_PRESENT_BIT,\fault
-	extrd,u,*=	\va,31,32,%r0
+	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	shld		\pgd,PxD_VALUE_SHIFT,\index
-	extrd,u,*=	\va,31,32,%r0
+	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	copy		\index,\pgd
-	extrd,u,*<>	\va,31,32,%r0
+	extrd,u,*<>	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	ldo		ASM_PGD_PMD_OFFSET(\pgd),\pgd
+#endif
 	L2_ptep		\pgd,\pte,\index,\va,\fault
 	.endm
 
@@ -563,10 +565,18 @@
 	extrd,u,*= 	\pte,_PAGE_GATEWAY_BIT+32,1,%r0
 	depd		%r0,11,2,\prot	/* If Gateway, Set PL2 to 0 */
 
-	/* Get rid of prot bits and convert to page addr for iitlbt and idtlbt */
+	/* Enforce uncacheable pages.
+	 * This should ONLY be used for MMIO on PA 2.0 machines.
+	 * Memory/DMA is cache coherent on all PA 2.0 machines we support
+	 * (that means T-class is NOT supported) and the memory controllers
+	 * on most of those machines only handle cache transactions.
+	 */
+	extrd,u,*=	\pte,_PAGE_NO_CACHE_BIT+32,1,%r0
+	depi		1,12,1,\prot
 
-	depd		%r0,63,PAGE_SHIFT,\pte
-	extrd,s		\pte,(63-PAGE_SHIFT)+(63-58),64-PAGE_SHIFT,\pte
+	/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
+	extrd,u		\pte,(63-ASM_PFN_PTE_SHIFT)+(63-58),64-PAGE_SHIFT,\pte
+	depdi		_PAGE_SIZE_ENCODING_DEFAULT,63,63-58,\pte
 	.endm
 
 	/* Identical macro to make_insert_tlb above, except it
@@ -584,9 +594,8 @@
 
 	/* Get rid of prot bits and convert to page addr for iitlba */
 
-	depi		0,31,PAGE_SHIFT,\pte
+	depi		_PAGE_SIZE_ENCODING_DEFAULT,31,ASM_PFN_PTE_SHIFT,\pte
 	extru		\pte,24,25,\pte
-
 	.endm
 
 	/* This is for ILP32 PA2.0 only.  The TLB insertion needs
@@ -1201,10 +1210,9 @@
 	 */
 
 	/* adjust isr/ior. */
-
-	extrd,u         %r16,63,7,%r1    /* get high bits from isr for ior */
-	depd            %r1,31,7,%r17    /* deposit them into ior */
-	depdi           0,63,7,%r16      /* clear them from isr */
+	extrd,u         %r16,63,SPACEID_SHIFT,%r1	/* get high bits from isr for ior */
+	depd            %r1,31,SPACEID_SHIFT,%r17	/* deposit them into ior */
+	depdi           0,63,SPACEID_SHIFT,%r16		/* clear them from isr */
 #endif
 	STREG           %r16, PT_ISR(%r29)
 	STREG           %r17, PT_IOR(%r29)
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index 0b47afc..3e79e62 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -76,16 +76,16 @@
 	mtctl		%r4,%cr24	/* Initialize kernel root pointer */
 	mtctl		%r4,%cr25	/* Initialize user root pointer */
 
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
 	/* Set pmd in pgd */
 	load32		PA(pmd0),%r5
 	shrd            %r5,PxD_VALUE_SHIFT,%r3	
-        ldo             (PxD_FLAG_PRESENT+PxD_FLAG_VALID)(%r3),%r3	
+	ldo		(PxD_FLAG_PRESENT+PxD_FLAG_VALID)(%r3),%r3
 	stw		%r3,ASM_PGD_ENTRY*ASM_PGD_ENTRY_SIZE(%r4)
 	ldo		ASM_PMD_ENTRY*ASM_PMD_ENTRY_SIZE(%r5),%r4
 #else
 	/* 2-level page table, so pmd == pgd */
-        ldo             ASM_PGD_ENTRY*ASM_PGD_ENTRY_SIZE(%r4),%r4
+	ldo		ASM_PGD_ENTRY*ASM_PGD_ENTRY_SIZE(%r4),%r4
 #endif
 
 	/* Fill in pmd with enough pte directories */
@@ -99,7 +99,7 @@
 	stw		%r3,0(%r4)
 	ldo		(ASM_PAGE_SIZE >> PxD_VALUE_SHIFT)(%r3),%r3
 	addib,>		-1,%r1,1b
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
 	ldo             ASM_PMD_ENTRY_SIZE(%r4),%r4
 #else
 	ldo             ASM_PGD_ENTRY_SIZE(%r4),%r4
@@ -107,13 +107,14 @@
 
 
 	/* Now initialize the PTEs themselves */
-	ldo		_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */
+	ldo		0+_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */
+	ldi		(1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
 	load32		PA(pg0),%r1
 
 $pgt_fill_loop:
 	STREGM          %r3,ASM_PTE_ENTRY_SIZE(%r1)
-	ldo		ASM_PAGE_SIZE(%r3),%r3
-	bb,>=		%r3,31-KERNEL_INITIAL_ORDER,$pgt_fill_loop
+	ldo		(1<<PFN_PTE_SHIFT)(%r3),%r3 /* add one PFN */
+	addib,>		-1,%r11,$pgt_fill_loop
 	nop
 
 	/* Load the return address...er...crash 'n burn */
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index 7e898fd..8384bf9 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -53,17 +53,17 @@
 	__attribute__((aligned(128))) __attribute__((__section__(".data.init_task"))) =
 		{ INIT_THREAD_INFO(init_task) };
 
-#ifdef __LP64__
+#if PT_NLEVELS == 3
 /* NOTE: This layout exactly conforms to the hybrid L2/L3 page table layout
  * with the first pmd adjacent to the pgd and below it. gcc doesn't actually
  * guarantee that global objects will be laid out in memory in the same order 
  * as the order of declaration, so put these in different sections and use
  * the linker script to order them. */
-pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((aligned(PAGE_SIZE))) __attribute__ ((__section__ (".data.vm0.pmd"))) = { {0}, };
-
+pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data.vm0.pmd"), aligned(PAGE_SIZE)));
 #endif
-pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((aligned(PAGE_SIZE))) __attribute__ ((__section__ (".data.vm0.pgd"))) = { {0}, };
-pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((aligned(PAGE_SIZE))) __attribute__ ((__section__ (".data.vm0.pte")))  = { {0}, };
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data.vm0.pgd"), aligned(PAGE_SIZE)));
+pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data.vm0.pte"), aligned(PAGE_SIZE)));
 
 /*
  * Initial task structure.
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 7a4f07e..f600556 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -65,7 +65,7 @@
 	 */
 
 	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
-	rsm	PSW_SM_I, %r19		/* save I-bit state */
+	rsm		PSW_SM_I, %r19		/* save I-bit state */
 	load32		PA(1f), %r1
 	nop
 	nop
@@ -84,8 +84,7 @@
 	rfi
 	nop
 
-1:      ldil		L%PA(cache_info), %r1
-	ldo		R%PA(cache_info)(%r1), %r1
+1:      load32		PA(cache_info), %r1
 
 	/* Flush Instruction Tlb */
 
@@ -212,8 +211,7 @@
 	.entry
 
 	mtsp		%r0, %sr1
-	ldil		L%cache_info, %r1
-	ldo		R%cache_info(%r1), %r1
+	load32		cache_info, %r1
 
 	/* Flush Instruction Cache */
 
@@ -254,8 +252,7 @@
 	.entry
 
 	mtsp		%r0, %sr1
-	ldil		L%cache_info, %r1
-	ldo		R%cache_info(%r1), %r1
+	load32		cache_info, %r1
 
 	/* Flush Data Cache */
 
@@ -303,7 +300,8 @@
 	 */
 
 	ldd		0(%r25), %r19
-	ldi		32, %r1                 /* PAGE_SIZE/128 == 32 */
+	ldi		ASM_PAGE_SIZE_DIV128, %r1
+
 	ldw		64(%r25), %r0		/* prefetch 1 cacheline ahead */
 	ldw		128(%r25), %r0		/* prefetch 2 */
 
@@ -368,7 +366,7 @@
 	 * use ldd/std on a 32 bit kernel.
 	 */
 	ldw		0(%r25), %r19
-	ldi		64, %r1		/* PAGE_SIZE/64 == 64 */
+	ldi		ASM_PAGE_SIZE_DIV64, %r1
 
 1:
 	ldw		4(%r25), %r20
@@ -461,6 +459,7 @@
 	sub		%r25, %r1, %r23		/* move physical addr into non shadowed reg */
 
 	ldil		L%(TMPALIAS_MAP_START), %r28
+	/* FIXME for different page sizes != 4k */
 #ifdef CONFIG_64BIT
 	extrd,u		%r26,56,32, %r26		/* convert phys addr to tlb insert format */
 	extrd,u		%r23,56,32, %r23		/* convert phys addr to tlb insert format */
@@ -551,6 +550,7 @@
 #ifdef CONFIG_64BIT
 #if (TMPALIAS_MAP_START >= 0x80000000)
 	depdi		0, 31,32, %r28		/* clear any sign extension */
+	/* FIXME: page size dependent */
 #endif
 	extrd,u		%r26, 56,32, %r26	/* convert phys addr to tlb insert format */
 	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
@@ -566,10 +566,10 @@
 	pdtlb		0(%r28)
 
 #ifdef CONFIG_64BIT
-	ldi		32, %r1			/* PAGE_SIZE/128 == 32 */
+	ldi		ASM_PAGE_SIZE_DIV128, %r1
 
 	/* PREFETCH (Write) has not (yet) been proven to help here */
-/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */
+	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */
 
 1:	std		%r0, 0(%r28)
 	std		%r0, 8(%r28)
@@ -591,8 +591,7 @@
 	ldo		128(%r28), %r28
 
 #else	/* ! CONFIG_64BIT */
-
-	ldi		64, %r1			/* PAGE_SIZE/64 == 64 */
+	ldi		ASM_PAGE_SIZE_DIV64, %r1
 
 1:
 	stw		%r0, 0(%r28)
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index af88afe..479d9a0 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -55,7 +55,7 @@
 	 * pointers.
 	 */
 
-	.align 4096
+	.align ASM_PAGE_SIZE
 linux_gateway_page:
 
         /* ADDRESS 0x00 to 0xb0 = 176 bytes / 4 bytes per insn = 44 insns */
@@ -632,7 +632,7 @@
 end_compare_and_swap:
 
 	/* Make sure nothing else is placed on this page */
-	.align 4096
+	.align ASM_PAGE_SIZE
 	.export end_linux_gateway_page
 end_linux_gateway_page:
 
@@ -652,7 +652,7 @@
 
 	.section .rodata,"a"
 
-	.align 4096
+	.align ASM_PAGE_SIZE
 	/* Light-weight-syscall table */
 	/* Start of lws table. */
 	.export lws_table
@@ -662,14 +662,14 @@
 	LWS_ENTRY(compare_and_swap64)	/* 1 - ELF64 Atomic compare and swap */
 	/* End of lws table */
 
-	.align 4096
+	.align ASM_PAGE_SIZE
 	.export sys_call_table
 .Lsys_call_table:
 sys_call_table:
 #include "syscall_table.S"
 
 #ifdef CONFIG_64BIT
-	.align 4096
+	.align ASM_PAGE_SIZE
 	.export sys_call_table64
 .Lsys_call_table64:
 sys_call_table64:
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 6d6436a..94dcc03 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -6,6 +6,7 @@
  *    Copyright (C) 2000 Michael Ang <mang with subcarrier.org>
  *    Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
  *    Copyright (C) 2003 James Bottomley <jejb with parisc-linux.org>
+ *    Copyright (C) 2006 Helge Deller <deller@gmx.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -27,6 +28,7 @@
 /* needed for the processor specific cache alignment size */	
 #include <asm/cache.h>
 #include <asm/page.h>
+#include <asm/asm-offsets.h>
 	
 /* ld script to make hppa Linux kernel */
 #ifndef CONFIG_64BIT
@@ -68,7 +70,7 @@
   RODATA
 
   /* writeable */
-  . = ALIGN(4096);		/* Make sure this is page aligned so
+  . = ALIGN(ASM_PAGE_SIZE);	/* Make sure this is page aligned so
   				   that we can properly leave these
 				   as writable */
   data_start = .;
@@ -81,23 +83,17 @@
   __start___unwind = .;         /* unwind info */
   .PARISC.unwind : { *(.PARISC.unwind) }
   __stop___unwind = .;
- 
+
+  /* rarely changed data like cpu maps */
+  . = ALIGN(16);
+  .data.read_mostly : { *(.data.read_mostly) }
+
+  . = ALIGN(L1_CACHE_BYTES);
   .data : {			/* Data */
 	*(.data)
-	*(.data.vm0.pmd)
-	*(.data.vm0.pgd)
-	*(.data.vm0.pte)
 	CONSTRUCTORS
 	}
 
-  . = ALIGN(4096);
-  /* nosave data is really only used for software suspend...it's here
-   * just in case we ever implement it */
-  __nosave_begin = .;
-  .data_nosave : { *(.data.nosave) }
-  . = ALIGN(4096);
-  __nosave_end = .;
-
   . = ALIGN(L1_CACHE_BYTES);
   .data.cacheline_aligned : { *(.data.cacheline_aligned) }
 
@@ -105,12 +101,29 @@
   . = ALIGN(16);
   .data.lock_aligned : { *(.data.lock_aligned) }
 
-  /* rarely changed data like cpu maps */
-  . = ALIGN(16);
-  .data.read_mostly : { *(.data.read_mostly) }
+  . = ALIGN(ASM_PAGE_SIZE);
+  /* nosave data is really only used for software suspend...it's here
+   * just in case we ever implement it */
+  __nosave_begin = .;
+  .data_nosave : { *(.data.nosave) }
+  . = ALIGN(ASM_PAGE_SIZE);
+  __nosave_end = .;
 
   _edata = .;			/* End of data section */
 
+  __bss_start = .;		/* BSS */
+  /* page table entries need to be PAGE_SIZE aligned */
+  . = ALIGN(ASM_PAGE_SIZE);
+  .data.vmpages : {
+	*(.data.vm0.pmd)
+	*(.data.vm0.pgd)
+	*(.data.vm0.pte)
+	}
+  .bss : { *(.bss) *(COMMON) }
+  __bss_stop = .;
+
+
+  /* assembler code expects init_task to be 16k aligned */
   . = ALIGN(16384); 		/* init_task */
   .data.init_task : { *(.data.init_task) }
 
@@ -126,6 +139,7 @@
   .dlt : { *(.dlt) }
 #endif
 
+  /* reserve space for interrupt stack by aligning __init* to 16k */
   . = ALIGN(16384);
   __init_begin = .;
   .init.text : { 
@@ -166,7 +180,7 @@
      from .altinstructions and .eh_frame */
   .exit.text : { *(.exit.text) }
   .exit.data : { *(.exit.data) }
-  . = ALIGN(4096);
+  . = ALIGN(ASM_PAGE_SIZE);
   __initramfs_start = .;
   .init.ramfs : { *(.init.ramfs) }
   __initramfs_end = .;
@@ -174,14 +188,10 @@
   __per_cpu_start = .;
   .data.percpu  : { *(.data.percpu) }
   __per_cpu_end = .;
-  . = ALIGN(4096);
+  . = ALIGN(ASM_PAGE_SIZE);
   __init_end = .;
   /* freed after init ends here */
 	
-  __bss_start = .;		/* BSS */
-  .bss : { *(.bss) *(COMMON) }
-  __bss_stop = .; 
-
   _end = . ;
 
   /* Sections to be discarded */