Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
new file mode 100644
index 0000000..b73a3c8
--- /dev/null
+++ b/arch/sparc64/kernel/dtlb_backend.S
@@ -0,0 +1,181 @@
+/* $Id: dtlb_backend.S,v 1.16 2001/10/09 04:02:11 davem Exp $
+ * dtlb_backend.S: Back end to DTLB miss replacement strategy.
+ *                 This is included directly into the trap table.
+ *
+ * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997,1998 Jakub Jelinek   (jj@ultra.linux.cz)
+ */
+
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+
+#if PAGE_SHIFT == 13		/* 8K pages */
+#define SZ_BITS		_PAGE_SZ8K
+#elif PAGE_SHIFT == 16		/* 64K pages */
+#define SZ_BITS		_PAGE_SZ64K
+#elif PAGE_SHIFT == 19		/* 512K pages */
+#define SZ_BITS		_PAGE_SZ512K
+#elif PAGE_SHIFT == 22		/* 4M pages */
+#define SZ_BITS		_PAGE_SZ4M
+#endif				/* NOTE(review): no #else here; any other PAGE_SHIFT leaves SZ_BITS undefined */
+
+#define VALID_SZ_BITS	(_PAGE_VALID | SZ_BITS)	/* used only as (VALID_SZ_BITS >> 61) << 61, i.e. bits 61-63 */
+
+#define VPTE_BITS		(_PAGE_CP | _PAGE_CV | _PAGE_P )	/* attribute bits OR'ed into the VPTE TLB data */
+#define VPTE_SHIFT		(PAGE_SHIFT - 3)	/* 3 = log2(8), each PTE being 8 bytes */
+
+/* Ways we can get here:
+ *
+ * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1.
+ * 2) Nucleus loads and stores to/from user/kernel window save areas.
+ * 3) VPTE misses from dtlb_base and itlb_base.
+ *
+ * We need to extract out the PMD and PGDIR indexes from the
+ * linear virtual page table access address.  The PTE index
+ * is at the bottom, but we are not concerned with it.  Bits
+ * 0 to 2 are clear since each PTE is 8 bytes in size.  Each
+ * PMD and PGDIR entry are 4 bytes in size.   Thus, this
+ * address looks something like:
+ *
+ * |---------------------------------------------------------------|
+ * |  ...   |    PGDIR index    |    PMD index    | PTE index  |   |
+ * |---------------------------------------------------------------|
+ *   63   F   E               D   C             B   A         3 2 0  <- bit nr
+ *
+ *  The variable bits above are defined as:
+ *  A --> 3 + (PAGE_SHIFT - log2(8)) - 1
+ *    --> 3 + (PAGE_SHIFT - 3) - 1
+ *        (ie. this is "bit 3" + PAGE_SHIFT - log2(size of PTE entry in bytes) - 1)
+ *  B --> A + 1
+ *  C --> B + (PAGE_SHIFT - log2(4)) - 1
+ *    -->  B + (PAGE_SHIFT - 2) - 1
+ *        (ie. this is "bit B" + PAGE_SHIFT - log2(size of PMD entry in bytes) - 1)
+ *  D --> C + 1
+ *  E --> D + (PAGE_SHIFT - log2(4)) - 1
+ *    --> D + (PAGE_SHIFT - 2) - 1
+ *        (ie. this is "bit D" + PAGE_SHIFT - log2(size of PGDIR entry in bytes) - 1)
+ *  F --> E + 1
+ *
+ * (Note how "B" always evaluates to PAGE_SHIFT; all the other constants
+ *  cancel out.)
+ *
+ * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are:
+ * A --> 12
+ * B --> 13
+ * C --> 23
+ * D --> 24
+ * E --> 34
+ * F --> 35
+ *
+ * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are:
+ * A --> 15
+ * B --> 16
+ * C --> 29
+ * D --> 30
+ * E --> 43
+ * F --> 44
+ *
+ * Because bits both above and below each PGDIR and PMD index need to
+ * be masked out, and the index can be as long as 14 bits (when using a
+ * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions
+ * to extract each index out.
+ *
+ * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so
+ * we try to avoid using them for the entire operation.  We could setup
+ * a mask anywhere from bit 31 down to bit 10 using the sethi instruction.
+ *
+ * We need a mask covering bits B --> C and one covering D --> E.
+ * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000.
+ * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000.
+ * The second in each set cannot be loaded with a single sethi
+ * instruction, because its upper bits lie above bit 31.  We would
+ * need to use a sethi + a shift.
+ *
+ * For the time being, we use 2 shifts and a simple "and" mask.
+ * We shift left to clear the bits above the index, we shift down
+ * to clear the bits below the index (sans the log2(4 or 8) bits)
+ * and a mask to clear the log2(4 or 8) bits.  We therefore need to
+ * define 4 shift counts, all of which are relative to PAGE_SHIFT.
+ *
+ * Although unsupportable for other reasons, this does mean that
+ * 512K and 4MB page sizes would be generally supported by the
+ * kernel.  (ELF binaries would break with > 64K PAGE_SIZE since
+ * the sections are only aligned that strongly).
+ *
+ * The operations performed for extraction are thus:
+ *
+ *      ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3
+ *
+ */
+
+#define A (3 + (PAGE_SHIFT - 3) - 1)	/* top bit of the PTE index */
+#define B (A + 1)			/* lowest bit of the PMD index (== PAGE_SHIFT) */
+#define C (B + (PAGE_SHIFT - 2) - 1)	/* top bit of the PMD index */
+#define D (C + 1)			/* lowest bit of the PGDIR index */
+#define E (D + (PAGE_SHIFT - 2) - 1)	/* top bit of the PGDIR index */
+#define F (E + 1)			/* first bit above the PGDIR index */
+
+#define PMD_SHIFT_LEFT		(64 - D)		/* sllx count: discards bits D..63 */
+#define PMD_SHIFT_RIGHT		(64 - (D - B) - 2)	/* srlx count: leaves PMD index * 4 */
+#define PGDIR_SHIFT_LEFT 	(64 - F)		/* sllx count: discards bits F..63 */
+#define PGDIR_SHIFT_RIGHT	(64 - (F - D) - 2)	/* srlx count: leaves PGDIR index * 4 */
+#define LOW_MASK_BITS		0x3			/* the 2 stray bits below the index, cleared with andn */
+
+/* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss	*/
+	ldxa		[%g1 + %g1] ASI_DMMU, %g4	! %g4 = TAG_ACCESS (at offset %g1 + %g1; %g1 is reloaded with TLB_SFSR before exit)
+	add		%g3, %g3, %g5			! %g5 = VPTE base, computed as 2 * %g3
+	cmp		%g4, %g5			! TAG_ACCESS vs. VPTE base
+	bgeu,pt		%xcc, 1f			! Unsigned >= base: VPTE miss, continue at 1:
+	 andcc		%g4, TAG_CONTEXT_BITS, %g5	! (delay slot) %g5 = context bits; Z flag is consumed by the be,pn below
+	ba,a,pt		%xcc, from_tl1_trap		! Not a VPTE miss: fall to tl0 miss (annulled, no delay slot)
+1:	sllx		%g6, VPTE_SHIFT, %g4		! %g4 = %g6 << VPTE_SHIFT (presumably %g6 = VPTE offset from the miss front end - confirm)
+	or		%g4, %g5, %g4			! Merge context bits; %g4 is written back to TAG_ACCESS before retry
+
+/* TLB1 ** ICACHE line 2: Quick VPTE miss	  	*/
+	mov		TSB_REG, %g1			! %g1 = TSB register offset (clobbers %g1)
+	ldxa		[%g1] ASI_DMMU, %g5		! %g5 = TSB reg; non-zero means a PGD value is cached there
+	sllx		%g6, PMD_SHIFT_LEFT, %g1	! Shift out the bits above the PMD index
+	be,pn		%xcc, sparc64_vpte_nucleus	! Context bits were zero: miss came from Nucleus
+	 srlx		%g1, PMD_SHIFT_RIGHT, %g1	! (delay slot) Shift down to PMD index * 4
+	brnz,pt		%g5, sparc64_vpte_continue	! Cached PGD present: skip the PGD load
+	 andn		%g1, LOW_MASK_BITS, %g1		! (delay slot) %g1 = PMD byte offset, low 2 bits cleared
+	sllx		%g6, PGDIR_SHIFT_LEFT, %g5	! Shift out the bits above the PGDIR index
+
+/* TLB1 ** ICACHE line 3: Quick VPTE miss	  	*/
+	srlx		%g5, PGDIR_SHIFT_RIGHT, %g5	! Shift down to PGDIR index * 4
+	andn		%g5, LOW_MASK_BITS, %g5		! %g5 = PGDIR byte offset, low 2 bits cleared
+	lduwa		[%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load 32-bit PGD entry (%g7 presumably holds the PGD base - confirm)
+	brz,pn		%g5, vpte_noent			! Zero PGD entry: no mapping
+sparc64_kpte_continue:					! Not referenced in this file; sits on the delay-slot instruction
+	 sllx		%g5, 11, %g5			! (delay slot) entry << 11, used as the PMD table base below
+sparc64_vpte_continue:					! Target of the brnz above: %g5 = table base, %g1 = PMD byte offset
+	lduwa		[%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load 32-bit PMD entry
+	sllx		%g5, 11, %g5			! entry << 11, becomes the address part of the VPTE data
+	brz,pn		%g5, vpte_noent			! Zero PMD entry: no mapping
+
+/* TLB1 ** ICACHE line 4: Quick VPTE miss	  	*/
+	 mov		(VALID_SZ_BITS >> 61), %g1	! (delay slot, runs on both paths) bits 61-63 of VALID_SZ_BITS as a small immediate
+	sllx		%g1, 61, %g1			! Shift them back up to bits 61-63
+	or		%g5, VPTE_BITS, %g5		! VPTE data = table address | VPTE_BITS
+	or		%g5, %g1, %g5			! ... | valid and page-size bits
+	mov		TLB_SFSR, %g1			! Restore %g1 to TLB_SFSR
+	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Load VPTE into TLB (done before TAG_ACCESS is rewritten below)
+	stxa		%g4, [%g1 + %g1] ASI_DMMU	! Restore previous TAG_ACCESS from %g4
+	retry						! Re-execute the access that missed; it loads the PTE once again
+
+#undef SZ_BITS			/* Drop this file's local macros: it is #included directly into the trap table */
+#undef VALID_SZ_BITS
+#undef VPTE_SHIFT
+#undef VPTE_BITS
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef PMD_SHIFT_LEFT
+#undef PMD_SHIFT_RIGHT
+#undef PGDIR_SHIFT_LEFT
+#undef PGDIR_SHIFT_RIGHT
+#undef LOW_MASK_BITS
+