[SPARC64]: Access TSB with physical addresses when possible.

This way we don't need to lock the TSB into the TLB.
The trick is that every TSB load/store is recorded in a
special instruction patch section.  The default instructions
use virtual addresses; the patch replacements use
physical-address loads/stores.
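
For illustration only (the macro definitions live in the tsb.h
header, which is not part of the hunks below, so the exact names
and ASIs here are assumptions), the idea is roughly:

	/* Sketch of a patchable TSB quad load: the default insn uses
	 * the virtual-address nucleus ASI, and its address plus a
	 * physical-ASI replacement are recorded in .tsb_phys_patch so
	 * boot code can rewrite it in place on capable chips.
	 */
	#define TSB_LOAD_QUAD(TSB, REG)				\
	661:	ldda	[TSB] ASI_NUCLEUS_QUAD_LDD, REG;	\
		.section .tsb_phys_patch, "ax";			\
		.word	661b;					\
		ldda	[TSB] ASI_QUAD_LDD_PHYS, REG;		\
		.previous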

We can't do this on all chips because only cheetah+ and later
have the physical variant of the atomic quad load.
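
The __tsb_phys_patch section added to the linker script below is
what makes the rewrite possible.  A minimal sketch of how it could
be consumed at boot, assuming a two-word {address, replacement insn}
entry layout (the names here are assumptions, not part of these
hunks):

	/* Sketch: walk the .tsb_phys_patch table collected by the
	 * linker and overwrite each recorded instruction with its
	 * physical-ASI variant, flushing the I-cache line afterwards.
	 * This would only be called (e.g. from paging_init()) on
	 * cheetah+ and later chips, which have the physical atomic
	 * quad load.
	 */
	struct tsb_phys_patch_entry {
		unsigned int	addr;	/* location of the default insn */
		unsigned int	insn;	/* physical-ASI replacement      */
	};
	extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;

	static void __init tsb_phys_patch(void)
	{
		struct tsb_phys_patch_entry *p = &__tsb_phys_patch;

		while (p < &__tsb_phys_patch_end) {
			unsigned long addr = p->addr;

			*(unsigned int *) addr = p->insn;
			wmb();
			__asm__ __volatile__("flush	%0"
					     : /* no outputs */
					     : "r" (addr));
			p++;
		}
	}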

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S
index d0f1565..2ef6f6e 100644
--- a/arch/sparc64/kernel/dtlb_miss.S
+++ b/arch/sparc64/kernel/dtlb_miss.S
@@ -4,7 +4,7 @@
 	srlx	%g6, 48, %g5			! Get context
 	brz,pn	%g5, kvmap_dtlb			! Context 0 processing
 	 nop					! Delay slot (fill me)
-	ldda	[%g1] ASI_NUCLEUS_QUAD_LDD, %g4	! Load TSB entry
+	TSB_LOAD_QUAD(%g1, %g4)			! Load TSB entry
 	nop					! Push branch to next I$ line
 	cmp	%g4, %g6			! Compare TAG
 
diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S
index 6b6c8fe..97facce 100644
--- a/arch/sparc64/kernel/itlb_miss.S
+++ b/arch/sparc64/kernel/itlb_miss.S
@@ -4,7 +4,7 @@
 	srlx	%g6, 48, %g5			! Get context
 	brz,pn	%g5, kvmap_itlb			! Context 0 processing
 	 nop					! Delay slot (fill me)
-	ldda	[%g1] ASI_NUCLEUS_QUAD_LDD, %g4	! Load TSB entry
+	TSB_LOAD_QUAD(%g1, %g4)			! Load TSB entry
 	cmp	%g4, %g6			! Compare TAG
 	sethi	%hi(_PAGE_EXEC), %g4		! Setup exec check
 
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
index 2b5e71b..9b415ab 100644
--- a/arch/sparc64/kernel/ktlb.S
+++ b/arch/sparc64/kernel/ktlb.S
@@ -44,14 +44,14 @@
 kvmap_itlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
-	TSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g4)
 
 	/* Load and check PTE.  */
 	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
 	brgez,a,pn	%g5, kvmap_itlb_longpath
-	 stx		%g0, [%g1]
+	 KTSB_STORE(%g1, %g0)
 
-	TSB_WRITE(%g1, %g5, %g6)
+	KTSB_WRITE(%g1, %g5, %g6)
 
 	/* fallthrough to TLB load */
 
@@ -69,9 +69,9 @@
 kvmap_itlb_obp:
 	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
 
-	TSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g4)
 
-	TSB_WRITE(%g1, %g5, %g6)
+	KTSB_WRITE(%g1, %g5, %g6)
 
 	ba,pt		%xcc, kvmap_itlb_load
 	 nop
@@ -79,9 +79,9 @@
 kvmap_dtlb_obp:
 	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
 
-	TSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g4)
 
-	TSB_WRITE(%g1, %g5, %g6)
+	KTSB_WRITE(%g1, %g5, %g6)
 
 	ba,pt		%xcc, kvmap_dtlb_load
 	 nop
@@ -114,14 +114,14 @@
 kvmap_dtlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
-	TSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g4)
 
 	/* Load and check PTE.  */
 	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
 	brgez,a,pn	%g5, kvmap_dtlb_longpath
-	 stx		%g0, [%g1]
+	 KTSB_STORE(%g1, %g0)
 
-	TSB_WRITE(%g1, %g5, %g6)
+	KTSB_WRITE(%g1, %g5, %g6)
 
 	/* fallthrough to TLB load */
 
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index e1dd37f..ff6a79b 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -53,7 +53,7 @@
 	/* Load and check PTE.  */
 	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
 	brgez,a,pn	%g5, tsb_do_fault
-	 stx		%g0, [%g1]
+	 TSB_STORE(%g1, %g0)
 
 	/* If it is larger than the base page size, don't
 	 * bother putting it into the TSB.
@@ -64,7 +64,7 @@
 	and		%g2, %g4, %g2
 	cmp		%g2, %g7
 	bne,a,pn	%xcc, tsb_tlb_reload
-	 stx		%g0, [%g1]
+	 TSB_STORE(%g1, %g0)
 
 	TSB_WRITE(%g1, %g5, %g6)
 
@@ -131,13 +131,13 @@
 
 	/* Insert an entry into the TSB.
 	 *
-	 * %o0: TSB entry pointer
+	 * %o0: TSB entry pointer (virt or phys address)
 	 * %o1: tag
 	 * %o2:	pte
 	 */
 	.align	32
-	.globl	tsb_insert
-tsb_insert:
+	.globl	__tsb_insert
+__tsb_insert:
 	rdpr	%pstate, %o5
 	wrpr	%o5, PSTATE_IE, %pstate
 	TSB_LOCK_TAG(%o0, %g2, %g3)
@@ -146,6 +146,31 @@
 	retl
 	 nop
 
+	/* Flush the given TSB entry if it has the matching
+	 * tag.
+	 *
+	 * %o0: TSB entry pointer (virt or phys address)
+	 * %o1:	tag
+	 */
+	.align	32
+	.globl	tsb_flush
+tsb_flush:
+	sethi	%hi(TSB_TAG_LOCK_HIGH), %g2
+1:	TSB_LOAD_TAG(%o0, %g1)
+	srlx	%g1, 32, %o3
+	andcc	%o3, %g2, %g0
+	bne,pn	%icc, 1b
+	 membar	#LoadLoad
+	cmp	%g1, %o1
+	bne,pt	%xcc, 2f
+	 clr	%o3
+	TSB_CAS_TAG(%o0, %g1, %o3)
+	cmp	%g1, %o3
+	bne,pn	%xcc, 1b
+	 nop
+2:	retl
+	 TSB_MEMBAR
+
 	/* Reload MMU related context switch state at
 	 * schedule() time.
 	 *
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index 467d13a..71b943f 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -70,6 +70,10 @@
   .con_initcall.init : { *(.con_initcall.init) }
   __con_initcall_end = .;
   SECURITY_INIT
+  . = ALIGN(4);
+  __tsb_phys_patch = .;
+  .tsb_phys_patch : { *(.tsb_phys_patch) }
+  __tsb_phys_patch_end = .;
   . = ALIGN(8192); 
   __initramfs_start = .;
   .init.ramfs : { *(.init.ramfs) }