[SPARC64]: Fix some SUN4V TLB miss bugs.

Code patching did not sign-extend negative branch
offsets correctly.
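
For illustration only (this sketch is not part of the patch):
the "ba,pt %xcc" displacement is a 19-bit signed word offset.
A bare "srl %g1, 2" keeps the upper bits of a negative byte
offset, and those bits then corrupt the opcode field when
OR'd in; the fixed pair "sll 11; srl 11 + 2" isolates exactly
bits [20:2].  A minimal user-space C equivalent:

	#include <stdint.h>
	#include <stdio.h>

	#define BRANCH_ALWAYS	0x10680000u	/* "ba,pt %xcc" template */

	static uint32_t encode_ba_pt(int64_t byte_offset)
	{
		uint32_t off = (uint32_t) byte_offset;	/* low 32 bits, as %g1 */

		/* Buggy: BRANCH_ALWAYS | (off >> 2) lets bits [29:19] of
		 * a negative offset leak into the opcode field.  Fixed:
		 * keep only the 19-bit word displacement.
		 */
		return BRANCH_ALWAYS | ((off << 11) >> (11 + 2));
	}

	int main(void)
	{
		/* -8 bytes is word displacement -2, i.e. 0x7fffe in 19
		 * bits, so this prints 106ffffe.
		 */
		printf("%08x\n", (unsigned) encode_ba_pt(-8));
		return 0;
	}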

The kernel TLB miss path needs patching and %g4 register
preservation in order to handle SUN4V correctly: on sun4v
the fault address lives in %g4, so the KTSB_LOCK_TAG
sequences now use %g7 as their scratch register, and the
TLB data-in store sites are patched at boot into branches
to the sun4v TLB load code.  Also fix the UTSB register
scratchpad read to use [%g1] rather than [%g1 + %g1].
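
Each .sun4v_2insn_patch entry added below records a patch site
address (the "661:" label) followed by the two instruction
words to write there.  As a rough sketch of how boot code
applies such a table (illustrative C only; the entry and
function names are assumptions, not the exact kernel code):

	struct sun4v_2insn_patch_entry {
		unsigned int	addr;		/* address of the 661: site */
		unsigned int	insns[2];	/* two replacement words    */
	};

	static void apply_2insn_patches(struct sun4v_2insn_patch_entry *p,
					struct sun4v_2insn_patch_entry *pend)
	{
		for (; p < pend; p++) {
			unsigned int *insn =
				(unsigned int *) (unsigned long) p->addr;

			insn[0] = p->insns[0];	/* e.g. nop over the stxa  */
			insn[1] = p->insns[1];	/* e.g. nop over the retry */
			/* Real code must also flush the I-cache for both
			 * modified words.
			 */
		}
	}

On sun4v this turns each stxa/retry TLB-load pair into two
nops, so execution falls through to the branch that follows,
into the sun4v TLB load code.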

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
index f6bb2e0..2d333ab 100644
--- a/arch/sparc64/kernel/ktlb.S
+++ b/arch/sparc64/kernel/ktlb.S
@@ -48,7 +48,7 @@
 kvmap_itlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g7)
 
 	/* Load and check PTE.  */
 	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
@@ -60,8 +60,29 @@
 	/* fallthrough to TLB load */
 
 kvmap_itlb_load:
-	stxa		%g5, [%g0] ASI_ITLB_DATA_IN	! Reload TLB
+
+661:	stxa		%g5, [%g0] ASI_ITLB_DATA_IN
 	retry
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	nop
+	nop
+	.previous
+
+	/* For sun4v the ASI_ITLB_DATA_IN store and the retry
+	 * instruction get nop'd out and we get here to branch
+	 * to the sun4v TLB load code.  The registers are set up
+	 * as follows:
+	 *
+	 * %g4: vaddr
+	 * %g5: PTE
+	 * %g6: TAG
+	 *
+	 * The sun4v TLB load wants the PTE in %g3 so we fix that
+	 * up here.
+	 */
+	ba,pt		%xcc, sun4v_itlb_load
+	 mov		%g5, %g3
 
 kvmap_itlb_longpath:
 
@@ -80,7 +101,7 @@
 kvmap_itlb_obp:
 	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g7)
 
 	KTSB_WRITE(%g1, %g5, %g6)
 
@@ -90,7 +111,7 @@
 kvmap_dtlb_obp:
 	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g7)
 
 	KTSB_WRITE(%g1, %g5, %g6)
 
@@ -129,7 +150,7 @@
 kvmap_dtlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g4)
+	KTSB_LOCK_TAG(%g1, %g2, %g7)
 
 	/* Load and check PTE.  */
 	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
@@ -141,8 +162,29 @@
 	/* fallthrough to TLB load */
 
 kvmap_dtlb_load:
-	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
+
+661:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
 	retry
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	nop
+	nop
+	.previous
+
+	/* For sun4v the ASI_DTLB_DATA_IN store and the retry
+	 * instruction get nop'd out and we get here to branch
+	 * to the sun4v TLB load code.  The registers are set up
+	 * as follows:
+	 *
+	 * %g4: vaddr
+	 * %g5: PTE
+	 * %g6: TAG
+	 *
+	 * The sun4v TLB load wants the PTE in %g3 so we fix that
+	 * up here.
+	 */
+	ba,pt		%xcc, sun4v_dtlb_load
+	 mov		%g5, %g3
 
 kvmap_dtlb_nonlinear:
 	/* Catch kernel NULL pointer derefs.  */
@@ -185,10 +227,17 @@
 	nop
 	.previous
 
-	rdpr	%tl, %g4
-	cmp	%g4, 1
-	mov	TLB_TAG_ACCESS, %g4
+	rdpr	%tl, %g3
+	cmp	%g3, 1
+
+661:	mov	TLB_TAG_ACCESS, %g4
 	ldxa	[%g4] ASI_DMMU, %g5
+	.section .sun4v_2insn_patch, "ax"
+	.word	661b
+	mov	%g4, %g5
+	nop
+	.previous
+
 	be,pt	%xcc, sparc64_realfault_common
 	 mov	FAULT_CODE_DTLB, %g4
 	ba,pt	%xcc, winfix_trampoline
diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S
index f712913..597359c 100644
--- a/arch/sparc64/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc64/kernel/sun4v_tlb_miss.S
@@ -96,7 +96,7 @@
 	
 	/* Load UTSB reg into %g1.  */
 	mov	SCRATCHPAD_UTSBREG1, %g1
-	ldxa	[%g1 + %g1] ASI_SCRATCHPAD, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
 
 	LOAD_DTLB_INFO(%g2, %g4, %g5)
 	COMPUTE_TAG_TARGET(%g6, %g4, %g5, %g3, kvmap_dtlb_4v)
@@ -149,14 +149,20 @@
 	 * SCRATCHPAD_MMU_MISS contents in %g2.
 	 */
 sun4v_itsb_miss:
-	ba,pt	%xcc, sun4v_tsb_miss_common
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
+	brz,pn	%g5, kvmap_itlb_4v
 	 mov	FAULT_CODE_ITLB, %g3
+	ba,a,pt	%xcc, sun4v_tsb_miss_common
 
 	/* Called from trap table with TAG TARGET placed into
 	 * %g6 and SCRATCHPAD_UTSBREG1 contents in %g1.
 	 */
 sun4v_dtsb_miss:
-	mov	FAULT_CODE_DTLB, %g3
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
+	brz,pn	%g5, kvmap_dtlb_4v
+	 mov	FAULT_CODE_DTLB, %g3
 
 	/* Create TSB pointer into %g1.  This is something like:
 	 *
@@ -312,7 +317,8 @@
 	or	%g2, %lo(OLD), %g2; \
 	sub	%g1, %g2, %g1; \
 	sethi	%hi(BRANCH_ALWAYS), %g3; \
-	srl	%g1, 2, %g1; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
 	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
 	or	%g3, %g1, %g3; \
 	stw	%g3, [%g2]; \