[SPARC64]: Do not assume sun4v chips have load-twin/store-init support.

Check the cpu type in the OBP device tree before committing to
using the optimized Niagara memcpy and memset implementation.

If we don't recognize the cpu type, use a completely generic
version.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 9dbd833..ac18bd8 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -97,7 +97,8 @@
 	.globl	prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
 	.globl	prom_boot_mapped_pc, prom_boot_mapping_mode
 	.globl	prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
-	.globl	is_sun4v
+	.globl	prom_compatible_name, prom_cpu_path, prom_cpu_compatible
+	.globl	is_sun4v, sun4v_chip_type
 prom_peer_name:
 	.asciz	"peer"
 prom_compatible_name:
@@ -106,6 +107,8 @@
 	.asciz	"finddevice"
 prom_chosen_path:
 	.asciz	"/chosen"
+prom_cpu_path:
+	.asciz	"/cpu"
 prom_getprop_name:
 	.asciz	"getprop"
 prom_mmu_name:
@@ -120,9 +123,13 @@
 	.asciz	"unmap"
 prom_sun4v_name:
 	.asciz	"sun4v"
+prom_niagara_prefix:
+	.asciz	"SUNW,UltraSPARC-T"
 	.align	4
 prom_root_compatible:
 	.skip	64
+prom_cpu_compatible:
+	.skip	64
 prom_root_node:
 	.word	0
 prom_mmu_ihandle_cache:
@@ -138,6 +145,8 @@
 	.xword	0
 is_sun4v:
 	.word	0
+sun4v_chip_type:
+	.word	SUN4V_CHIP_INVALID
 1:
 	rd	%pc, %l0
 
@@ -296,13 +305,13 @@
 	sethi	%hi(prom_sun4v_name), %g7
 	or	%g7, %lo(prom_sun4v_name), %g7
 	mov	5, %g3
-1:	ldub	[%g7], %g2
+90:	ldub	[%g7], %g2
 	ldub	[%g1], %g4
 	cmp	%g2, %g4
-	bne,pn	%icc, 2f
+	bne,pn	%icc, 80f
 	 add	%g7, 1, %g7
 	subcc	%g3, 1, %g3
-	bne,pt	%xcc, 1b
+	bne,pt	%xcc, 90b
 	 add	%g1, 1, %g1
 
 	sethi	%hi(is_sun4v), %g1
@@ -310,7 +319,80 @@
 	mov	1, %g7
 	stw	%g7, [%g1]
 
-2:
+	/* cpu_node = prom_finddevice("/cpu") */
+	mov	(1b - prom_finddev_name), %l1
+	mov	(1b - prom_cpu_path), %l2
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%sp, (192 + 128), %sp
+
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "finddevice"
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 1
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l2, [%sp + 2047 + 128 + 0x18]	! arg1, "/cpu"
+	stx	%g0, [%sp + 2047 + 128 + 0x20]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	ldx	[%sp + 2047 + 128 + 0x20], %l4	! cpu device node
+
+	mov	(1b - prom_getprop_name), %l1
+	mov	(1b - prom_compatible_name), %l2
+	mov	(1b - prom_cpu_compatible), %l5
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%l0, %l5, %l5
+
+	/* prom_getproperty(cpu_node, "compatible",
+	 *                  &prom_cpu_compatible, 64)
+	 */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "getprop"
+	mov	4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 4
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l4, [%sp + 2047 + 128 + 0x18]	! arg1, cpu_node
+	stx	%l2, [%sp + 2047 + 128 + 0x20]	! arg2, "compatible"
+	stx	%l5, [%sp + 2047 + 128 + 0x28]	! arg3, &prom_cpu_compatible
+	mov	64, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x30]	! arg4, size
+	stx	%g0, [%sp + 2047 + 128 + 0x38]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	add	%sp, (192 + 128), %sp
+
+	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	sethi	%hi(prom_niagara_prefix), %g7
+	or	%g7, %lo(prom_niagara_prefix), %g7
+	mov	17, %g3
+90:	ldub	[%g7], %g2
+	ldub	[%g1], %g4
+	cmp	%g2, %g4
+	bne,pn	%icc, 4f
+	 add	%g7, 1, %g7
+	subcc	%g3, 1, %g3
+	bne,pt	%xcc, 90b
+	 add	%g1, 1, %g1
+
+	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	ldub	[%g1 + 17], %g2
+	cmp	%g2, '1'
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA1, %g4
+	cmp	%g2, '2'
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA2, %g4
+4:
+	mov	SUN4V_CHIP_UNKNOWN, %g4
+5:	sethi	%hi(sun4v_chip_type), %g2
+	or	%g2, %lo(sun4v_chip_type), %g2
+	stw	%g4, [%g2]
+
+80:
 	BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
 	BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
 	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
@@ -414,6 +496,24 @@
 	stw	%g2, [%g1 + %lo(tlb_type)]
 
 	/* Patch copy/clear ops.  */
+	sethi	%hi(sun4v_chip_type), %g1
+	lduw	[%g1 + %lo(sun4v_chip_type)], %g1
+	cmp	%g1, SUN4V_CHIP_NIAGARA1
+	be,pt	%xcc, niagara_patch
+	 cmp	%g1, SUN4V_CHIP_NIAGARA2
+	be,pt	%xcc, niagara_patch
+	 nop
+
+	call	generic_patch_copyops
+	 nop
+	call	generic_patch_bzero
+	 nop
+	call	generic_patch_pageops
+	 nop
+
+	ba,a,pt	%xcc, 80f
+
+niagara_patch:
 	call	niagara_patch_copyops
 	 nop
 	call	niagara_patch_bzero
@@ -421,6 +521,7 @@
 	call	niagara_patch_pageops
 	 nop
 
+80:
 	/* Patch TLB/cache ops.  */
 	call	hypervisor_patch_cachetlbops
 	 nop