| Adrian Bunk | b00dc83 | 2008-05-19 16:52:27 -0700 | [diff] [blame] | 1 | /* | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 |  * memscan.S: Optimized memscan for Sparc64. | 
 | 3 |  * | 
 | 4 |  * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) | 
 | 5 |  * Copyright (C) 1998 David S. Miller (davem@redhat.com) | 
 | 6 |  */ | 
 | 7 |  | 
 | 8 | #define HI_MAGIC	0x8080808080808080	/* bit 7 set in each of the 8 bytes */ | 
 | 9 | #define LO_MAGIC	0x0101010101010101	/* bit 0 set in each byte; not referenced in the code visible here, which derives the same value as HI_MAGIC >> 7 */ | 
 | 10 | #define ASI_PL		0x88	/* address space identifier for the ldxa loads in __memscan_zero (ASI_PRIMARY_LITTLE in SPARC V9) */ | 
 | 11 |  | 
 | 12 | 	.text | 
 | 13 | 	.align	32 | 
 | 14 | 	.globl		__memscan_zero, __memscan_generic	/* exported entry points */ | 
 | 15 | 	.globl		memscan	/* same address as __memscan_generic */ | 
 | 16 |  | 
 | 17 | __memscan_zero: | 
 | 18 | 	/* In: %o0 = bufp, %o1 = size. Out: %o0 = address of the first zero byte, or bufp + size if none lies inside the buffer (bufp itself when size <= 0). Writes %o1-%o5, %g2, %g3, %g7 and the condition codes. */ | 
 | 19 | 	brlez,pn	%o1, szzero	/* signed size <= 0: return bufp as is */ | 
 | 20 | 	 andcc		%o0, 7, %g0	/* delay slot: test the low three address bits */ | 
 | 21 | 	be,pt		%icc, we_are_aligned	/* already 8-byte aligned: skip the byte-wise head */ | 
 | 22 | 	 sethi		%hi(HI_MAGIC), %o4	/* delay slot: start building the 0x80 pattern, completed at we_are_aligned */ | 
 | 23 | 	ldub		[%o0], %o5	/* unaligned head: fetch the first byte */ | 
 | 24 | 1:	subcc		%o1, 1, %o1	/* one byte consumed; these flags feed the be two instructions down */ | 
 | 25 | 	brz,pn		%o5, 10f	/* the byte is zero: found */ | 
 | 26 | 	 add		%o0, 1, %o0	/* delay slot: step past the byte just tested (runs on both paths) */ | 
 | 27 |  | 
 | 28 | 	be,pn		%xcc, szzero	/* size reached 0: %o0 already equals bufp + size */ | 
 | 29 | 	 andcc		%o0, 7, %g0	/* delay slot: re-test alignment */ | 
 | 30 | 	bne,a,pn	%icc, 1b	/* still unaligned: keep going byte by byte */ | 
 | 31 | 	 ldub		[%o0], %o5	/* annulled delay slot: next byte, loaded only when looping */ | 
 | 32 | we_are_aligned: | 
 | 33 | 	ldxa		[%o0] ASI_PL, %o5	/* 8-byte load through ASI_PL; the byte tests below treat bits 7:0 as the byte at the lowest address */ | 
 | 34 | 	or		%o4, %lo(HI_MAGIC), %o3	/* %o3 = 0x80808080 */ | 
 | 35 | 	sllx		%o3, 32, %o4	/* copy into the upper half ... */ | 
 | 36 | 	or		%o4, %o3, %o3	/* ... %o3 = 0x80 in every byte */ | 
 | 37 |  | 
 | 38 | 	srlx		%o3, 7, %o2	/* %o2 = 0x01 in every byte */ | 
 | 39 | msloop: | 
 | 40 | 	sub		%o1, 8, %o1	/* account for the 8 bytes held in %o5 */ | 
 | 41 | 	add		%o0, 8, %o0	/* %o0 now points just past the word in %o5 */ | 
 | 42 | 	sub		%o5, %o2, %o4	/* word minus 0x01 per byte */ | 
 | 43 | 	xor		%o4, %o5, %o4	/* bits changed by that subtraction */ | 
 | 44 | 	andcc		%o4, %o3, %g3	/* did bit 7 of any byte flip? marks a possible zero byte (a 0x80 byte trips it too, hence check_bytes) */ | 
 | 45 | 	bne,pn		%xcc, check_bytes	/* candidate somewhere in the 8 bytes */ | 
 | 46 | 	 srlx		%o4, 32, %g3	/* delay slot: %g3 = change bits of the upper four bytes, consulted after bytes 0..3 */ | 
 | 47 |  | 
 | 48 | 	brgz,a,pt	%o1, msloop	/* no candidate and bytes remain: next word */ | 
 | 49 | 	 ldxa		[%o0] ASI_PL, %o5	/* annulled delay slot: load it only when looping */ | 
 | 50 | check_bytes:	/* entered by branch or by fall-through; the andcc flags from msloop are still live */ | 
 | 51 | 	bne,a,pn	%icc, 2f	/* low 32 bits non-zero: candidate among bytes 0..3 */ | 
 | 52 | 	 andcc		%o5, 0xff, %g0	/* annulled delay slot (taken path only): test byte 0 */ | 
 | 53 | 	add		%o0, -5, %g2	/* %g2 = address of byte 3; the incs at 3: move it on to bytes 4..7 */ | 
 | 54 | 	ba,pt		%xcc, 3f	/* bytes 0..3 need no check: go test bytes 4..7 */ | 
 | 55 | 	 srlx		%o5, 32, %g7	/* delay slot: %g7 = upper four bytes */ | 
 | 56 |  | 
 | 57 | 2:	srlx		%o5, 8, %g7	/* %g7 = word with byte 1 moved to the bottom */ | 
 | 58 | 	be,pn		%icc, 1f	/* byte 0 is zero */ | 
 | 59 | 	 add		%o0, -8, %g2	/* delay slot: %g2 = address of byte 0 */ | 
 | 60 | 	andcc		%g7, 0xff, %g0	/* test byte 1 */ | 
 | 61 | 	srlx		%g7, 8, %g7 | 
 | 62 | 	be,pn		%icc, 1f	/* byte 1 is zero */ | 
 | 63 | 	 inc		%g2	/* delay slot: %g2 = address of the byte just tested */ | 
 | 64 | 	andcc		%g7, 0xff, %g0	/* test byte 2 */ | 
 | 65 |  | 
 | 66 | 	srlx		%g7, 8, %g7 | 
 | 67 | 	be,pn		%icc, 1f | 
 | 68 | 	 inc		%g2 | 
 | 69 | 	andcc		%g7, 0xff, %g0	/* test byte 3 */ | 
 | 70 | 	srlx		%g7, 8, %g7 | 
 | 71 | 	be,pn		%icc, 1f | 
 | 72 | 	 inc		%g2 | 
 | 73 | 	andcc		%g3, %o3, %g0	/* bytes 0..3 are all non-zero: any candidate in the upper four? */ | 
 | 74 |  | 
 | 75 | 	be,a,pn		%icc, 2f	/* none: this word holds no zero byte */ | 
 | 76 | 	 mov		%o0, %g2	/* annulled delay slot (taken path only): %g2 = %o0 */ | 
 | 77 | 3:	andcc		%g7, 0xff, %g0	/* test byte 4 */ | 
 | 78 | 	srlx		%g7, 8, %g7 | 
 | 79 | 	be,pn		%icc, 1f | 
 | 80 | 	 inc		%g2 | 
 | 81 | 	andcc		%g7, 0xff, %g0	/* test byte 5 */ | 
 | 82 | 	srlx		%g7, 8, %g7 | 
 | 83 |  | 
 | 84 | 	be,pn		%icc, 1f | 
 | 85 | 	 inc		%g2 | 
 | 86 | 	andcc		%g7, 0xff, %g0	/* test byte 6 */ | 
 | 87 | 	srlx		%g7, 8, %g7 | 
 | 88 | 	be,pn		%icc, 1f | 
 | 89 | 	 inc		%g2 | 
 | 90 | 	andcc		%g7, 0xff, %g0	/* test byte 7 */ | 
 | 91 | 	srlx		%g7, 8, %g7 | 
 | 92 |  | 
 | 93 | 	be,pn		%icc, 1f | 
 | 94 | 	 inc		%g2 | 
 | 95 | 2:	brgz,a,pt	%o1, msloop	/* false alarm: resume the word loop while bytes remain */ | 
 | 96 | 	 ldxa		[%o0] ASI_PL, %o5	/* annulled delay slot: next word, loaded only when looping */ | 
 | 97 | 	inc		%g2	/* out of bytes: %g2 ends up at or past bufp + size */ | 
 | 98 | 1:	add		%o0, %o1, %o0	/* %o0 = bufp + size; pointer and count have moved in lockstep */ | 
 | 99 | 	cmp		%g2, %o0	/* candidate address against the end of the buffer */ | 
 | 100 | 	retl | 
 | 101 |  | 
 | 102 | 	 movle		%xcc, %g2, %o0	/* delay slot: return %g2 unless it lies past the end (signed compare) */ | 
 | 103 | 10:	retl	/* zero byte found in the unaligned head */ | 
 | 104 | 	 sub		%o0, 1, %o0	/* delay slot: undo the post-increment so %o0 points at the zero byte */ | 
 | 105 | szzero:	retl	/* return %o0 unchanged */ | 
 | 106 | 	 nop | 
 | 107 |  | 
 | 108 | memscan: | 
 | 109 | __memscan_generic: | 
 | 110 | 	/* In: %o0 = addr, %o1 = c, %o2 = size. Out: %o0 = address of the first byte equal to c, or addr + size if there is none (addr itself when size == 0). Writes %o3-%o5 and the condition codes. */ | 
 | 111 | 	brz,pn		%o2, 3f	/* size == 0: return addr as is */ | 
 | 112 | 	 add		%o0, %o2, %o3	/* delay slot: %o3 = addr + size */ | 
 | 113 | 	ldub		[%o0], %o5	/* first byte */ | 
 | 114 | 	sub		%g0, %o2, %o4	/* %o4 = -size, counts up to 0; %o3 + %o4 walks the buffer */ | 
 | 115 | 1: | 
 | 116 | 	cmp		%o5, %o1	/* current byte against c */ | 
 | 117 | 	be,pn		%icc, 2f	/* match (%icc: only the low 32 bits of the difference are tested) */ | 
 | 118 | 	 addcc		%o4, 1, %o4	/* delay slot: advance the index; runs on a match as well */ | 
 | 119 | 	bne,a,pt 	%xcc, 1b	/* index != 0 (flags from addcc): bytes remain */ | 
 | 120 | 	 ldub		[%o3 + %o4], %o5	/* annulled delay slot: next byte, loaded only when looping */ | 
 | 121 | 	retl	/* not found: %o4 is 0 here, so the add at 2: yields addr + size */ | 
 | 122 | 	/* The delay slot of the retl above is the add at label 2 below; the not-found and found paths share that instruction on purpose. */ | 
 | 123 | 2: | 
 | 124 | 	 add		%o3, %o4, %o0	/* found path: one past the match, since the index was already advanced */ | 
 | 125 | 	retl | 
 | 126 | 	 sub		%o0, 1, %o0	/* delay slot: step back onto the matching byte */ | 
 | 127 | 3: | 
 | 128 | 	retl	/* size == 0: %o0 still holds addr */ | 
 | 129 | 	 nop | 