| Adrian Bunk | b00dc83 | 2008-05-19 16:52:27 -0700 | [diff] [blame] | 1 | /* | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 | * memscan.S: Optimized memscan for Sparc64. | 
|  | 3 | * | 
|  | 4 | * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) | 
|  | 5 | * Copyright (C) 1998 David S. Miller (davem@redhat.com) | 
|  | 6 | */ | 
|  | 7 |  | 
/*
 * Byte-lane masks for the word-at-a-time zero scan in __memscan_zero.
 * HI_MAGIC has bit 7 of every byte set, LO_MAGIC has bit 0 of every byte set.
 * The visible code only references HI_MAGIC (through %hi/%lo) and derives the
 * 0x01... pattern with a shift; LO_MAGIC itself is not referenced here.
 */
|  | 8 | #define HI_MAGIC	0x8080808080808080 | 
|  | 9 | #define LO_MAGIC	0x0101010101010101 | 
/*
 * ASI used by the ldxa loads in __memscan_zero. 0x88 is ASI_PRIMARY_LITTLE in
 * SPARC V9: the 8-byte load is little-endian, so the lowest-addressed byte of
 * the word lands in bits 7:0 of the destination register.
 */
|  | 10 | #define ASI_PL		0x88 | 
|  | 11 |  | 
/* Code section, 32-byte aligned start; all three entry points are exported. */
|  | 12 | .text | 
|  | 13 | .align	32 | 
|  | 14 | .globl		__memscan_zero, __memscan_generic | 
|  | 15 | .globl		memscan | 
|  | 16 |  | 
/*
 * __memscan_zero: find the first zero byte in a buffer.
 *
 * In:   %o0 = bufp, %o1 = size (tested as a signed value).
 * Out:  %o0 = address of the first zero byte inside the buffer, or
 *       bufp + size when there is none. When size <= 0, bufp is returned
 *       unchanged.
 * Uses: %o2-%o5, %g2, %g3, %g7 and the condition codes. Leaf routine: it
 *       returns with retl and never executes save/restore.
 * Note: once %o0 is 8-byte aligned the buffer is read in whole 8-byte words,
 *       so up to 7 bytes past bufp + size may be loaded; the returned pointer
 *       is clamped to bufp + size.
 * Invariant: %o0 + %o1 == bufp + size at every point after entry (both are
 *       adjusted together, by 1 in the head loop and by 8 in the word loop).
 */
|  | 17 | __memscan_zero: | 
|  | 18 | /* %o0 = bufp, %o1 = size */ | 
/* size <= 0: return bufp as is. */
|  | 19 | brlez,pn	%o1, szzero | 
/* Delay slot (always executed): icc.Z := bufp is 8-byte aligned. */
|  | 20 | andcc		%o0, 7, %g0 | 
|  | 21 | be,pt		%icc, we_are_aligned | 
/* Delay slot (always executed): start building the low word of HI_MAGIC in %o4. */
|  | 22 | sethi		%hi(HI_MAGIC), %o4 | 
/* Unaligned head: test one byte at a time until %o0 becomes 8-byte aligned. */
|  | 23 | ldub		[%o0], %o5 | 
/*
 * 1: size--. The xcc result is consumed by "be %xcc" below; brz tests a
 * register and add does not write the condition codes, so it survives.
 */
|  | 24 | 1:	subcc		%o1, 1, %o1 | 
/* Byte is zero: leave through 10, which undoes the increment done in the delay slot. */
|  | 25 | brz,pn		%o5, 10f | 
|  | 26 | add		%o0, 1, %o0 | 
|  | 27 |  | 
/* size hit 0 without a zero byte: %o0 == bufp + size, return it. */
|  | 28 | be,pn		%xcc, szzero | 
/* Delay slot (always executed): icc.Z := %o0 is now 8-byte aligned. */
|  | 29 | andcc		%o0, 7, %g0 | 
/* Still unaligned: fetch the next byte (annulled slot, runs only if the branch is taken). */
|  | 30 | bne,a,pn	%icc, 1b | 
|  | 31 | ldub		[%o0], %o5 | 
|  | 32 | we_are_aligned: | 
/* First aligned 8-byte load; at least one byte of it is inside the buffer (%o1 > 0 here). */
|  | 33 | ldxa		[%o0] ASI_PL, %o5 | 
/* %o3 = 0x8080808080808080 (low word replicated into the high word). */
|  | 34 | or		%o4, %lo(HI_MAGIC), %o3 | 
|  | 35 | sllx		%o3, 32, %o4 | 
|  | 36 | or		%o4, %o3, %o3 | 
|  | 37 |  | 
/* %o2 = %o3 >> 7 = 0x0101010101010101. */
|  | 38 | srlx		%o3, 7, %o2 | 
/*
 * Word loop. The word in %o5 is accounted for up front (size -= 8, %o0 += 8),
 * so inside the loop the word just loaded starts at %o0 - 8.
 * %o4 = (%o5 - %o2) ^ %o5: bit 7 of a byte lane is always set when that byte
 * of %o5 is zero, but it can also be set for some non-zero bytes (e.g. 0x80),
 * so a hit is only a candidate and check_bytes re-tests the bytes one by one.
 */
|  | 39 | msloop: | 
|  | 40 | sub		%o1, 8, %o1 | 
|  | 41 | add		%o0, 8, %o0 | 
|  | 42 | sub		%o5, %o2, %o4 | 
|  | 43 | xor		%o4, %o5, %o4 | 
/* xcc/icc := candidate bits of the whole word / of its low 32 bits (bytes 0-3). */
|  | 44 | andcc		%o4, %o3, %g3 | 
|  | 45 | bne,pn		%xcc, check_bytes | 
/* Delay slot (always executed): %g3 = upper half of %o4; srlx leaves the condition codes alone. */
|  | 46 | srlx		%o4, 32, %g3 | 
|  | 47 |  | 
/*
 * No candidate: while bytes remain, load the next word and loop. Otherwise
 * fall into check_bytes with Z set, which walks bytes 4-7 without a hit and
 * ends up returning bufp + size.
 */
|  | 48 | brgz,a,pt	%o1, msloop | 
|  | 49 | ldxa		[%o0] ASI_PL, %o5 | 
|  | 50 | check_bytes: | 
/* Candidate in bytes 0-3 (icc from the andcc): go to 2 with icc := (byte 0 == 0). */
|  | 51 | bne,a,pn	%icc, 2f | 
|  | 52 | andcc		%o5, 0xff, %g0 | 
/*
 * Otherwise skip straight to bytes 4-7: %g2 = address of byte 4 minus one
 * (each "inc %g2" delay slot below advances it before it is used) and
 * %g7 = bytes 4-7 of the word.
 */
|  | 53 | add		%o0, -5, %g2 | 
|  | 54 | ba,pt		%xcc, 3f | 
|  | 55 | srlx		%o5, 32, %g7 | 
|  | 56 |  | 
/*
 * Bytes 0-3. Pattern for every step: icc was set by testing one byte, %g7 is
 * shifted to expose the next one, "be 1f" leaves if the tested byte was zero,
 * and the delay slot (always executed) makes %g2 the address of the byte that
 * was just tested.
 */
|  | 57 | 2:	srlx		%o5, 8, %g7 | 
|  | 58 | be,pn		%icc, 1f | 
|  | 59 | add		%o0, -8, %g2 | 
|  | 60 | andcc		%g7, 0xff, %g0 | 
|  | 61 | srlx		%g7, 8, %g7 | 
|  | 62 | be,pn		%icc, 1f | 
|  | 63 | inc		%g2 | 
|  | 64 | andcc		%g7, 0xff, %g0 | 
|  | 65 |  | 
|  | 66 | srlx		%g7, 8, %g7 | 
|  | 67 | be,pn		%icc, 1f | 
|  | 68 | inc		%g2 | 
|  | 69 | andcc		%g7, 0xff, %g0 | 
|  | 70 | srlx		%g7, 8, %g7 | 
|  | 71 | be,pn		%icc, 1f | 
|  | 72 | inc		%g2 | 
/*
 * Bytes 0-3 held no zero. Here %g2 == %o0 - 5 and %g7 == bytes 4-7, the same
 * state the direct jump to 3 sets up. icc := candidate bits of the upper half.
 */
|  | 73 | andcc		%g3, %o3, %g0 | 
|  | 74 |  | 
/* No candidate in bytes 4-7 either: %g2 = %o0 (annulled slot, taken path only) and go to the second 2. */
|  | 75 | be,a,pn		%icc, 2f | 
|  | 76 | mov		%o0, %g2 | 
/* Bytes 4-7, same test/shift/branch pattern as bytes 0-3. */
|  | 77 | 3:	andcc		%g7, 0xff, %g0 | 
|  | 78 | srlx		%g7, 8, %g7 | 
|  | 79 | be,pn		%icc, 1f | 
|  | 80 | inc		%g2 | 
|  | 81 | andcc		%g7, 0xff, %g0 | 
|  | 82 | srlx		%g7, 8, %g7 | 
|  | 83 |  | 
|  | 84 | be,pn		%icc, 1f | 
|  | 85 | inc		%g2 | 
|  | 86 | andcc		%g7, 0xff, %g0 | 
|  | 87 | srlx		%g7, 8, %g7 | 
|  | 88 | be,pn		%icc, 1f | 
|  | 89 | inc		%g2 | 
|  | 90 | andcc		%g7, 0xff, %g0 | 
|  | 91 | srlx		%g7, 8, %g7 | 
|  | 92 |  | 
|  | 93 | be,pn		%icc, 1f | 
|  | 94 | inc		%g2 | 
/* 2: the word had no zero byte after all: load the next word and loop while bytes remain. */
|  | 95 | 2:	brgz,a,pt	%o1, msloop | 
|  | 96 | ldxa		[%o0] ASI_PL, %o5 | 
/* Out of bytes with nothing found: after this inc %g2 >= %o0 >= bufp + size, so the clamp below yields bufp + size. */
|  | 97 | inc		%g2 | 
/*
 * 1: common exit. %o0 = bufp + size (see the invariant in the header), then
 * return min(%g2, bufp + size): a zero byte that was only seen in the bytes
 * loaded past the end of the buffer does not count. The compare is signed
 * 64-bit (movle on %xcc). The movle below is the delay slot of retl.
 */
|  | 98 | 1:	add		%o0, %o1, %o0 | 
|  | 99 | cmp		%g2, %o0 | 
|  | 100 | retl | 
|  | 101 |  | 
|  | 102 | movle		%xcc, %g2, %o0 | 
/* 10: zero byte found in the unaligned head; %o0 was already advanced past it. */
|  | 103 | 10:	retl | 
|  | 104 | sub		%o0, 1, %o0 | 
/* szzero: return %o0 unchanged (bufp on entry, bufp + size from the head loop). */
|  | 105 | szzero:	retl | 
|  | 106 | nop | 
|  | 107 |  | 
/*
 * memscan / __memscan_generic: find the first byte equal to c.
 * Both labels name the same entry point.
 *
 * In:   %o0 = addr, %o1 = c, %o2 = size.
 * Out:  %o0 = address of the first byte that compares equal to c, or
 *       addr + size when there is none (addr itself when size == 0).
 * Uses: %o3 = addr + size, %o4 = negative index running from -size up to 0,
 *       %o5 = current byte; the condition codes are clobbered. Leaf routine
 *       (returns with retl).
 * Note: the byte is loaded zero-extended (ldub) and compared with %o1 as
 *       passed, on the 32-bit condition codes, so a c outside 0..255 never
 *       matches.
 */
|  | 108 | memscan: | 
|  | 109 | __memscan_generic: | 
|  | 110 | /* %o0 = addr, %o1 = c, %o2 = size */ | 
/* size == 0: return addr unchanged via 3. */
|  | 111 | brz,pn		%o2, 3f | 
/* Delay slot (always executed): %o3 = one past the last byte. */
|  | 112 | add		%o0, %o2, %o3 | 
/* Prime the loop with the first byte; %o4 = -size so that %o3 + %o4 == addr. */
|  | 113 | ldub		[%o0], %o5 | 
|  | 114 | sub		%g0, %o2, %o4 | 
|  | 115 | 1: | 
|  | 116 | cmp		%o5, %o1 | 
/* Match: go to 2. The branch decision uses the flags from cmp. */
|  | 117 | be,pn		%icc, 2f | 
/* Delay slot (always executed): advance the index; its xcc result drives the bne below. */
|  | 118 | addcc		%o4, 1, %o4 | 
/* Index not yet 0: load the next byte (annulled slot, taken path only) and loop. */
|  | 119 | bne,a,pt 	%xcc, 1b | 
|  | 120 | ldub		[%o3 + %o4], %o5 | 
/*
 * Not found. This retl has no delay-slot instruction of its own: the add at
 * label 2 is the next instruction in memory and therefore executes as the
 * delay slot. %o4 is 0 here, so it yields %o0 = %o3 = addr + size.
 */
|  | 121 | retl | 
|  | 122 | /* The delay slot is the same as the next insn, this is just to make it look more awful */ | 
|  | 123 | 2: | 
/* Found: %o4 was already advanced past the matching byte, hence the -1 in the retl delay slot. */
|  | 124 | add		%o3, %o4, %o0 | 
|  | 125 | retl | 
|  | 126 | sub		%o0, 1, %o0 | 
/* 3: empty buffer, %o0 still holds addr. */
|  | 127 | 3: | 
|  | 128 | retl | 
|  | 129 | nop | 