/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#ifdef __KERNEL__

#define FUNC(x)			\
	.globl	x;		\
	.type	x,@function;	\
	.align	4;		\
x:

#undef FASTER_REVERSE
#undef FASTER_NONALIGNED
#define FASTER_ALIGNED

/* In the kernel these functions don't return a value.
 * One should use the macros in asm/string.h for that purpose.
 * We return 0, so that bugs are more apparent.
 */
#define SETUP_RETL
#define RETL_INSN	clr	%o0
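
/* Note: RETL_INSN always sits in the delay slot of a "retl" below,
 * so it executes on the way out of each routine: here it clears %o0,
 * in the libc build it restores the original dst saved by SETUP_RETL.
 */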

#else

/* libc */

#include "DEFS.h"

#define FASTER_REVERSE
#define FASTER_NONALIGNED
#define FASTER_ALIGNED

#define SETUP_RETL	mov	%o0, %g6
#define RETL_INSN	mov	%g6, %o0

#endif

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];
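
/* Why the identical first insn matters: the big-copy dispatch below
 * branches to "82f + 4" (and, in the reverse copy, "74f + 4") with
 * the shared leading ldd issued in the branch delay slot; the +4
 * target then skips the matching first insn of the aligned loop.
 */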

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	std	%t0, [%dst - (offset) - 0x10]; \
	std	%t2, [%dst - (offset) - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];

/* Both these macros have to start with exactly the same insn */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - (offset) - 0x20], %t0; \
	ldd	[%src - (offset) - 0x18], %t2; \
	ldd	[%src - (offset) - 0x10], %t4; \
	ldd	[%src - (offset) - 0x08], %t6; \
	st	%t0, [%dst - (offset) - 0x20]; \
	st	%t1, [%dst - (offset) - 0x1c]; \
	st	%t2, [%dst - (offset) - 0x18]; \
	st	%t3, [%dst - (offset) - 0x14]; \
	st	%t4, [%dst - (offset) - 0x10]; \
	st	%t5, [%dst - (offset) - 0x0c]; \
	st	%t6, [%dst - (offset) - 0x08]; \
	st	%t7, [%dst - (offset) - 0x04];

#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - (offset) - 0x20], %t0; \
	ldd	[%src - (offset) - 0x18], %t2; \
	ldd	[%src - (offset) - 0x10], %t4; \
	ldd	[%src - (offset) - 0x08], %t6; \
	std	%t0, [%dst - (offset) - 0x20]; \
	std	%t2, [%dst - (offset) - 0x18]; \
	std	%t4, [%dst - (offset) - 0x10]; \
	std	%t6, [%dst - (offset) - 0x08];

#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c];

#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src + (offset) + 0x00], %t0; \
	ldub	[%src + (offset) + 0x01], %t1; \
	stb	%t0, [%dst + (offset) + 0x00]; \
	stb	%t1, [%dst + (offset) + 0x01];

#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	srl	%t0, shir, %t5; \
	srl	%t1, shir, %t6; \
	sll	%t0, shil, %t0; \
	or	%t5, %prev, %t5; \
	sll	%t1, shil, %prev; \
	or	%t6, %t0, %t0; \
	srl	%t2, shir, %t1; \
	srl	%t3, shir, %t6; \
	sll	%t2, shil, %t2; \
	or	%t1, %prev, %t1; \
	std	%t4, [%dst + (offset) + (offset2) - 0x04]; \
	std	%t0, [%dst + (offset) + (offset2) + 0x04]; \
	sll	%t3, shil, %prev; \
	or	%t6, %t2, %t4;

#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	srl	%t0, shir, %t4; \
	srl	%t1, shir, %t5; \
	sll	%t0, shil, %t6; \
	or	%t4, %prev, %t0; \
	sll	%t1, shil, %prev; \
	or	%t5, %t6, %t1; \
	srl	%t2, shir, %t4; \
	srl	%t3, shir, %t5; \
	sll	%t2, shil, %t6; \
	or	%t4, %prev, %t2; \
	sll	%t3, shil, %prev; \
	or	%t5, %t6, %t3; \
	std	%t0, [%dst + (offset) + (offset2) + 0x00]; \
	std	%t2, [%dst + (offset) + (offset2) + 0x08];
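
/* Roughly what the SMOVE macros compute for a source that is off
 * word alignment by 1, 2 or 3 bytes (shil + shir == 32):
 *
 *	out  = prev | (cur >> shir);	! prev holds last_word << shil
 *	prev = cur << shil;
 *
 * i.e. each stored word splices the tail of the previous load onto
 * the head of the current one.
 */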

	.text
	.align	4

#ifdef FASTER_REVERSE

70:	/* rdword_align */

	andcc		%o1, 1, %g0
	be		4f
	andcc		%o1, 2, %g0

	ldub		[%o1 - 1], %g2
	sub		%o1, 1, %o1
	stb		%g2, [%o0 - 1]
	sub		%o2, 1, %o2
	be		3f
	sub		%o0, 1, %o0
4:
	lduh		[%o1 - 2], %g2
	sub		%o1, 2, %o1
	sth		%g2, [%o0 - 2]
	sub		%o2, 2, %o2
	b		3f
	sub		%o0, 2, %o0

#endif /* FASTER_REVERSE */

0:
	retl
	nop		! Only bcopy returns here and it returns void...

#ifdef __KERNEL__
FUNC(amemmove)
FUNC(__memmove)
#endif
FUNC(memmove)
	cmp		%o0, %o1
	SETUP_RETL
	bleu		9f
	sub		%o0, %o1, %o4

	add		%o1, %o2, %o3
	cmp		%o3, %o0
	bleu		0f
	andcc		%o4, 3, %o5

#ifndef FASTER_REVERSE

	add		%o1, %o2, %o1
	add		%o0, %o2, %o0
	sub		%o1, 1, %o1
	sub		%o0, 1, %o0

1:	/* reverse_bytes */

	ldub		[%o1], %o4
	subcc		%o2, 1, %o2
	stb		%o4, [%o0]
	sub		%o1, 1, %o1
	bne		1b
	sub		%o0, 1, %o0

	retl
	RETL_INSN

#else /* FASTER_REVERSE */

	add		%o1, %o2, %o1
	add		%o0, %o2, %o0
	bne		77f
	cmp		%o2, 15
	bleu		91f
	andcc		%o1, 3, %g0
	bne		70b
3:
	andcc		%o1, 4, %g0

	be		2f
	mov		%o2, %g1

	ld		[%o1 - 4], %o4
	sub		%g1, 4, %g1
	st		%o4, [%o0 - 4]
	sub		%o1, 4, %o1
	sub		%o0, 4, %o0
2:
	andcc		%g1, 0xffffff80, %g7
	be		3f
	andcc		%o0, 4, %g0

	be		74f + 4
5:
	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g7, 128, %g7
	sub		%o1, 128, %o1
	bne		5b
	sub		%o0, 128, %o0
3:
	andcc		%g1, 0x70, %g7
	be		72f
	andcc		%g1, 8, %g0

	sethi		%hi(72f), %o5
	srl		%g7, 1, %o4
	add		%g7, %o4, %o4
	sub		%o1, %g7, %o1
	sub		%o5, %o4, %o5
	jmpl		%o5 + %lo(72f), %g0
	sub		%o0, %g7, %o0
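
/* Computed branch into the RMOVE_LASTCHUNK table below: each entry
 * is 6 insns (24 bytes) and copies 16 bytes, so for %g7 remaining
 * bytes (a multiple of 16, at most 0x70) the target is 72f minus
 * (%g7/16)*24 = %g7 + %g7/2 bytes, which the srl/add pair computes.
 */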

71:	/* rmemcpy_table */
	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

72:	/* rmemcpy_table_end */

	be		73f
	andcc		%g1, 4, %g0

	ldd		[%o1 - 0x08], %g2
	sub		%o0, 8, %o0
	sub		%o1, 8, %o1
	st		%g2, [%o0]
	st		%g3, [%o0 + 0x04]

73:	/* rmemcpy_last7 */

	be		1f
	andcc		%g1, 2, %g0

	ld		[%o1 - 4], %g2
	sub		%o1, 4, %o1
	st		%g2, [%o0 - 4]
	sub		%o0, 4, %o0
1:
	be		1f
	andcc		%g1, 1, %g0

	lduh		[%o1 - 2], %g2
	sub		%o1, 2, %o1
	sth		%g2, [%o0 - 2]
	sub		%o0, 2, %o0
1:
	be		1f
	nop

	ldub		[%o1 - 1], %g2
	stb		%g2, [%o0 - 1]
1:
	retl
	RETL_INSN

74:	/* rldd_std */
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g7, 128, %g7
	sub		%o1, 128, %o1
	bne		74b
	sub		%o0, 128, %o0

	andcc		%g1, 0x70, %g7
	be		72b
	andcc		%g1, 8, %g0

	sethi		%hi(72b), %o5
	srl		%g7, 1, %o4
	add		%g7, %o4, %o4
	sub		%o1, %g7, %o1
	sub		%o5, %o4, %o5
	jmpl		%o5 + %lo(72b), %g0
	sub		%o0, %g7, %o0

75:	/* rshort_end */

	and		%o2, 0xe, %o3
2:
	sethi		%hi(76f), %o5
	sll		%o3, 3, %o4
	sub		%o0, %o3, %o0
	sub		%o5, %o4, %o5
	sub		%o1, %o3, %o1
	jmpl		%o5 + %lo(76f), %g0
	andcc		%o2, 1, %g0
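
/* Each RMOVE_SHORTCHUNK below is 4 insns (16 bytes) and copies 2
 * bytes, so for %o3 = len & 0xe the target is 76f minus
 * (%o3/2)*16 = %o3 << 3 bytes, as computed into %o4 above.
 */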

	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

76:	/* rshort_table_end */

	be		1f
	nop
	ldub		[%o1 - 1], %g2
	stb		%g2, [%o0 - 1]
1:
	retl
	RETL_INSN

91:	/* rshort_aligned_end */

	bne		75b
	andcc		%o2, 8, %g0

	be		1f
	andcc		%o2, 4, %g0

	ld		[%o1 - 0x08], %g2
	ld		[%o1 - 0x04], %g3
	sub		%o1, 8, %o1
	st		%g2, [%o0 - 0x08]
	st		%g3, [%o0 - 0x04]
	sub		%o0, 8, %o0
1:
	b		73b
	mov		%o2, %g1

77:	/* rnon_aligned */
	cmp		%o2, 15
	bleu		75b
	andcc		%o0, 3, %g0
	be		64f
	andcc		%o0, 1, %g0
	be		63f
	andcc		%o0, 2, %g0
	ldub		[%o1 - 1], %g5
	sub		%o1, 1, %o1
	stb		%g5, [%o0 - 1]
	sub		%o0, 1, %o0
	be		64f
	sub		%o2, 1, %o2
63:
	ldub		[%o1 - 1], %g5
	sub		%o1, 2, %o1
	stb		%g5, [%o0 - 1]
	sub		%o0, 2, %o0
	ldub		[%o1], %g5
	sub		%o2, 2, %o2
	stb		%g5, [%o0]
64:
	and		%o1, 3, %g2
	and		%o1, -4, %o1
	and		%o2, 0xc, %g3
	add		%o1, 4, %o1
	cmp		%g3, 4
	sll		%g2, 3, %g4
	mov		32, %g2
	be		4f
	sub		%g2, %g4, %g7

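/* %g4 = (src & 3) * 8 and %g7 = 32 - %g4 are the left/right shift
 * counts used below to splice each pair of word loads back together
 * while copying backwards.
 */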
	blu		3f
	cmp		%g3, 8

	be		2f
	srl		%o2, 2, %g3

	ld		[%o1 - 4], %o3
	add		%o0, -8, %o0
	ld		[%o1 - 8], %o4
	add		%o1, -16, %o1
	b		7f
	add		%g3, 1, %g3
2:
	ld		[%o1 - 4], %o4
	add		%o0, -4, %o0
	ld		[%o1 - 8], %g1
	add		%o1, -12, %o1
	b		8f
	add		%g3, 2, %g3
3:
	ld		[%o1 - 4], %o5
	add		%o0, -12, %o0
	ld		[%o1 - 8], %o3
	add		%o1, -20, %o1
	b		6f
	srl		%o2, 2, %g3
4:
	ld		[%o1 - 4], %g1
	srl		%o2, 2, %g3
	ld		[%o1 - 8], %o5
	add		%o1, -24, %o1
	add		%o0, -16, %o0
	add		%g3, -1, %g3

	ld		[%o1 + 12], %o3
5:
	sll		%o5, %g4, %g2
	srl		%g1, %g7, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 12]
6:
	ld		[%o1 + 8], %o4
	sll		%o3, %g4, %g2
	srl		%o5, %g7, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 8]
7:
	ld		[%o1 + 4], %g1
	sll		%o4, %g4, %g2
	srl		%o3, %g7, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 4]
8:
	ld		[%o1], %o5
	sll		%g1, %g4, %g2
	srl		%o4, %g7, %g5
	addcc		%g3, -4, %g3
	or		%g2, %g5, %g2
	add		%o1, -16, %o1
	st		%g2, [%o0]
	add		%o0, -16, %o0
	bne,a		5b
	ld		[%o1 + 12], %o3
	sll		%o5, %g4, %g2
	srl		%g1, %g7, %g5
	srl		%g4, 3, %g3
	or		%g2, %g5, %g2
	add		%o1, %g3, %o1
	andcc		%o2, 2, %g0
	st		%g2, [%o0 + 12]
	be		1f
	andcc		%o2, 1, %g0

	ldub		[%o1 + 15], %g5
	add		%o1, -2, %o1
	stb		%g5, [%o0 + 11]
	add		%o0, -2, %o0
	ldub		[%o1 + 16], %g5
	stb		%g5, [%o0 + 12]
1:
	be		1f
	nop
	ldub		[%o1 + 15], %g5
	stb		%g5, [%o0 + 11]
1:
	retl
	RETL_INSN

#endif /* FASTER_REVERSE */

/* NOTE: This code is executed only in the cases where
 * (%src & 3) != 0 (%src is %o1). We need to align %src
 * to 4, so depending on (%src & 3) we do:
 *	1	ldub, lduh
 *	2	lduh
 *	3	just ldub
 * so even if it looks weird, the branches are correct
 * here. -jj
 */
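
/* A C sketch of those three cases, with src = %o1, dst = %o0,
 * len = %o2 (an illustrative simplification, not the exact flow):
 *
 *	switch (src & 3) {
 *	case 1: copy 1 byte, then 2 bytes; len -= 3; break;
 *	case 2: copy 2 bytes; len -= 2; break;
 *	case 3: copy 1 byte; len -= 1; break;
 *	}
 */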
78:	/* dword_align */

	andcc		%o1, 1, %g0
	be		4f
	andcc		%o1, 2, %g0

	ldub		[%o1], %g2
	add		%o1, 1, %o1
	stb		%g2, [%o0]
	sub		%o2, 1, %o2
	bne		3f
	add		%o0, 1, %o0
4:
	lduh		[%o1], %g2
	add		%o1, 2, %o1
	sth		%g2, [%o0]
	sub		%o2, 2, %o2
	b		3f
	add		%o0, 2, %o0

FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */

	sub		%o0, %o1, %o4
	SETUP_RETL
9:
	andcc		%o4, 3, %o5
0:
	bne		86f
	cmp		%o2, 15

	bleu		90f
	andcc		%o1, 3, %g0

	bne		78b
3:
	andcc		%o1, 4, %g0

	be		2f
	mov		%o2, %g1

	ld		[%o1], %o4
	sub		%g1, 4, %g1
	st		%o4, [%o0]
	add		%o1, 4, %o1
	add		%o0, 4, %o0
2:
	andcc		%g1, 0xffffff80, %g7
	be		3f
	andcc		%o0, 4, %g0

	be		82f + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g7, 128, %g7
	add		%o1, 128, %o1
	bne		5b
	add		%o0, 128, %o0
3:
	andcc		%g1, 0x70, %g7
	be		80f
	andcc		%g1, 8, %g0

	sethi		%hi(80f), %o5
	srl		%g7, 1, %o4
	add		%g7, %o4, %o4
	add		%o1, %g7, %o1
	sub		%o5, %o4, %o5
	jmpl		%o5 + %lo(80f), %g0
	add		%o0, %g7, %o0
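
/* Computed branch into the MOVE_LASTCHUNK table below: each entry is
 * 6 insns (24 bytes) and copies 16 bytes, so for %g7 remaining bytes
 * (a multiple of 16, at most 0x70) the target is 80f minus
 * (%g7/16)*24 = %g7 + %g7/2 bytes, as computed by the srl/add pair.
 */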

79:	/* memcpy_table */

	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80:	/* memcpy_table_end */
	be		81f
	andcc		%g1, 4, %g0

	ldd		[%o1], %g2
	add		%o0, 8, %o0
	st		%g2, [%o0 - 0x08]
	add		%o1, 8, %o1
	st		%g3, [%o0 - 0x04]

81:	/* memcpy_last7 */

	be		1f
	andcc		%g1, 2, %g0

	ld		[%o1], %g2
	add		%o1, 4, %o1
	st		%g2, [%o0]
	add		%o0, 4, %o0
1:
	be		1f
	andcc		%g1, 1, %g0

	lduh		[%o1], %g2
	add		%o1, 2, %o1
	sth		%g2, [%o0]
	add		%o0, 2, %o0
1:
	be		1f
	nop

	ldub		[%o1], %g2
	stb		%g2, [%o0]
1:
	retl
	RETL_INSN

82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g7, 128, %g7
	add		%o1, 128, %o1
	bne		82b
	add		%o0, 128, %o0

#ifndef FASTER_ALIGNED

	andcc		%g1, 0x70, %g7
	be		80b
	andcc		%g1, 8, %g0

	sethi		%hi(80b), %o5
	srl		%g7, 1, %o4
	add		%g7, %o4, %o4
	add		%o1, %g7, %o1
	sub		%o5, %o4, %o5
	jmpl		%o5 + %lo(80b), %g0
	add		%o0, %g7, %o0

#else /* FASTER_ALIGNED */

	andcc		%g1, 0x70, %g7
	be		84f
	andcc		%g1, 8, %g0

	sethi		%hi(84f), %o5
	add		%o1, %g7, %o1
	sub		%o5, %g7, %o5
	jmpl		%o5 + %lo(84f), %g0
	add		%o0, %g7, %o0
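
/* Here each MOVE_LASTALIGNCHUNK entry is 4 insns (16 bytes) and
 * copies 16 bytes, so the byte offset back from 84f is simply %g7
 * itself; no srl/add scaling is needed.
 */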

83:	/* amemcpy_table */

	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84:	/* amemcpy_table_end */
	be		85f
	andcc		%g1, 4, %g0

	ldd		[%o1], %g2
	add		%o0, 8, %o0
	std		%g2, [%o0 - 0x08]
	add		%o1, 8, %o1
85:	/* amemcpy_last7 */
	be		1f
	andcc		%g1, 2, %g0

	ld		[%o1], %g2
	add		%o1, 4, %o1
	st		%g2, [%o0]
	add		%o0, 4, %o0
1:
	be		1f
	andcc		%g1, 1, %g0

	lduh		[%o1], %g2
	add		%o1, 2, %o1
	sth		%g2, [%o0]
	add		%o0, 2, %o0
1:
	be		1f
	nop

	ldub		[%o1], %g2
	stb		%g2, [%o0]
1:
	retl
	RETL_INSN

#endif /* FASTER_ALIGNED */

86:	/* non_aligned */
	cmp		%o2, 6
	bleu		88f

#ifdef FASTER_NONALIGNED

	cmp		%o2, 256
	bcc		87f

#endif /* FASTER_NONALIGNED */

	andcc		%o0, 3, %g0
	be		61f
	andcc		%o0, 1, %g0
	be		60f
	andcc		%o0, 2, %g0

	ldub		[%o1], %g5
	add		%o1, 1, %o1
	stb		%g5, [%o0]
	sub		%o2, 1, %o2
	bne		61f
	add		%o0, 1, %o0
60:
	ldub		[%o1], %g3
	add		%o1, 2, %o1
	stb		%g3, [%o0]
	sub		%o2, 2, %o2
	ldub		[%o1 - 1], %g3
	add		%o0, 2, %o0
	stb		%g3, [%o0 - 1]
61:
	and		%o1, 3, %g2
	and		%o2, 0xc, %g3
	and		%o1, -4, %o1
	cmp		%g3, 4
	sll		%g2, 3, %g4
	mov		32, %g2
	be		4f
	sub		%g2, %g4, %g7

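/* %g4 = (src & 3) * 8 and %g7 = 32 - %g4 are the left/right shift
 * counts for the forward splice loop below: each stored word is
 * roughly (this_load << %g4) | (next_load >> %g7).
 */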
	blu		3f
	cmp		%g3, 0x8

	be		2f
	srl		%o2, 2, %g3

	ld		[%o1], %o3
	add		%o0, -8, %o0
	ld		[%o1 + 4], %o4
	b		8f
	add		%g3, 1, %g3
2:
	ld		[%o1], %o4
	add		%o0, -12, %o0
	ld		[%o1 + 4], %o5
	add		%g3, 2, %g3
	b		9f
	add		%o1, -4, %o1
3:
	ld		[%o1], %g1
	add		%o0, -4, %o0
	ld		[%o1 + 4], %o3
	srl		%o2, 2, %g3
	b		7f
	add		%o1, 4, %o1
4:
	ld		[%o1], %o5
	cmp		%o2, 7
	ld		[%o1 + 4], %g1
	srl		%o2, 2, %g3
	bleu		10f
	add		%o1, 8, %o1

	ld		[%o1], %o3
	add		%g3, -1, %g3
5:
	sll		%o5, %g4, %g2
	srl		%g1, %g7, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0]
7:
	ld		[%o1 + 4], %o4
	sll		%g1, %g4, %g2
	srl		%o3, %g7, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 4]
8:
	ld		[%o1 + 8], %o5
	sll		%o3, %g4, %g2
	srl		%o4, %g7, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 8]
9:
	ld		[%o1 + 12], %g1
	sll		%o4, %g4, %g2
	srl		%o5, %g7, %g5
	addcc		%g3, -4, %g3
	or		%g2, %g5, %g2
	add		%o1, 16, %o1
	st		%g2, [%o0 + 12]
	add		%o0, 16, %o0
	bne,a		5b
	ld		[%o1], %o3
10:
	sll		%o5, %g4, %g2
	srl		%g1, %g7, %g5
	srl		%g7, 3, %g3
	or		%g2, %g5, %g2
	sub		%o1, %g3, %o1
	andcc		%o2, 2, %g0
	st		%g2, [%o0]
	be		1f
	andcc		%o2, 1, %g0

	ldub		[%o1], %g2
	add		%o1, 2, %o1
	stb		%g2, [%o0 + 4]
	add		%o0, 2, %o0
	ldub		[%o1 - 1], %g2
	stb		%g2, [%o0 + 3]
1:
	be		1f
	nop
	ldub		[%o1], %g2
	stb		%g2, [%o0 + 4]
1:
	retl
	RETL_INSN

#ifdef FASTER_NONALIGNED

87:	/* faster_nonaligned */

	andcc		%o1, 3, %g0
	be		3f
	andcc		%o1, 1, %g0

	be		4f
	andcc		%o1, 2, %g0

	ldub		[%o1], %g2
	add		%o1, 1, %o1
	stb		%g2, [%o0]
	sub		%o2, 1, %o2
	bne		3f
	add		%o0, 1, %o0
4:
	lduh		[%o1], %g2
	add		%o1, 2, %o1
	srl		%g2, 8, %g3
	sub		%o2, 2, %o2
	stb		%g3, [%o0]
	add		%o0, 2, %o0
	stb		%g2, [%o0 - 1]
3:
	andcc		%o1, 4, %g0

	bne		2f
	cmp		%o5, 1

	ld		[%o1], %o4
	srl		%o4, 24, %g2
	stb		%g2, [%o0]
	srl		%o4, 16, %g3
	stb		%g3, [%o0 + 1]
	srl		%o4, 8, %g2
	stb		%g2, [%o0 + 2]
	sub		%o2, 4, %o2
	stb		%o4, [%o0 + 3]
	add		%o1, 4, %o1
	add		%o0, 4, %o0
2:
	be		33f
	cmp		%o5, 2
	be		32f
	sub		%o2, 4, %o2
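
/* %o5 = (dst - src) & 3, computed at entry, picks the shift pair for
 * the SMOVE loops: 1 -> 24/8 (33:), 2 -> 16/16 (32:), 3 -> 8/24 (31:).
 */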
31:
	ld		[%o1], %g2
	add		%o1, 4, %o1
	srl		%g2, 24, %g3
	and		%o0, 7, %g5
	stb		%g3, [%o0]
	cmp		%g5, 7
	sll		%g2, 8, %g1
	add		%o0, 4, %o0
	be		41f
	and		%o2, 0xffffffc0, %o3
	ld		[%o0 - 7], %o4
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		4b
	add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	srl		%g1, 16, %g2
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:
	st		%o4, [%o0 - 7]
	sth		%g2, [%o0 - 3]
	srl		%g1, 8, %g4
	b		88f
	stb		%g4, [%o0 - 1]
32:
	ld		[%o1], %g2
	add		%o1, 4, %o1
	srl		%g2, 16, %g3
	and		%o0, 7, %g5
	sth		%g3, [%o0]
	cmp		%g5, 6
	sll		%g2, 16, %g1
	add		%o0, 4, %o0
	be		42f
	and		%o2, 0xffffffc0, %o3
	ld		[%o0 - 6], %o4
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		4b
	add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	srl		%g1, 16, %g2
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:
	st		%o4, [%o0 - 6]
	b		88f
	sth		%g2, [%o0 - 2]
33:
	ld		[%o1], %g2
	sub		%o2, 4, %o2
	srl		%g2, 24, %g3
	and		%o0, 7, %g5
	stb		%g3, [%o0]
	cmp		%g5, 5
	srl		%g2, 8, %g4
	sll		%g2, 24, %g1
	sth		%g4, [%o0 + 1]
	add		%o1, 4, %o1
	be		43f
	and		%o2, 0xffffffc0, %o3

	ld		[%o0 - 1], %o4
	add		%o0, 4, %o0
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		4b
	add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	srl		%g1, 24, %g2
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	add		%o0, 16, %o0

	srl		%g1, 24, %g2
1:
	st		%o4, [%o0 - 5]
	b		88f
	stb		%g2, [%o0 - 1]
41:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		41b
	add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	srl		%g1, 16, %g2
4:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:
	sth		%g2, [%o0 - 3]
	srl		%g1, 8, %g4
	b		88f
	stb		%g4, [%o0 - 1]
43:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		43b
	add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	srl		%g1, 24, %g2
4:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	add		%o0, 16, %o0

	srl		%g1, 24, %g2
1:
	stb		%g2, [%o0 + 3]
	b		88f
	add		%o0, 4, %o0
42:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		42b
	add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	srl		%g1, 16, %g2
4:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:
	sth		%g2, [%o0 - 2]

	/* Fall through */

#endif /* FASTER_NONALIGNED */

88:	/* short_end */

	and		%o2, 0xe, %o3
20:
	sethi		%hi(89f), %o5
	sll		%o3, 3, %o4
	add		%o0, %o3, %o0
	sub		%o5, %o4, %o5
	add		%o1, %o3, %o1
	jmpl		%o5 + %lo(89f), %g0
	andcc		%o2, 1, %g0
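
/* Same trick as above: each MOVE_SHORTCHUNK below is 4 insns
 * (16 bytes) and copies 2 bytes, so the target is 89f minus
 * (%o3/2)*16 = %o3 << 3 bytes.
 */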

	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89:	/* short_table_end */

	be		1f
	nop

	ldub		[%o1], %g2
	stb		%g2, [%o0]
1:
	retl
	RETL_INSN

90:	/* short_aligned_end */
	bne		88b
	andcc		%o2, 8, %g0

	be		1f
	andcc		%o2, 4, %g0

	ld		[%o1 + 0x00], %g2
	ld		[%o1 + 0x04], %g3
	add		%o1, 8, %o1
	st		%g2, [%o0 + 0x00]
	st		%g3, [%o0 + 0x04]
	add		%o0, 8, %o0
1:
	b		81b
	mov		%o2, %g1