Shin-ichiro KAWASAKI | 1009327 | 2009-09-28 16:11:39 +0900 | [diff] [blame] | 1 | /* $OpenBSD: memset.S,v 1.1.1.1 2006/10/10 22:07:10 miod Exp $ */ |
| 2 | /* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */ |
| 3 | |
| 4 | /*- |
| 5 | * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved. |
| 6 | * |
| 7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions |
| 9 | * are met: |
| 10 | * 1. Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * 2. Redistributions in binary form must reproduce the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer in the |
| 14 | * documentation and/or other materials provided with the distribution. |
| 15 | * 3. The name of the author may not be used to endorse or promote products |
| 16 | * derived from this software without specific prior written permission. |
| 17 | * |
| 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| 19 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| 20 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| 21 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 23 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 24 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 25 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 27 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 | */ |
| 29 | |
| 30 | #include <machine/asm.h> |
| 31 | |
| 32 | #define REG_PTR r0 /* end pointer (dst + len) in the large-fill path */ |
| 33 | #define REG_TMP1 r1 /* scratch: thresholds, bit masks, dst + 16 */ |
| 34 | |
| 35 | #ifdef BZERO |
| 36 | # define REG_C r2 /* fill value, loaded with 0 by the code */ |
| 37 | # define REG_DST r4 /* destination pointer, live on entry */ |
| 38 | # define REG_LEN r5 /* byte count, live on entry */ |
| 39 | #else |
| 40 | # define REG_DST0 r3 /* copy of the original dst, moved to r0 on return */ |
| 41 | # define REG_DST r4 /* destination pointer, live on entry */ |
| 42 | # define REG_C r5 /* fill value, live on entry; only its low byte is used */ |
| 43 | # define REG_LEN r6 /* byte count, live on entry */ |
| 44 | #endif |
| 45 | |
| 46 | #ifdef BZERO |
| 47 | ENTRY(bzero) /* zero REG_LEN bytes starting at REG_DST */ |
| 48 | #else |
| 49 | ENTRY(memset) /* store the low byte of REG_C into REG_LEN bytes at REG_DST; returns the original dst in r0 */ |
| 50 | mov REG_DST,REG_DST0 /* keep the original dst for the return value */ |
| 51 | #endif |
| 52 | /* dispatch on len: 28 and up goes to large, 12..27 goes to small, 0..11 is handled by the byte loop below */ |
| 53 | mov #28,REG_TMP1 |
| 54 | cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; (unsigned compare) */ |
| 55 | bt/s large /* delayed branch: the mov below runs whether or not it is taken */ |
| 56 | mov #12,REG_TMP1 /* if (len >= 12) goto small; */ |
| 57 | cmp/hs REG_TMP1,REG_LEN |
| 58 | bt/s small /* delay slot: the mov to REG_C in the bzero build, the tst below in the memset build */ |
| 59 | #ifdef BZERO |
| 60 | mov #0,REG_C /* bzero fills with zero */ |
| 61 | #endif |
| 62 | /* very little fill (0 ~ 11 bytes) */ |
| 63 | tst REG_LEN,REG_LEN /* T = (len == 0) */ |
| 64 | add REG_DST,REG_LEN /* REG_LEN = dst + len, one past the last byte */ |
| 65 | bt/s done /* len == 0: nothing to store */ |
| 66 | add #1,REG_DST /* delay slot: REG_DST = dst + 1, the stop marker */ |
| 67 | |
| 68 | /* byte loop unrolled 4 times, storing backwards from the end; each compare runs before its store and sets T when that store writes the first byte of the buffer */ |
| 69 | cmp/eq REG_DST,REG_LEN |
| 70 | 1: mov.b REG_C,@-REG_LEN /* *--end = c */ |
| 71 | bt/s done |
| 72 | cmp/eq REG_DST,REG_LEN /* delay slot: compare for the next store */ |
| 73 | mov.b REG_C,@-REG_LEN |
| 74 | bt/s done |
| 75 | cmp/eq REG_DST,REG_LEN |
| 76 | mov.b REG_C,@-REG_LEN |
| 77 | bt/s done |
| 78 | cmp/eq REG_DST,REG_LEN |
| 79 | mov.b REG_C,@-REG_LEN |
| 80 | bf/s 1b /* not finished yet: go around again */ |
| 81 | cmp/eq REG_DST,REG_LEN |
| 82 | done: |
| 83 | #ifdef BZERO |
| 84 | rts |
| 85 | nop /* delay slot */ |
| 86 | #else |
| 87 | rts |
| 88 | mov REG_DST0,r0 /* delay slot: return the original dst */ |
| 89 | #endif |
| 90 | |
| 91 | |
| 92 | small: /* 12 <= len < 28; an odd dst is filled byte by byte through a computed jump */ |
| 93 | mov REG_DST,r0 /* r0 = dst, to test its low bit */ |
| 94 | tst #1,r0 /* T = (dst is 2-byte aligned) */ |
| 95 | bt/s small_aligned |
| 96 | mov REG_DST,REG_TMP1 /* delay slot: REG_TMP1 = dst */ |
| 97 | shll REG_LEN /* len * 2 = code size of len two-byte mov.b instructions */ |
| 98 | mova 1f,r0 /* 1f must be 4bytes aligned! */ |
| 99 | add #16,REG_TMP1 /* REG_TMP1 = dst+16; */ |
| 100 | sub REG_LEN,r0 /* r0 = 1f - 2 * len, the entry point into the table */ |
| 101 | jmp @r0 /* executes exactly len of the stores below */ |
| 102 | mov REG_C,r0 /* delay slot: r0 = fill value */ |
| 103 | |
| 104 | .align 2 |
| 105 | mov.b r0,@(15,REG_TMP1) /* upper half: dst[31] down to dst[16], addressed from dst + 16 since the table keeps displacements within 0..15 */ |
| 106 | mov.b r0,@(14,REG_TMP1) |
| 107 | mov.b r0,@(13,REG_TMP1) |
| 108 | mov.b r0,@(12,REG_TMP1) |
| 109 | mov.b r0,@(11,REG_TMP1) |
| 110 | mov.b r0,@(10,REG_TMP1) |
| 111 | mov.b r0,@(9,REG_TMP1) |
| 112 | mov.b r0,@(8,REG_TMP1) |
| 113 | mov.b r0,@(7,REG_TMP1) |
| 114 | mov.b r0,@(6,REG_TMP1) |
| 115 | mov.b r0,@(5,REG_TMP1) |
| 116 | mov.b r0,@(4,REG_TMP1) |
| 117 | mov.b r0,@(3,REG_TMP1) |
| 118 | mov.b r0,@(2,REG_TMP1) |
| 119 | mov.b r0,@(1,REG_TMP1) |
| 120 | mov.b r0,@REG_TMP1 |
| 121 | mov.b r0,@(15,REG_DST) /* lower half: dst[15] down to dst[0] */ |
| 122 | mov.b r0,@(14,REG_DST) |
| 123 | mov.b r0,@(13,REG_DST) |
| 124 | mov.b r0,@(12,REG_DST) |
| 125 | mov.b r0,@(11,REG_DST) |
| 126 | mov.b r0,@(10,REG_DST) |
| 127 | mov.b r0,@(9,REG_DST) |
| 128 | mov.b r0,@(8,REG_DST) |
| 129 | mov.b r0,@(7,REG_DST) |
| 130 | mov.b r0,@(6,REG_DST) |
| 131 | mov.b r0,@(5,REG_DST) |
| 132 | mov.b r0,@(4,REG_DST) |
| 133 | mov.b r0,@(3,REG_DST) |
| 134 | mov.b r0,@(2,REG_DST) |
| 135 | mov.b r0,@(1,REG_DST) |
| 136 | #ifdef BZERO |
| 137 | rts |
| 138 | 1: mov.b r0,@REG_DST /* the dst[0] store sits in the rts delay slot; the rts counts as one table entry */ |
| 139 | #else |
| 140 | mov.b r0,@REG_DST |
| 141 | 1: rts |
| 142 | mov REG_DST0,r0 /* delay slot: return the original dst */ |
| 143 | #endif |
| 144 | |
| 145 | |
| 146 | /* small fill (12 <= len < 28) of a 2-byte aligned dst, done with 16-bit stores */ |
| 147 | small_aligned: |
| 148 | #ifndef BZERO |
| 149 | extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */ |
| 150 | shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */ |
| 151 | or REG_TMP1,REG_C /* REG_C = ????xxxx */ |
| 152 | #endif |
| 153 | |
| 154 | mov REG_LEN,r0 |
| 155 | tst #1,r0 /* T = (len is even) */ |
| 156 | bt/s 1f |
| 157 | add #-1,r0 /* delay slot: r0 = len - 1 */ |
| 158 | mov.b REG_C,@(r0,REG_DST) /* odd len: fill the last byte, dst[len - 1] */ |
| 159 | mov r0,REG_LEN /* len = len - 1, now even */ |
| 160 | 1: |
| 161 | |
| 162 | mova 1f,r0 /* 1f must be 4bytes aligned! */ |
| 163 | sub REG_LEN,r0 /* r0 = 1f - len: every mov.w is 2 bytes of code and fills 2 bytes */ |
| 164 | jmp @r0 /* executes len / 2 of the stores below */ |
| 165 | mov REG_C,r0 /* delay slot: r0 = 16-bit fill pattern */ |
| 166 | |
| 167 | .align 2 |
| 168 | mov.w r0,@(30,REG_DST) /* table covers dst[31] down to dst[0], two bytes per store */ |
| 169 | mov.w r0,@(28,REG_DST) |
| 170 | mov.w r0,@(26,REG_DST) |
| 171 | mov.w r0,@(24,REG_DST) |
| 172 | mov.w r0,@(22,REG_DST) |
| 173 | mov.w r0,@(20,REG_DST) |
| 174 | mov.w r0,@(18,REG_DST) |
| 175 | mov.w r0,@(16,REG_DST) |
| 176 | mov.w r0,@(14,REG_DST) |
| 177 | mov.w r0,@(12,REG_DST) |
| 178 | mov.w r0,@(10,REG_DST) |
| 179 | mov.w r0,@(8,REG_DST) |
| 180 | mov.w r0,@(6,REG_DST) |
| 181 | mov.w r0,@(4,REG_DST) |
| 182 | mov.w r0,@(2,REG_DST) |
| 183 | #ifdef BZERO |
| 184 | rts |
| 185 | 1: mov.w r0,@REG_DST /* the final store sits in the rts delay slot; the rts counts as one table entry */ |
| 186 | #else |
| 187 | mov.w r0,@REG_DST |
| 188 | 1: rts |
| 189 | mov REG_DST0,r0 /* delay slot: return the original dst */ |
| 190 | #endif |
| 191 | |
| 192 | |
| 193 | |
| 194 | .align 2 |
| 195 | large: /* len >= 28: build a 32-bit pattern, align both ends, then fill with longword stores */ |
| 196 | #ifdef BZERO |
| 197 | mov #0,REG_C |
| 198 | #else |
| 199 | extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */ |
| 200 | shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */ |
| 201 | or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */ |
| 202 | swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */ |
| 203 | xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */ |
| 204 | #endif |
| 205 | |
| 206 | mov #3,REG_TMP1 |
| 207 | tst REG_TMP1,REG_DST /* T = (dst is 4-byte aligned) */ |
| 208 | mov REG_DST,REG_PTR |
| 209 | bf/s unaligned_dst |
| 210 | add REG_LEN,REG_PTR /* REG_PTR = dst + len; */ |
| 211 | tst REG_TMP1,REG_LEN /* T = (len is a multiple of 4) */ |
| 212 | bf/s unaligned_len /* delay slot is the first instruction under the aligned label */ |
| 213 | |
| 214 | aligned: /* here REG_DST and REG_PTR are 4-byte aligned and REG_LEN = REG_PTR - REG_DST */ |
| 215 | /* fill 32 bytes per iteration, backwards from REG_PTR, while len >= 32 */ |
| 216 | mov #32,REG_TMP1 |
| 217 | cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len) */ |
| 218 | bt 9f |
| 219 | .align 2 |
| 220 | 1: sub REG_TMP1,REG_PTR /* ptr -= 32 */ |
| 221 | mov.l REG_C,@REG_PTR |
| 222 | sub REG_TMP1,REG_LEN /* len -= 32 */ |
| 223 | mov.l REG_C,@(4,REG_PTR) |
| 224 | cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len), consumed by the bf/s below */ |
| 225 | mov.l REG_C,@(8,REG_PTR) |
| 226 | mov.l REG_C,@(12,REG_PTR) |
| 227 | mov.l REG_C,@(16,REG_PTR) |
| 228 | mov.l REG_C,@(20,REG_PTR) |
| 229 | mov.l REG_C,@(24,REG_PTR) |
| 230 | bf/s 1b |
| 231 | mov.l REG_C,@(28,REG_PTR) /* delay slot: last store of the 32-byte group */ |
| 232 | 9: |
| 233 | |
| 234 | /* fill the remaining 4*n bytes, one longword at a time, backwards until REG_PTR reaches dst */ |
| 235 | cmp/eq REG_DST,REG_PTR /* nothing left? */ |
| 236 | bt 9f |
| 237 | add #4,REG_DST /* REG_DST = dst + 4, the stop marker */ |
| 238 | cmp/eq REG_DST,REG_PTR /* T = (the next store is the last one) */ |
| 239 | 1: mov.l REG_C,@-REG_PTR /* *--ptr = pattern */ |
| 240 | bt/s 9f |
| 241 | cmp/eq REG_DST,REG_PTR /* delay slot: compare for the next store */ |
| 242 | mov.l REG_C,@-REG_PTR |
| 243 | bt/s 9f |
| 244 | cmp/eq REG_DST,REG_PTR |
| 245 | mov.l REG_C,@-REG_PTR |
| 246 | bt/s 9f |
| 247 | cmp/eq REG_DST,REG_PTR |
| 248 | mov.l REG_C,@-REG_PTR |
| 249 | bf/s 1b |
| 250 | cmp/eq REG_DST,REG_PTR |
| 251 | 9: |
| 252 | #ifdef BZERO |
| 253 | rts |
| 254 | nop /* delay slot */ |
| 255 | #else |
| 256 | rts |
| 257 | mov REG_DST0,r0 /* delay slot: return the original dst */ |
| 258 | #endif |
| 259 | |
| 260 | |
| 261 | unaligned_dst: /* large fill, dst not 4-byte aligned: store 1 and/or 2 leading bytes until it is */ |
| 262 | mov #1,REG_TMP1 |
| 263 | tst REG_TMP1,REG_DST /* if (dst & 1) { */ |
| 264 | add #1,REG_TMP1 /* REG_TMP1 = 2, mask for the next test */ |
| 265 | bt/s 2f |
| 266 | tst REG_TMP1,REG_DST /* delay slot: T = ((dst & 2) == 0) */ |
| 267 | mov.b REG_C,@REG_DST /* *dst++ = c; */ |
| 268 | add #1,REG_DST |
| 269 | tst REG_TMP1,REG_DST /* test bit 1 again after the increment */ |
| 270 | 2: /* } */ |
| 271 | /* if (dst & 2) { */ |
| 272 | bt 4f |
| 273 | mov.w REG_C,@REG_DST /* *(u_int16_t*)dst++ = c; */ |
| 274 | add #2,REG_DST |
| 275 | 4: /* } */ |
| 276 | |
| 277 | |
| 278 | tst #3,REG_PTR /* if (ptr & 3) { */ |
| 279 | bt/s 4f /* delay slot is the tst placed right after the unaligned_len label */ |
| 280 | unaligned_len: /* also entered from large when dst is aligned but len is not a multiple of 4 */ |
| 281 | tst #1,REG_PTR /* if (ptr & 1) { */ |
| 282 | bt/s 2f |
| 283 | tst #2,REG_PTR /* delay slot: T = ((ptr & 2) == 0) */ |
| 284 | mov.b REG_C,@-REG_PTR /* *--ptr = c; */ |
| 285 | 2: /* } */ |
| 286 | /* if (ptr & 2) { */ |
| 287 | bt 4f |
| 288 | mov.w REG_C,@-REG_PTR /* *--(u_int16_t*)ptr = c; */ |
| 289 | 4: /* } */ |
| 290 | /* } */ |
| 291 | |
| 292 | mov REG_PTR,REG_LEN |
| 293 | bra aligned |
| 294 | sub REG_DST,REG_LEN /* delay slot: len = ptr - dst, the size of the aligned middle part */ |
| 295 | |