blob: 73b0d066908a7be0692b3d0613cb5ed12dedac99 [file] [log] [blame]
Shin-ichiro KAWASAKI10093272009-09-28 16:11:39 +09001/* $OpenBSD: memset.S,v 1.1.1.1 2006/10/10 22:07:10 miod Exp $ */
2/* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */
3
4/*-
5 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <machine/asm.h>
31
/* Register roles shared by every code path in this file. */
/* r0: end-of-buffer cursor in the large-fill path; the small-fill paths also use r0 as jump target and store source. */
32#define REG_PTR r0
/* r1: scratch register. */
33#define REG_TMP1 r1
34
/* bzero build: r2 holds the fill value (the code sets it to 0), r4 the destination, r5 the length. */
35#ifdef BZERO
36# define REG_C r2
37# define REG_DST r4
38# define REG_LEN r5
/* memset build: r3 keeps a copy of the destination for the return value; r4 destination, r5 fill value, r6 length. */
39#else
40# define REG_DST0 r3
41# define REG_DST r4
42# define REG_C r5
43# define REG_LEN r6
44#endif
45
/* Entry point: bzero when BZERO is defined, otherwise memset. */
/* Dispatches on len: >= 28 goes to large, 12..27 goes to small, 0..11 is filled by the byte loop below. */
/* The instruction written right after bt/s, bf/s or rts is a delay slot: it executes before control transfers. */
46#ifdef BZERO
47ENTRY(bzero)
48#else
49ENTRY(memset)
50 mov REG_DST,REG_DST0 /* for return value */
51#endif
52 /* small amount to fill ? */
53 mov #28,REG_TMP1
54 cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */
/* The delay slot below preloads 12 for the next compare; it also runs when the branch to large is taken. */
55 bt/s large
56 mov #12,REG_TMP1 /* if (len >= 12) goto small; */
57 cmp/hs REG_TMP1,REG_LEN
/* Delay slot of this branch: "mov #0,REG_C" in the bzero build, "tst REG_LEN,REG_LEN" in the memset build. */
58 bt/s small
59#ifdef BZERO
60 mov #0,REG_C
61#endif
62 /* very little fill (0 ~ 11 bytes) */
/* T = (len == 0); REG_LEN is then turned into the end pointer dst + len. */
/* The len == 0 exit relies on add leaving the T bit untouched (per the SH instruction set). */
63 tst REG_LEN,REG_LEN
64 add REG_DST,REG_LEN
65 bt/s done
/* Delay slot: REG_DST = dst + 1, so "REG_DST == REG_LEN" means exactly one byte is left to store. */
66 add #1,REG_DST
67
68 /* unroll 4 loops */
/* Each step: T already says whether the coming store is the last one; the store pre-decrements REG_LEN; */
/* the branch then uses that T, and the compare in its delay slot prepares T for the next step. */
69 cmp/eq REG_DST,REG_LEN
701: mov.b REG_C,@-REG_LEN
71 bt/s done
72 cmp/eq REG_DST,REG_LEN
73 mov.b REG_C,@-REG_LEN
74 bt/s done
75 cmp/eq REG_DST,REG_LEN
76 mov.b REG_C,@-REG_LEN
77 bt/s done
78 cmp/eq REG_DST,REG_LEN
79 mov.b REG_C,@-REG_LEN
80 bf/s 1b
81 cmp/eq REG_DST,REG_LEN
/* Common exit for the byte loop. The memset build reloads r0 from REG_DST0 (dst saved at entry) in the rts delay slot. */
82done:
83#ifdef BZERO
84 rts
85 nop
86#else
87 rts
88 mov REG_DST0,r0
89#endif
90
91
/* Small fill, entered from the dispatch at function entry with 12 <= len < 28. */
/* An even dst is handed to small_aligned; an odd dst is filled by jumping into the table of byte stores below. */
92small:
93 mov REG_DST,r0
/* T = (dst is even). The delay slot copies dst into REG_TMP1 whether or not the branch is taken. */
94 tst #1,r0
95 bt/s small_aligned
96 mov REG_DST,REG_TMP1
/* REG_LEN = 2 * len: the size in bytes of len two-byte instructions. */
97 shll REG_LEN
98 mova 1f,r0 /* 1f must be 4bytes aligned! */
99 add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
/* r0 = address of label 1 minus 2 * len, so execution enters the table len instructions before label 1. */
100 sub REG_LEN,r0
101 jmp @r0
/* Delay slot of jmp: r0 now carries the fill value that every store in the table writes. */
102 mov REG_C,r0
103
/* Table of 32 byte stores: dst+31 .. dst+16 through REG_TMP1, then dst+15 .. dst+0 through REG_DST. */
/* With len <= 27 the entry point is never earlier than the store to dst+26. */
/* NOTE(review): presumably .align 2 gives 4-byte alignment here, and the 32 two-byte instructions keep label 1 aligned -- confirm. */
104 .align 2
105 mov.b r0,@(15,REG_TMP1)
106 mov.b r0,@(14,REG_TMP1)
107 mov.b r0,@(13,REG_TMP1)
108 mov.b r0,@(12,REG_TMP1)
109 mov.b r0,@(11,REG_TMP1)
110 mov.b r0,@(10,REG_TMP1)
111 mov.b r0,@(9,REG_TMP1)
112 mov.b r0,@(8,REG_TMP1)
113 mov.b r0,@(7,REG_TMP1)
114 mov.b r0,@(6,REG_TMP1)
115 mov.b r0,@(5,REG_TMP1)
116 mov.b r0,@(4,REG_TMP1)
117 mov.b r0,@(3,REG_TMP1)
118 mov.b r0,@(2,REG_TMP1)
119 mov.b r0,@(1,REG_TMP1)
120 mov.b r0,@REG_TMP1
121 mov.b r0,@(15,REG_DST)
122 mov.b r0,@(14,REG_DST)
123 mov.b r0,@(13,REG_DST)
124 mov.b r0,@(12,REG_DST)
125 mov.b r0,@(11,REG_DST)
126 mov.b r0,@(10,REG_DST)
127 mov.b r0,@(9,REG_DST)
128 mov.b r0,@(8,REG_DST)
129 mov.b r0,@(7,REG_DST)
130 mov.b r0,@(6,REG_DST)
131 mov.b r0,@(5,REG_DST)
132 mov.b r0,@(4,REG_DST)
133 mov.b r0,@(3,REG_DST)
134 mov.b r0,@(2,REG_DST)
135 mov.b r0,@(1,REG_DST)
/* bzero build: label 1 is the store in the rts delay slot, so the rts is one of the len instructions counted */
/* back from label 1 and the delay-slot store to dst+0 completes the count of len stored bytes. */
136#ifdef BZERO
137 rts
1381: mov.b r0,@REG_DST
/* memset build: label 1 is the rts after the last store; its delay slot reloads r0 with the saved dst. */
139#else
140 mov.b r0,@REG_DST
1411: rts
142 mov REG_DST0,r0
143#endif
144
145
146/* 2 bytes aligned small fill */
/* Reached from small when dst is even. Fills with halfword stores after peeling one trailing byte off an odd len. */
/* In the bzero build REG_C was already zeroed in the delay slot of the "bt/s small" branch at function entry. */
147small_aligned:
/* memset build only: copy the low byte of REG_C into bits 8..15 so that a halfword store writes the byte twice. */
148#ifndef BZERO
149 extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
150 shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
151 or REG_TMP1,REG_C /* REG_C = ????xxxx */
152#endif
153
154 mov REG_LEN,r0
155 tst #1,r0 /* len is aligned? */
/* Even len: branch taken; the delay slot only changes r0, which mova overwrites below, so REG_LEN is unchanged. */
/* Odd len: r0 = len - 1 indexes the last byte; after storing it REG_LEN becomes the even value len - 1. */
156 bt/s 1f
157 add #-1,r0
158 mov.b REG_C,@(r0,REG_DST) /* fill the last byte */
159 mov r0,REG_LEN
1601:
161
/* Each mov.w below is one two-byte instruction that stores two bytes, so stepping back REG_LEN bytes */
/* from the second label 1 enters the table exactly REG_LEN / 2 stores before it. */
162 mova 1f,r0 /* 1f must be 4bytes aligned! */
163 sub REG_LEN,r0
164 jmp @r0
/* Delay slot of jmp: r0 now carries the replicated fill value that every store in the table writes. */
165 mov REG_C,r0
166
/* Table of 16 halfword stores covering dst+30 down to dst+0. */
167 .align 2
168 mov.w r0,@(30,REG_DST)
169 mov.w r0,@(28,REG_DST)
170 mov.w r0,@(26,REG_DST)
171 mov.w r0,@(24,REG_DST)
172 mov.w r0,@(22,REG_DST)
173 mov.w r0,@(20,REG_DST)
174 mov.w r0,@(18,REG_DST)
175 mov.w r0,@(16,REG_DST)
176 mov.w r0,@(14,REG_DST)
177 mov.w r0,@(12,REG_DST)
178 mov.w r0,@(10,REG_DST)
179 mov.w r0,@(8,REG_DST)
180 mov.w r0,@(6,REG_DST)
181 mov.w r0,@(4,REG_DST)
182 mov.w r0,@(2,REG_DST)
/* bzero build: label 1 is the store in the rts delay slot, so the rts counts as one table slot. */
183#ifdef BZERO
184 rts
1851: mov.w r0,@REG_DST
/* memset build: label 1 is the rts after the last store; its delay slot reloads r0 with the saved dst. */
186#else
187 mov.w r0,@REG_DST
1881: rts
189 mov REG_DST0,r0
190#endif
191
192
193
/* Large fill, entered from the dispatch at function entry when len >= 28. */
/* Works downward from the end pointer REG_PTR = dst + len using 4-byte stores once both ends are 4-byte aligned. */
194 .align 2
195large:
/* Build the 32-bit fill pattern: zero for bzero, the low byte of c copied into all four bytes for memset. */
196#ifdef BZERO
197 mov #0,REG_C
198#else
199 extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
200 shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
201 or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
202 swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
203 xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
204#endif
205
/* REG_TMP1 = 3 is the alignment mask. REG_PTR = dst + len is finished in the delay slot of the first branch, */
/* so it is valid on arrival at unaligned_dst as well as on the fall-through path. */
206 mov #3,REG_TMP1
207 tst REG_TMP1,REG_DST
208 mov REG_DST,REG_PTR
209 bf/s unaligned_dst
210 add REG_LEN,REG_PTR /* REG_PTR = dst + len; */
/* dst is aligned here; a len that is not a multiple of 4 means the end pointer still needs aligning. */
/* The delay slot of this branch is the "mov #32,REG_TMP1" that follows the aligned label. */
211 tst REG_TMP1,REG_LEN
212 bf/s unaligned_len
213
/* From here on dst and REG_PTR are both 4-byte aligned and REG_LEN = REG_PTR - dst. */
214aligned:
215 /* fill 32*n bytes */
216 mov #32,REG_TMP1
/* T = (32 > len), unsigned: skip the 32-byte loop when fewer than 32 bytes remain. */
217 cmp/hi REG_LEN,REG_TMP1
218 bt 9f
219 .align 2
/* Each pass moves REG_PTR down by 32 and fills the 32 bytes above it. The compare in the middle of the pass */
/* sets T for the closing bf/s; the stores that follow it leave T unchanged. */
2201: sub REG_TMP1,REG_PTR
221 mov.l REG_C,@REG_PTR
222 sub REG_TMP1,REG_LEN
223 mov.l REG_C,@(4,REG_PTR)
224 cmp/hi REG_LEN,REG_TMP1
225 mov.l REG_C,@(8,REG_PTR)
226 mov.l REG_C,@(12,REG_PTR)
227 mov.l REG_C,@(16,REG_PTR)
228 mov.l REG_C,@(20,REG_PTR)
229 mov.l REG_C,@(24,REG_PTR)
230 bf/s 1b
231 mov.l REG_C,@(28,REG_PTR)
2329:
233
234 /* fill left 4*n bytes */
/* Nothing left when REG_PTR has come down to dst. Otherwise REG_DST is advanced by 4 so that */
/* "REG_DST == REG_PTR" means exactly one word is left to store. */
235 cmp/eq REG_DST,REG_PTR
236 bt 9f
237 add #4,REG_DST
238 cmp/eq REG_DST,REG_PTR
/* Unrolled four times: store a word below REG_PTR, leave if it was the last, compare again in the delay slot. */
2391: mov.l REG_C,@-REG_PTR
240 bt/s 9f
241 cmp/eq REG_DST,REG_PTR
242 mov.l REG_C,@-REG_PTR
243 bt/s 9f
244 cmp/eq REG_DST,REG_PTR
245 mov.l REG_C,@-REG_PTR
246 bt/s 9f
247 cmp/eq REG_DST,REG_PTR
248 mov.l REG_C,@-REG_PTR
249 bf/s 1b
250 cmp/eq REG_DST,REG_PTR
/* Exit of the large path. The memset build reloads r0 with the dst saved at entry in the rts delay slot. */
2519:
252#ifdef BZERO
253 rts
254 nop
255#else
256 rts
257 mov REG_DST0,r0
258#endif
259
260
/* Alignment fix-up for the large path. On entry REG_PTR = dst + len and REG_C holds the replicated fill pattern. */
/* unaligned_dst first aligns the start of the buffer, then falls into the end-pointer fix-up; */
/* unaligned_len is also entered directly from large when dst is aligned but len is not a multiple of 4. */
261unaligned_dst:
262 mov #1,REG_TMP1
263 tst REG_TMP1,REG_DST /* if (dst & 1) { */
/* REG_TMP1 becomes 2, the mask for the halfword test. The T bit from the test above is what bt/s consumes; */
/* the delay slot then leaves T = ((dst & 2) == 0) for label 2 when the branch is taken. */
264 add #1,REG_TMP1
265 bt/s 2f
266 tst REG_TMP1,REG_DST
267 mov.b REG_C,@REG_DST /* *dst++ = c; */
268 add #1,REG_DST
/* Test bit 1 again because the increment above may have changed it. */
269 tst REG_TMP1,REG_DST
2702: /* } */
271 /* if (dst & 2) { */
272 bt 4f
273 mov.w REG_C,@REG_DST /* *(u_int16_t*)dst++ = c; */
274 add #2,REG_DST
2754: /* } */
276
277
/* dst is now 4-byte aligned. REG_PTR is r0, which the immediate form of tst operates on. */
/* If the end pointer is already aligned, skip to the last label 4; the delay slot is the first test of unaligned_len. */
278 tst #3,REG_PTR /* if (ptr & 3) { */
279 bt/s 4f /* */
280unaligned_len:
281 tst #1,REG_PTR /* if (ptr & 1) { */
/* The delay slot tests bit 1 of ptr before the byte store decrements it; taking one off an odd ptr */
/* does not change bit 1, so the result is still correct at label 2. */
282 bt/s 2f
283 tst #2,REG_PTR
284 mov.b REG_C,@-REG_PTR /* *--ptr = c; */
2852: /* } */
286 /* if (ptr & 2) { */
287 bt 4f
288 mov.w REG_C,@-REG_PTR /* *--(u_int16_t*)ptr = c; */
2894: /* } */
290 /* } */
291
/* Both ends are aligned now: recompute the remaining length as REG_PTR - dst (the subtraction runs in the */
/* delay slot of bra) and continue with the word fill at aligned. */
292 mov REG_PTR,REG_LEN
293 bra aligned
294 sub REG_DST,REG_LEN
295