@ Copyright (c) 2012, The Linux Foundation. All rights reserved.
@
@ Redistribution and use in source and binary forms, with or without
@ modification, are permitted provided that the following conditions are
@ met:
@ * Redistributions of source code must retain the above copyright
@ notice, this list of conditions and the following disclaimer.
@ * Redistributions in binary form must reproduce the above
@ copyright notice, this list of conditions and the following
@ disclaimer in the documentation and/or other materials provided
@ with the distribution.
@ * Neither the name of Code Aurora Forum, Inc. nor the names of its
@ contributors may be used to endorse or promote products derived
@ from this software without specific prior written permission.
@
@ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
@ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
@ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@
@ Additional notices preserved for attributions purposes only.
@
@ ====================================================
@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@
@ Developed at SunSoft, a Sun Microsystems, Inc. business.
@ Permission to use, copy, modify, and distribute this
@ software is freely granted, provided that this notice
@ is preserved.
@ ====================================================
@
@ ====================================================
@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@
@ Developed at SunPro, a Sun Microsystems, Inc. business.
@ Permission to use, copy, modify, and distribute this
@ software is freely granted, provided that this notice
@ is preserved.
@ ====================================================

#include <machine/cpu-features.h>
#include <machine/asm.h>

@ vmov_f64 is spelled with the fconstd mnemonic; every use in this file
@ passes the raw 8-bit encoded immediate (#0x70, #0x60), not a decimal value.
#define vmov_f64 fconstd

@-----------------------------------------------------------------------
@ double cos(double x)
@
@ In:     r0:r1 = x (r0 = low word, r1 = high word); copied into d0.
@ Out:    r0:r1 = result; every exit path also leaves it in d0.
@ Saves:  r4, r6, r7, lr.  Reserves 48 bytes of stack.
@ Calls:  __kernel_cos, __kernel_sin, __kernel_rem_pio2 (defined elsewhere).
@         NOTE(review): from the call sites the two kernels appear to take
@         their operands in d0/d1 (plus r0 = 1 for __kernel_sin) and to
@         return in d0 -- confirm against their definitions.
@
@ Live registers across the body:
@   r4 = high word of x including the sign bit
@   r3 = high word of x with the sign bit cleared (until it is reused)
@   r0 = quadrant count n once argument reduction has run
@
@ Stack frame as used below:
@   [sp, #0]   = 2                    5th argument of __kernel_rem_pio2
@   [sp, #4]   = &two_over_pi         6th argument of __kernel_rem_pio2
@   [sp, #8]   = y0, [sp, #16] = y1   reduced argument (head, tail)
@   [sp, #24], [sp, #32], [sp, #40]   three 24-bit pieces of |x|
@
@ Dispatch on r3:
@   r3 <= 0x3fe921fb (about pi/4)       __kernel_cos with d0 = x, d1 = 0;
@                                       below 0x3e400000 (2^-27) return 1.0
@                                       when x truncates to integer 0
@   r3 >  0x7fefffff (Inf or NaN)       return x - x
@   r3 <= 0x4002d97b (about 3*pi/4)     one subtraction/addition of pi/2
@   r3 <= 0x413921fb (about 2^19*pi)    n = (int)(|x| * invpio2 + 0.5) with
@                                       up to three refinement rounds
@   otherwise                           __kernel_rem_pio2
@ .Lselectregion then picks the kernel from n & 3:
@   0: __kernel_cos   1: -__kernel_sin   2: -__kernel_cos   3: __kernel_sin
@-----------------------------------------------------------------------
ENTRY(cos)
    push            {r4, r6, r7, lr}
    vmov            d0, r0, r1              @ d0 = x
    mov             r2, r0
    mov             r3, r1
    movw            r1, #0x21fb
    movt            r1, #0x3fe9             @ r1 = 0x3fe921fb
    mov             r4, r3                  @ r4 = high word of x, sign kept
    bic             r3, r3, #0x80000000     @ r3 = high word of |x|
    sub             sp, sp, #48
    cmp             r3, r1
    bgt             .Lxgtpio4
    cmp             r3, #0x3e400000
    bge             .Lxnottiny
    vcvt.s32.f64    s15, d0
    vmov            r3, s15
    cmp             r3, #0                  @ does x truncate to 0?
    beq             .Lreturnone
.Lxnottiny:
    vmov.i64        d1, #0                  @ tail operand = 0
    bl              __kernel_cos
.Lleave_cos:
    vmov            r0, r1, d0              @ result back into r0:r1
.Lleave_cos_direct:
    add             sp, sp, #48
    pop             {r4, r6, r7, pc}

.Lxgtpio4:
    movw            r2, #0xffff
    movt            r2, #0x7fef             @ r2 = 0x7fefffff
    cmp             r3, r2
    bgt             .LxisNaN
    movw            r0, #0xd97b
    movt            r0, #0x4002             @ r0 = 0x4002d97b
    cmp             r3, r0
    movw            r2, #0x21fb             @ low half shared by both branches
    bgt             .Lxge3pio4
    cmp             r4, #0
    movt            r2, #0x3ff9             @ r2 = 0x3ff921fb
    ble             .Lsmallxisnegative
    vldr            d16, .Lpio2_1
    cmp             r3, r2                  @ same high word as .Lpio2_1?
    vsub.f64        d16, d0, d16            @ d16 = x - pio2_1
    beq             .Lxnearpio2
    vldr            d17, .Lpio2_1t
.Lfinalizesmallxremainder:
    @ Here d16 = partial remainder, d17 = tail constant still to subtract.
    vsub.f64        d0, d16, d17            @ y0
    vsub.f64        d16, d16, d0
    vstr            d0, [sp, #8]
    vsub.f64        d1, d16, d17            @ y1 = (d16 - y0) - d17
    vstr            d1, [sp, #16]
.Lnmod4is1:
    mov             r0, #1
    bl              __kernel_sin
    vneg.f64        d0, d0                  @ result = -__kernel_sin(...)
    b               .Lleave_cos

.Lreturnone:
    mov             r0, #0
    movw            r1, #0x0000
    movt            r1, #0x3ff0             @ r0:r1 = 0x3ff00000:00000000 = 1.0
    vmov_f64        d0, #0x70               @ encoded immediate for the same 1.0
    b               .Lleave_cos_direct

.LxisNaN:
    vsub.f64        d0, d0, d0              @ x - x
    b               .Lleave_cos

.Lxge3pio4:
    movt            r2, #0x4139             @ r2 = 0x413921fb
    cmp             r3, r2
    bgt             .Lxgigantic
    vmov_f64        d3, #0x60               @ encoded immediate for 0.5
    vldr            d2, .Linvpio2
    vldr            d18, .Lpio2_1
    vabs.f64        d16, d0                 @ d16 = |x|
    vmla.f64        d3, d16, d2             @ d3 = 0.5 + |x| * invpio2
    vcvt.s32.f64    s3, d3                  @ n, truncated
    vcvt.f64.s32    d17, s3                 @ d17 = (double)n
    vmov            r0, s3                  @ r0 = n
    cmp             r0, #31
    vmls.f64        d16, d17, d18           @ d16 = |x| - n * pio2_1
    vldr            d18, .Lpio2_1t
    vmul.f64        d18, d17, d18           @ d18 = n * pio2_1t
    bgt             .Lcomputeremainder      @ n > 31: no table entry
    ldr             r2, .Lnpio2_hw_ptr
    sub             lr, r0, #1              @ table index n - 1
.LPICnpio2_hw0:
    add             r12, pc, r2             @ pc reads as this insn + 8, see .Lnpio2_hw_ptr
    ldr             r1, [r12, lr, lsl #2]   @ r1 = npio2_hw[n - 1]
    cmp             r3, r1
    beq             .Lcomputeremainder      @ high words equal: take the checked path
.Lfinishthirditeration:
    vsub.f64        d0, d16, d18            @ y0 = d16 - d18
    vstr            d0, [sp, #8]
.Lfinishcomputingremainder:
    @ Here d0 = y0 and d16/d18 are the pair y0 was formed from.
    vsub.f64        d16, d16, d0
    cmp             r4, #0                  @ sign of the original x
    vsub.f64        d1, d16, d18            @ y1 = (d16 - y0) - d18
    vstr            d1, [sp, #16]
    blt             .Lhandlenegativex
.Lselectregion:
    and             r0, r0, #3
    cmp             r0, #1
    beq             .Lnmod4is1
    cmp             r0, #2
    beq             .Lnmod4is2
    cmp             r0, #0
    bne             .Lnmod4is3
    bl              __kernel_cos
    b               .Lleave_cos

.Lxgigantic:
    asr             r2, r3, #20             @ biased exponent of |x|
    vmov            r6, r7, d0              @ r6 = low word of x
    sub             r2, r2, #1040
    mov             r0, r6
    sub             r2, r2, #6              @ r2 = exponent - 1046
    vldr            d16, .Ltwo24
    sub             r1, r3, r2, lsl #20     @ high word with exponent field set to 1046
    vmov            d18, r0, r1             @ d18 = |x| rescaled
    vcvt.s32.f64    s15, d18
    add             r1, sp, #48             @ one past the third piece slot
    mov             r3, #3                  @ r3 = number of pieces
    vcvt.f64.s32    d17, s15                @ integer part
    vsub.f64        d18, d18, d17
    vstr            d17, [sp, #24]          @ piece 0
    vmul.f64        d18, d18, d16           @ fraction * .Ltwo24
    vcvt.s32.f64    s15, d18
    vcvt.f64.s32    d17, s15
    vsub.f64        d18, d18, d17
    vstr            d17, [sp, #32]          @ piece 1
    vmul.f64        d16, d18, d16           @ piece 2
    fcmpzd          d16
    vstmdb          r1!, {d16}              @ store piece 2 at sp + 40, r1 = sp + 40
    vmrs            APSR_nzcv, fpscr
    bne             .Lprocessnonzeroterm
.Lskipzeroterms:
    @ Drop trailing zero pieces: step r1 back one slot and decrement the count.
    vldmdb          r1!, {d16}
    sub             r3, r3, #1
    fcmpzd          d16
    vmrs            APSR_nzcv, fpscr
    beq             .Lskipzeroterms
.Lprocessnonzeroterm:
    @ r2 = exponent - 1046 and r3 = piece count are still live from above.
    ldr             r12, .Ltwo_over_pi_ptr
    add             r0, sp, #24             @ r0 = address of the pieces
    add             r1, sp, #8              @ r1 = address of y0/y1
.LPICtwo_over_pi0:
    add             lr, pc, r12             @ pc reads as this insn + 8, see .Ltwo_over_pi_ptr
    mov             r12, #2
    str             lr, [sp, #4]            @ stack argument: &two_over_pi
    str             r12, [sp]               @ stack argument: 2
    bl              __kernel_rem_pio2       @ r0 = n on return
    cmp             r4, #0
    vldr            d0, [sp, #8]            @ y0
    blt             .Lhandlenegativexalso
    vldr            d1, [sp, #16]           @ y1
    b               .Lselectregion

.Lxnearpio2:
    vldr            d17, .Lpio2_2
    vsub.f64        d16, d16, d17           @ also subtract pio2_2
    vldr            d17, .Lpio2_2t          @ and use the finer tail
    b               .Lfinalizesmallxremainder

.Lsmallxisnegative:
    vldr            d1, .Lpio2_1
    cmp             r3, r2                  @ same high word as .Lpio2_1?
    vadd.f64        d16, d0, d1             @ d16 = x + pio2_1
    beq             .Lxnearnegpio2
    vldr            d17, .Lpio2_1t
.Lfinalizesmallnegxremainder:
    vadd.f64        d0, d16, d17            @ y0
    vsub.f64        d16, d16, d0
    vstr            d0, [sp, #8]
    vadd.f64        d1, d16, d17            @ y1 = (d16 - y0) + d17
    vstr            d1, [sp, #16]
.Lnmod4is3:
    @ Reached for n & 3 == 3 and by fall-through from the negative small-x path.
    mov             r0, #1
    bl              __kernel_sin
    b               .Lleave_cos

.Lnmod4is2:
    bl              __kernel_cos
    vneg.f64        d0, d0                  @ result = -__kernel_cos(...)
    b               .Lleave_cos

.Lcomputeremainder:
    @ Here d16 = |x| - n * pio2_1, d17 = (double)n, d18 = n * pio2_1t.
    vsub.f64        d0, d16, d18            @ first-round y0
    asr             r1, r3, #20             @ r1 = exponent field of |x|
    vmov            r2, r3, d0              @ r3 = high word of y0 (old r3 is dead)
    ubfx            r3, r3, #20, #11        @ exponent field of y0
    rsb             r3, r3, r1              @ exponent drop from |x| to y0
    vstr            d0, [sp, #8]
    cmp             r3, #16
    ble             .Lfinishcomputingremainder
    @ Second round with pio2_2 / pio2_2t.
    vldr            d18, .Lpio2_2
    vmul.f64        d20, d17, d18           @ d20 = n * pio2_2
    vsub.f64        d19, d16, d20           @ d19 = refined remainder
    vsub.f64        d16, d16, d19
    vsub.f64        d18, d16, d20           @ rounding error of that subtraction
    vldr            d16, .Lpio2_2t
    vnmls.f64       d18, d17, d16           @ d18 = n * pio2_2t - d18
    vsub.f64        d0, d19, d18            @ second-round y0
    vmov            r2, r3, d0
    ubfx            r3, r3, #20, #11
    rsb             r1, r3, r1              @ exponent drop after round two
    vstr            d0, [sp, #8]
    cmp             r1, #49
    ble             .Lfinishseconditeration
    @ Third round with pio2_3 / pio2_3t.
    vldr            d5, .Lpio2_3
    vmul.f64        d20, d17, d5            @ d20 = n * pio2_3
    vsub.f64        d16, d19, d20           @ d16 = refined remainder
    vsub.f64        d4, d19, d16
    vldr            d19, .Lpio2_3t
    vsub.f64        d18, d4, d20            @ rounding error of that subtraction
    vnmls.f64       d18, d17, d19           @ d18 = n * pio2_3t - d18
    b               .Lfinishthirditeration

.Lhandlenegativex:
    vneg.f64        d0, d0                  @ y0 = -y0
    rsb             r0, r0, #0              @ n = -n
    vneg.f64        d1, d1                  @ y1 = -y1
    vstr            d0, [sp, #8]
    vstr            d1, [sp, #16]
    b               .Lselectregion

.Lfinishseconditeration:
    vmov            d16, d19                @ restore the round-two remainder
    b               .Lfinishcomputingremainder

.Lxnearnegpio2:
    vldr            d0, .Lpio2_2
    vldr            d17, .Lpio2_2t
    vadd.f64        d16, d16, d0            @ also add pio2_2
    b               .Lfinalizesmallnegxremainder

.Lhandlenegativexalso:
    @ Same negation as .Lhandlenegativex, but y1 is still in memory here.
    vldr            d6, [sp, #16]
    vneg.f64        d0, d0
    rsb             r0, r0, #0
    vneg.f64        d1, d6
    vstr            d0, [sp, #8]
    vstr            d1, [sp, #16]
    b               .Lselectregion

@ Literal pool.  Each double is stored as two words, low word first.
@ NOTE(review): the values look like the fdlibm pi/2 split constants
@ (head / tail pairs of decreasing magnitude) -- confirm before editing.
.align 3
.Lpio2_1:
    .word 0x54400000, 0x3ff921fb
.Lpio2_1t:
    .word 0x1a626331, 0x3dd0b461
.Linvpio2:
    .word 0x6dc9c883, 0x3fe45f30
.Ltwo24:
    .word 0x00000000, 0x41700000
.Lpio2_2:
    .word 0x1a600000, 0x3dd0b461
.Lpio2_2t:
    .word 0x2e037073, 0x3ba3198a
.Lpio2_3:
    .word 0x2e000000, 0x3ba3198a
.Lpio2_3t:
    .word 0x252049c1, 0x397b839a
@ Table offsets relative to the pc value seen by the add at each .LPIC label.
.Lnpio2_hw_ptr:
    .word .Lnpio2_hw-(.LPICnpio2_hw0+8)
.Ltwo_over_pi_ptr:
    .word .Ltwo_over_pi-(.LPICtwo_over_pi0+8)
END(cos)

@ npio2_hw: 32 words (128 bytes) of read-only data.  cos loads entry n - 1
@ through .Lnpio2_hw and compares it with the high word of |x|.
@ NOTE(review): the entries look like the high words of k * pi/2 for
@ k = 1..32, as in the fdlibm table of the same name -- confirm.
    .section .rodata.npio2_hw,"a",%progbits
    .align 2
.Lnpio2_hw = . + 0
    .type npio2_hw, %object
    .size npio2_hw, 128
npio2_hw:
    .word 0x3ff921fb, 0x400921fb, 0x4012d97c, 0x401921fb
    .word 0x401f6a7a, 0x4022d97c, 0x4025fdbb, 0x402921fb
    .word 0x402c463a, 0x402f6a7a, 0x4031475c, 0x4032d97c
    .word 0x40346b9c, 0x4035fdbb, 0x40378fdb, 0x403921fb
    .word 0x403ab41b, 0x403c463a, 0x403dd85a, 0x403f6a7a
    .word 0x40407e4c, 0x4041475c, 0x4042106c, 0x4042d97c
    .word 0x4043a28c, 0x40446b9c, 0x404534ac, 0x4045fdbb
    .word 0x4046c6cb, 0x40478fdb, 0x404858eb, 0x404921fb

@ two_over_pi: 66 words (264 bytes) of read-only data, each holding a
@ 24-bit value.  cos passes its address (through .Ltwo_over_pi) as the last
@ argument of __kernel_rem_pio2.
@ NOTE(review): presumably successive 24-bit chunks of the binary expansion
@ of 2/pi, as in the fdlibm table of the same name -- confirm.
    .section .rodata.two_over_pi,"a",%progbits
    .align 2
.Ltwo_over_pi = . + 0
    .type two_over_pi, %object
    .size two_over_pi, 264
two_over_pi:
    .word 0x00a2f983, 0x006e4e44, 0x001529fc, 0x002757d1, 0x00f534dd, 0x00c0db62
    .word 0x0095993c, 0x00439041, 0x00fe5163, 0x00abdebb, 0x00c561b7, 0x00246e3a
    .word 0x00424dd2, 0x00e00649, 0x002eea09, 0x00d1921c, 0x00fe1deb, 0x001cb129
    .word 0x00a73ee8, 0x008235f5, 0x002ebb44, 0x0084e99c, 0x007026b4, 0x005f7e41
    .word 0x003991d6, 0x00398353, 0x0039f49c, 0x00845f8b, 0x00bdf928, 0x003b1ff8
    .word 0x0097ffde, 0x0005980f, 0x00ef2f11, 0x008b5a0a, 0x006d1f6d, 0x00367ecf
    .word 0x0027cb09, 0x00b74f46, 0x003f669e, 0x005fea2d, 0x007527ba, 0x00c7ebe5
    .word 0x00f17b3d, 0x000739f7, 0x008a5292, 0x00ea6bfb, 0x005fb11f, 0x008d5d08
    .word 0x00560330, 0x0046fc7b, 0x006babf0, 0x00cfbc20, 0x009af436, 0x001da9e3
    .word 0x0091615e, 0x00e61b08, 0x00659985, 0x005f14a0, 0x0068408d, 0x00ffd880
    .word 0x004d7327, 0x00310606, 0x001556ca, 0x0073a8c9, 0x0060e27b, 0x00c08c6b