Brent DeGraaf | a8c0221 | 2012-05-30 22:50:19 -0400 | [diff] [blame^] | 1 | @ Copyright (c) 2012, The Linux Foundation. All rights reserved. |
| 2 | @ |
| 3 | @ Redistribution and use in source and binary forms, with or without |
| 4 | @ modification, are permitted provided that the following conditions are |
| 5 | @ met: |
| 6 | @ * Redistributions of source code must retain the above copyright |
| 7 | @ notice, this list of conditions and the following disclaimer. |
| 8 | @ * Redistributions in binary form must reproduce the above |
| 9 | @ copyright notice, this list of conditions and the following |
| 10 | @ disclaimer in the documentation and/or other materials provided |
| 11 | @ with the distribution. |
| 12 | @ * Neither the name of Code Aurora Forum, Inc. nor the names of its |
| 13 | @ contributors may be used to endorse or promote products derived |
| 14 | @ from this software without specific prior written permission. |
| 15 | @ |
| 16 | @ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED |
| 17 | @ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
| 18 | @ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT |
| 19 | @ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS |
| 20 | @ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | @ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | @ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
| 23 | @ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
| 24 | @ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
| 25 | @ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN |
| 26 | @ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | @ |
| 28 | @ Additional notices preserved for attributions purposes only. |
| 29 | @ |
| 30 | @ ==================================================== |
| 31 | @ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
| 32 | @ |
| 33 | @ Developed at SunSoft, a Sun Microsystems, Inc. business. |
| 34 | @ Permission to use, copy, modify, and distribute this |
| 35 | @ software is freely granted, provided that this notice |
| 36 | @ is preserved. |
| 37 | @ ==================================================== |
| 38 | @ |
| 39 | @ ==================================================== |
| 40 | @ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
| 41 | @ |
| 42 | @ Developed at SunPro, a Sun Microsystems, Inc. business. |
| 43 | @ Permission to use, copy, modify, and distribute this |
| 44 | @ software is freely granted, provided that this notice |
| 45 | @ is preserved. |
| 46 | @ ==================================================== |
| 47 | |
| 48 | #include <machine/cpu-features.h> |
| 49 | #include <machine/asm.h> |
| 50 | |
@ vmov_f64 is an alias for the pre-UAL VFP mnemonic fconstd, which takes
@ the raw 8-bit encoded immediate instead of a decimal constant. It is
@ used once in this file, with #0x60, the VFP immediate encoding of 0.5.
| 51 | #define vmov_f64 fconstd |
| 52 | |
@ ---------------------------------------------------------------------
@ double sin(double x)
@
@ In:    r0/r1 = x. r1 is the word holding sign and exponent (it is the
@        one masked and compared below). The pair is copied to d0.
@ Out:   r0/r1 = result, moved out of d0 at .Lleave_sin.
@ Saves: r4, r6, r7, lr, then reserves a 48-byte frame:
@          [sp, #0], [sp, #4]     outgoing stack args of __kernel_rem_pio2
@          [sp, #8], [sp, #16]    reduced argument y[0] and its tail y[1]
@          [sp, #24] .. [sp, #47] three doubles tx[0..2] (huge |x| only)
@ Calls: __kernel_sin (d0, d1, flag in r0), __kernel_cos (d0, d1) and
@        __kernel_rem_pio2 (r0-r3 plus two stack words).
@        NOTE(review): this relies on the kernel helpers taking their
@        double arguments in d0/d1 and returning in d0 - confirm against
@        their definitions.
@ Long-lived registers: r3 = ix (high word of |x|), r4 = original high
@        word (used only for sign tests), r0 = quadrant count n.
@ NOTE(review): the pc-relative literals use "+8", which matches ARM
@        state only - confirm this file is never assembled as Thumb.
@ ---------------------------------------------------------------------
| 53 | ENTRY(sin) |
| 54 | push {r4, r6, r7, lr} |
| 55 | vmov d0, r0, r1 |
| 56 | mov r2, r0 |
| 57 | mov r3, r1 |
@ r1 = 0x3fe921fb, the high word of pi/4.
| 58 | movw r1, #0x21fb |
| 59 | movt r1, #0x3fe9 |
| 60 | mov r4, r3 |
@ r3 = ix: high word of x with the sign bit cleared.
| 61 | bic r3, r3, #0x80000000 |
| 62 | sub sp, sp, #48 |
| 63 | cmp r3, r1 |
| 64 | bgt .Lxgtpio4 |
@ ix <= 0x3fe921fb: no range reduction needed. Below high word
@ 0x3e400000 (2^-27) x itself is returned when it converts to integer 0.
| 65 | cmp r3, #0x3e400000 |
| 66 | bge .Lxnottiny |
| 67 | vcvt.s32.f64 s15, d0 |
| 68 | vmov r3, s15 |
| 69 | cmp r3, #0 |
| 70 | bne .Lxnottiny |
@ Common exit: result in d0 -> r0/r1, drop the frame, return.
| 71 | .Lleave_sin: |
| 72 | vmov r0, r1, d0 |
| 73 | add sp, sp, #48 |
| 74 | pop {r4, r6, r7, pc} |
@ ix > high word of pi/4.
| 75 | .Lxgtpio4: |
@ High words above 0x7fefffff belong to Inf or NaN.
| 76 | movw r2, #0xffff |
| 77 | movt r2, #0x7fef |
| 78 | cmp r3, r2 |
| 79 | bgt .LxisNaN |
@ r0 = 0x4002d97b; a larger ix is handled as |x| >= 3*pi/4.
| 80 | movw r0, #0xd97b |
| 81 | movt r0, #0x4002 |
| 82 | cmp r3, r0 |
@ The low half 0x21fb of r2 is shared: each path supplies its own top
@ half with movt (0x3ff9 just below, 0x4139 at .Lxge3pio4). movw does
@ not touch the flags set by the cmp above.
| 83 | movw r2, #0x21fb |
| 84 | bgt .Lxge3pio4 |
@ pi/4 < |x| < 3*pi/4: subtract pi/2 directly (add it for negative x).
| 85 | cmp r4, #0 |
| 86 | movt r2, #0x3ff9 |
| 87 | ble .Lsmallxisnegative |
| 88 | vldr d16, .Lpio2_1 |
@ r2 = 0x3ff921fb, the high word of .Lpio2_1. When ix matches it, x is
@ near pi/2 (per the label) and the .Lpio2_2 pieces are used as well.
| 89 | cmp r3, r2 |
| 90 | vsub.f64 d16, d0, d16 |
| 91 | beq .Lxnearpio2 |
| 92 | vldr d17, .Lpio2_1t |
@ With z in d16 and tail t in d17:
@   y[0] = z - t;  y[1] = (z - y[0]) - t.  Both are also left in d0/d1.
| 93 | .Lfinalizesmallxremainder: |
| 94 | vsub.f64 d0, d16, d17 |
| 95 | vsub.f64 d16, d16, d0 |
| 96 | vstr d0, [sp, #8] |
| 97 | vsub.f64 d1, d16, d17 |
| 98 | vstr d1, [sp, #16] |
@ n & 3 == 1 (and the fall-through for x - pi/2): result is
@ __kernel_cos(y[0], y[1]).
| 99 | .Lnmod3is1: |
| 100 | bl __kernel_cos |
| 101 | b .Lleave_sin |
@ |x| within the pi/4 bound and not tiny: __kernel_sin(x, 0.0, 0).
| 102 | .Lxnottiny: |
| 103 | vmov.i64 d1, #0 |
| 104 | mov r0, #0 |
| 105 | bl __kernel_sin |
| 106 | b .Lleave_sin |
@ Inf or NaN input: return x - x.
| 107 | .LxisNaN: |
| 108 | vsub.f64 d0, d0, d0 |
| 109 | b .Lleave_sin |
| 110 | .Lxge3pio4: |
@ r2 = 0x413921fb; larger high words take the slow path .Lxgigantic.
| 111 | movt r2, #0x4139 |
| 112 | cmp r3, r2 |
| 113 | bgt .Lxgigantic |
@ Medium |x|:
@   n  = (int)(|x| * invpio2 + 0.5)   (r0, via s3)
@   fn = (double)n                    (d17)
@   r  = |x| - fn * pio2_1            (d16)
@   w  = fn * pio2_1t                 (d18)
@ s3 overlaps the upper half of d1; d1 is rewritten before it is used.
| 114 | vmov_f64 d3, #0x60 |
| 115 | vldr d2, .Linvpio2 |
| 116 | vldr d18, .Lpio2_1 |
| 117 | vabs.f64 d16, d0 |
| 118 | vmla.f64 d3, d16, d2 |
| 119 | vcvt.s32.f64 s3, d3 |
| 120 | vcvt.f64.s32 d17, s3 |
| 121 | vmov r0, s3 |
@ The flags from this cmp are consumed by the bgt four lines down; the
@ VFP instructions in between leave them untouched.
| 122 | cmp r0, #31 |
| 123 | vmls.f64 d16, d17, d18 |
| 124 | vldr d18, .Lpio2_1t |
| 125 | vmul.f64 d18, d17, d18 |
| 126 | bgt .Lcomputeremainder |
@ n <= 31: fetch npio2_hw[n - 1] position-independently. The literal at
@ .Lnpio2_hw_ptr holds table - (.LPICnpio2_hw0 + 8), and pc reads as
@ .LPICnpio2_hw0 + 8 in the add below.
| 127 | ldr r2, .Lnpio2_hw_ptr |
| 128 | sub lr, r0, #1 |
| 129 | .LPICnpio2_hw0: |
| 130 | add r12, pc, r2 |
| 131 | ldr r1, [r12, lr, lsl #2] |
@ If ix equals the table entry, use the multi-step reduction; otherwise
@ the single step r - w is taken as y[0].
| 132 | cmp r3, r1 |
| 133 | beq .Lcomputeremainder |
@ y[0] = r - w (r in d16, w in d18).
| 134 | .Lfinishthirditeration: |
| 135 | vsub.f64 d0, d16, d18 |
| 136 | vstr d0, [sp, #8] |
@ y[1] = (r - y[0]) - w; negative x is fixed up at .Lhandlenegativex.
| 137 | .Lfinishcomputingremainder: |
| 138 | vsub.f64 d16, d16, d0 |
| 139 | cmp r4, #0 |
| 140 | vsub.f64 d1, d16, d18 |
| 141 | vstr d1, [sp, #16] |
| 142 | blt .Lhandlenegativex |
@ Dispatch on n & 3 with y[0] in d0 and y[1] in d1:
@   0 ->  __kernel_sin(y[0], y[1], 1)     (fall-through)
@   1 ->  __kernel_cos(y[0], y[1])        (.Lnmod3is1)
@   2 -> -__kernel_sin(y[0], y[1], 1)     (.Lnmod3is2)
@   3 -> -__kernel_cos(y[0], y[1])        (.Lnmod3is0)
@ Despite the "nmod3" label names the test is n & 3, and .Lnmod3is0 is
@ the target for n & 3 == 3.
| 143 | .Lselectregion: |
| 144 | and r0, r0, #3 |
| 145 | cmp r0, #1 |
| 146 | beq .Lnmod3is1 |
| 147 | cmp r0, #2 |
| 148 | beq .Lnmod3is2 |
| 149 | cmp r0, #0 |
| 150 | bne .Lnmod3is0 |
| 151 | mov r0, #1 |
| 152 | bl __kernel_sin |
| 153 | b .Lleave_sin |
@ Huge finite |x|: rescale |x|, split it into three integer-valued
@ doubles tx[0..2] (scaling by 2^24 between pieces) and pass them to
@ __kernel_rem_pio2.
@   r2 = e0 = (ix >> 20) - 1046
@   z  = {low word of x, ix - (e0 << 20)}   (d18)
| 154 | .Lxgigantic: |
| 155 | asr r2, r3, #20 |
| 156 | vmov r6, r7, d0 |
| 157 | sub r2, r2, #1040 |
| 158 | mov r0, r6 |
| 159 | sub r2, r2, #6 |
| 160 | vldr d16, .Ltwo24 |
| 161 | sub r1, r3, r2, lsl #20 |
| 162 | vmov d18, r0, r1 |
| 163 | vcvt.s32.f64 s15, d18 |
@ r1 = one past tx[2]; r3 = 3 is the initial term count.
| 164 | add r1, sp, #48 |
| 165 | mov r3, #3 |
| 166 | vcvt.f64.s32 d17, s15 |
| 167 | vsub.f64 d18, d18, d17 |
| 168 | vstr d17, [sp, #24] |
| 169 | vmul.f64 d18, d18, d16 |
| 170 | vcvt.s32.f64 s15, d18 |
| 171 | vcvt.f64.s32 d17, s15 |
| 172 | vsub.f64 d18, d18, d17 |
| 173 | vstr d17, [sp, #32] |
| 174 | vmul.f64 d16, d18, d16 |
@ Test tx[2] against zero; the store in between writes it to [sp, #40]
@ (leaving r1 there) without disturbing the FPSCR compare result.
| 175 | fcmpzd d16 |
| 176 | vstmdb r1!, {d16} |
| 177 | vmrs APSR_nzcv, fpscr |
| 178 | bne .Lprocessnonzeroterm |
@ Step down through tx[1], tx[0], decrementing r3 for each zero term,
@ so r3 ends as the number of terms up to the last nonzero one.
| 179 | .Lskipzeroterms: |
| 180 | vldmdb r1!, {d16} |
| 181 | sub r3, r3, #1 |
| 182 | fcmpzd d16 |
| 183 | vmrs APSR_nzcv, fpscr |
| 184 | beq .Lskipzeroterms |
@ Call __kernel_rem_pio2 with r0 = &tx[0], r1 = &y[0], r2 = e0,
@ r3 = term count, [sp] = 2 and [sp, #4] = address of two_over_pi
@ (formed pc-relatively, same scheme as for npio2_hw).
| 185 | .Lprocessnonzeroterm: |
| 186 | ldr r12, .Ltwo_over_pi_ptr |
| 187 | add r0, sp, #24 |
| 188 | add r1, sp, #8 |
| 189 | .LPICtwo_over_pi0: |
| 190 | add lr, pc, r12 |
| 191 | mov r12, #2 |
| 192 | str lr, [sp, #4] |
| 193 | str r12, [sp] |
| 194 | bl __kernel_rem_pio2 |
@ r0 now holds n. Reload y[0]/y[1] into d0/d1; negative x goes through
@ .Lhandlenegativexalso first.
| 195 | cmp r4, #0 |
| 196 | vldr d0, [sp, #8] |
| 197 | blt .Lhandlenegativexalso |
| 198 | vldr d1, [sp, #16] |
| 199 | b .Lselectregion |
@ Positive x with ix == high word of pi/2: z -= pio2_2, tail = pio2_2t.
| 200 | .Lxnearpio2: |
| 201 | vldr d17, .Lpio2_2 |
| 202 | vsub.f64 d16, d16, d17 |
| 203 | vldr d17, .Lpio2_2t |
| 204 | b .Lfinalizesmallxremainder |
@ -3*pi/4 < x < -pi/4: mirror of the positive path, adding the pi/2
@ pieces instead of subtracting them.
| 205 | .Lsmallxisnegative: |
| 206 | vldr d1, .Lpio2_1 |
| 207 | cmp r3, r2 |
| 208 | vadd.f64 d16, d0, d1 |
| 209 | beq .Lxnearnegpio2 |
| 210 | vldr d17, .Lpio2_1t |
@ y[0] = z + t;  y[1] = (z - y[0]) + t, with z in d16 and t in d17.
| 211 | .Lfinalizesmallnegxremainder: |
| 212 | vadd.f64 d0, d16, d17 |
| 213 | vsub.f64 d16, d16, d0 |
| 214 | vstr d0, [sp, #8] |
| 215 | vadd.f64 d1, d16, d17 |
| 216 | vstr d1, [sp, #16] |
@ n & 3 == 3 (and the fall-through for x + pi/2): result is
@ -__kernel_cos(y[0], y[1]).
| 217 | .Lnmod3is0: |
| 218 | bl __kernel_cos |
| 219 | vneg.f64 d0, d0 |
| 220 | b .Lleave_sin |
@ n & 3 == 2: result is -__kernel_sin(y[0], y[1], 1).
| 221 | .Lnmod3is2: |
| 222 | mov r0, #1 |
| 223 | bl __kernel_sin |
| 224 | vneg.f64 d0, d0 |
| 225 | b .Lleave_sin |
@ Multi-step reduction (n > 31, or ix matched npio2_hw[n - 1]).
@ r1 = j = ix >> 20. After each candidate y[0] its 11-bit exponent field
@ is extracted and i = j - exponent is compared with a threshold.
@ r3 (ix) is reused as scratch from here on.
| 226 | .Lcomputeremainder: |
| 227 | vsub.f64 d0, d16, d18 |
| 228 | asr r1, r3, #20 |
| 229 | vmov r2, r3, d0 |
| 230 | ubfx r3, r3, #20, #11 |
| 231 | rsb r3, r3, r1 |
| 232 | vstr d0, [sp, #8] |
@ i <= 16: the first step is kept (r in d16, w in d18).
| 233 | cmp r3, #16 |
| 234 | ble .Lfinishcomputingremainder |
@ Second step, with t = previous r (d16):
@   w = fn * pio2_2;  r = t - w (d19);
@   w = fn * pio2_2t - ((t - r) - w) (d18, via vnmls);  y[0] = r - w.
| 235 | vldr d18, .Lpio2_2 |
| 236 | vmul.f64 d20, d17, d18 |
| 237 | vsub.f64 d19, d16, d20 |
| 238 | vsub.f64 d16, d16, d19 |
| 239 | vsub.f64 d18, d16, d20 |
| 240 | vldr d16, .Lpio2_2t |
| 241 | vnmls.f64 d18, d17, d16 |
| 242 | vsub.f64 d0, d19, d18 |
| 243 | vmov r2, r3, d0 |
| 244 | ubfx r3, r3, #20, #11 |
| 245 | rsb r1, r3, r1 |
| 246 | vstr d0, [sp, #8] |
@ i <= 49: the second step is kept.
| 247 | cmp r1, #49 |
| 248 | ble .Lfinishseconditeration |
@ Third step, same pattern with pio2_3 / pio2_3t and t = second-step r
@ (d19); leaves r in d16 and w in d18 for .Lfinishthirditeration.
| 249 | vldr d5, .Lpio2_3 |
| 250 | vmul.f64 d20, d17, d5 |
| 251 | vsub.f64 d16, d19, d20 |
| 252 | vsub.f64 d4, d19, d16 |
| 253 | vldr d19, .Lpio2_3t |
| 254 | vsub.f64 d18, d4, d20 |
| 255 | vnmls.f64 d18, d17, d19 |
| 256 | b .Lfinishthirditeration |
@ Negative x after the medium-size reduction: n = -n, y[0] = -y[0],
@ y[1] = -y[1], in registers and in the stack copies.
| 257 | .Lhandlenegativex: |
| 258 | vneg.f64 d0, d0 |
| 259 | rsb r0, r0, #0 |
| 260 | vneg.f64 d1, d1 |
| 261 | vstr d0, [sp, #8] |
| 262 | vstr d1, [sp, #16] |
| 263 | b .Lselectregion |
@ Second step accepted: move its r (d19) to d16, where the common tail
@ expects it; y[0] is already in d0 and w in d18.
| 264 | .Lfinishseconditeration: |
| 265 | vmov d16, d19 |
| 266 | b .Lfinishcomputingremainder |
@ Negative x with ix == high word of pi/2: z += pio2_2, tail = pio2_2t.
@ d0 is only a temporary here; it is rewritten at the join point.
| 267 | .Lxnearnegpio2: |
| 268 | vldr d0, .Lpio2_2 |
| 269 | vldr d17, .Lpio2_2t |
| 270 | vadd.f64 d16, d16, d0 |
| 271 | b .Lfinalizesmallnegxremainder |
@ Negative x after __kernel_rem_pio2: same fix-up as .Lhandlenegativex,
@ with y[1] first reloaded from the stack.
| 272 | .Lhandlenegativexalso: |
| 273 | vldr d6, [sp, #16] |
| 274 | vneg.f64 d0, d0 |
| 275 | rsb r0, r0, #0 |
| 276 | vneg.f64 d1, d6 |
| 277 | vstr d0, [sp, #8] |
| 278 | vstr d1, [sp, #16] |
| 279 | b .Lselectregion |
| 280 | |
@ Literal pool, 8-byte aligned for the vldr double loads. Each double
@ is written as low word, high word.
@ pio2_1 / pio2_2 / pio2_3 have their low mantissa bits zeroed; note
@ that pio2_2 is pio2_1t with the low bits cleared, and pio2_3 likewise
@ relates to pio2_2t. Presumably each _t value is the part of pi/2 left
@ over below the pieces before it - confirm against the C original.
| 281 | .align 3 |
| 282 | .Lpio2_1: |
| 283 | .word 0x54400000, 0x3ff921fb |
| 284 | .Lpio2_1t: |
| 285 | .word 0x1a626331, 0x3dd0b461 |
@ 2/pi as a double.
| 286 | .Linvpio2: |
| 287 | .word 0x6dc9c883, 0x3fe45f30 |
@ 2^24.
| 288 | .Ltwo24: |
| 289 | .word 0x00000000, 0x41700000 |
| 290 | .Lpio2_2: |
| 291 | .word 0x1a600000, 0x3dd0b461 |
| 292 | .Lpio2_2t: |
| 293 | .word 0x2e037073, 0x3ba3198a |
| 294 | .Lpio2_3: |
| 295 | .word 0x2e000000, 0x3ba3198a |
| 296 | .Lpio2_3t: |
| 297 | .word 0x252049c1, 0x397b839a |
@ Offsets from the pc value seen at each .LPIC* label (label + 8) to the
@ corresponding read-only table.
| 298 | .Lnpio2_hw_ptr: |
| 299 | .word .Lnpio2_hw-(.LPICnpio2_hw0+8) |
| 300 | .Ltwo_over_pi_ptr: |
| 301 | .word .Ltwo_over_pi-(.LPICtwo_over_pi0+8) |
| 302 | END(sin) |
| 303 | |
@ npio2_hw: 32 words (128 bytes, matching .size) in their own read-only
@ section. sin() loads entry n - 1 for 1 <= n <= 31 and compares it with
@ the high word of |x| to decide whether the one-step reduction is
@ enough. The first entry equals the high word of .Lpio2_1 in the
@ literal pool; presumably entry k is the high word of (k + 1) * pi/2 -
@ confirm against the C original before editing any value.
| 304 | .section .rodata.npio2_hw,"a",%progbits |
| 305 | .align 2 |
@ Assembler-local alias for the start of the table; the pc-relative
@ literal .Lnpio2_hw_ptr is computed from it.
| 306 | .Lnpio2_hw = . + 0 |
| 307 | .type npio2_hw, %object |
| 308 | .size npio2_hw, 128 |
| 309 | npio2_hw: |
| 310 | .word 0x3ff921fb |
| 311 | .word 0x400921fb |
| 312 | .word 0x4012d97c |
| 313 | .word 0x401921fb |
| 314 | .word 0x401f6a7a |
| 315 | .word 0x4022d97c |
| 316 | .word 0x4025fdbb |
| 317 | .word 0x402921fb |
| 318 | .word 0x402c463a |
| 319 | .word 0x402f6a7a |
| 320 | .word 0x4031475c |
| 321 | .word 0x4032d97c |
| 322 | .word 0x40346b9c |
| 323 | .word 0x4035fdbb |
| 324 | .word 0x40378fdb |
| 325 | .word 0x403921fb |
| 326 | .word 0x403ab41b |
| 327 | .word 0x403c463a |
| 328 | .word 0x403dd85a |
| 329 | .word 0x403f6a7a |
| 330 | .word 0x40407e4c |
| 331 | .word 0x4041475c |
| 332 | .word 0x4042106c |
| 333 | .word 0x4042d97c |
| 334 | .word 0x4043a28c |
| 335 | .word 0x40446b9c |
| 336 | .word 0x404534ac |
| 337 | .word 0x4045fdbb |
| 338 | .word 0x4046c6cb |
| 339 | .word 0x40478fdb |
| 340 | .word 0x404858eb |
| 341 | .word 0x404921fb |
| 342 | |
@ two_over_pi: 66 words (264 bytes, matching .size) in their own
@ read-only section. Every entry fits in 24 bits; read in order they
@ spell the hexadecimal expansion of 2/pi (0x0.A2F983 6E4E44 ...).
@ sin() passes the address of this table to __kernel_rem_pio2 as the
@ second stack argument on the huge-|x| path. The values are bit-exact
@ data: do not reformat or regenerate them without a reference.
| 343 | .section .rodata.two_over_pi,"a",%progbits |
| 344 | .align 2 |
@ Assembler-local alias for the start of the table; the pc-relative
@ literal .Ltwo_over_pi_ptr is computed from it.
| 345 | .Ltwo_over_pi = . + 0 |
| 346 | .type two_over_pi, %object |
| 347 | .size two_over_pi, 264 |
| 348 | two_over_pi: |
| 349 | .word 0x00a2f983 |
| 350 | .word 0x006e4e44 |
| 351 | .word 0x001529fc |
| 352 | .word 0x002757d1 |
| 353 | .word 0x00f534dd |
| 354 | .word 0x00c0db62 |
| 355 | .word 0x0095993c |
| 356 | .word 0x00439041 |
| 357 | .word 0x00fe5163 |
| 358 | .word 0x00abdebb |
| 359 | .word 0x00c561b7 |
| 360 | .word 0x00246e3a |
| 361 | .word 0x00424dd2 |
| 362 | .word 0x00e00649 |
| 363 | .word 0x002eea09 |
| 364 | .word 0x00d1921c |
| 365 | .word 0x00fe1deb |
| 366 | .word 0x001cb129 |
| 367 | .word 0x00a73ee8 |
| 368 | .word 0x008235f5 |
| 369 | .word 0x002ebb44 |
| 370 | .word 0x0084e99c |
| 371 | .word 0x007026b4 |
| 372 | .word 0x005f7e41 |
| 373 | .word 0x003991d6 |
| 374 | .word 0x00398353 |
| 375 | .word 0x0039f49c |
| 376 | .word 0x00845f8b |
| 377 | .word 0x00bdf928 |
| 378 | .word 0x003b1ff8 |
| 379 | .word 0x0097ffde |
| 380 | .word 0x0005980f |
| 381 | .word 0x00ef2f11 |
| 382 | .word 0x008b5a0a |
| 383 | .word 0x006d1f6d |
| 384 | .word 0x00367ecf |
| 385 | .word 0x0027cb09 |
| 386 | .word 0x00b74f46 |
| 387 | .word 0x003f669e |
| 388 | .word 0x005fea2d |
| 389 | .word 0x007527ba |
| 390 | .word 0x00c7ebe5 |
| 391 | .word 0x00f17b3d |
| 392 | .word 0x000739f7 |
| 393 | .word 0x008a5292 |
| 394 | .word 0x00ea6bfb |
| 395 | .word 0x005fb11f |
| 396 | .word 0x008d5d08 |
| 397 | .word 0x00560330 |
| 398 | .word 0x0046fc7b |
| 399 | .word 0x006babf0 |
| 400 | .word 0x00cfbc20 |
| 401 | .word 0x009af436 |
| 402 | .word 0x001da9e3 |
| 403 | .word 0x0091615e |
| 404 | .word 0x00e61b08 |
| 405 | .word 0x00659985 |
| 406 | .word 0x005f14a0 |
| 407 | .word 0x0068408d |
| 408 | .word 0x00ffd880 |
| 409 | .word 0x004d7327 |
| 410 | .word 0x00310606 |
| 411 | .word 0x001556ca |
| 412 | .word 0x0073a8c9 |
| 413 | .word 0x0060e27b |
| 414 | .word 0x00c08c6b |