blob: 4f43a36e55aa1dafbae0f543f87634759f722272 [file] [log] [blame]
Jingwei Zhang5d4f0e62014-10-31 18:29:18 +08001/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32// ALGORITHM DESCRIPTION
33// ---------------------
34//
35// Let x=2^k * mx, mx in [1,2)
36//
37// Get B~1/mx based on the output of rcpss instruction (B0)
38// B = int((B0*LH*2^7+0.5))/2^7
39// LH is a short approximation for log10(e)
40//
41// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
42//
43// Result: k*log10(2) - log(B) + p(r)
44// p(r) is a degree 7 polynomial
45// -log(B) read from data table (high, low parts)
46// Result is formed from high and low parts
47//
48// Special cases:
49// log10(0) = -INF with divide-by-zero exception raised
50// log10(1) = +0
51// log10(x) = NaN with invalid exception raised if x < -0, including -INF
52// log10(+INF) = +INF
53//
54/******************************************************************************/
55
56#include <private/bionic_asm.h>
57# -- Begin log10
# log10: base-10 logarithm of the double passed in %xmm0; the result is left
# in %xmm0.  Scratch use: %eax, %ecx, %edx, %r11, %xmm1-%xmm7 and a 24-byte
# stack frame (offset 0: copy of x, offset 8: special-case result, offset 16:
# status code written on the failure paths but never read back in this file).
# The main path follows the ALGORITHM DESCRIPTION comment near the top of the
# file.  The ..___tag_value_log10.N labels mark the addresses referenced by
# the hand-written .eh_frame data at the end of the file.
58ENTRY(log10)
59# parameter 1: %xmm0
60..B1.1:
61..___tag_value_log10.1:
62 subq $24, %rsp
63..___tag_value_log10.3:
# Keep the original argument; the special-case code reloads it from here.
64 movsd %xmm0, (%rsp)
65..B1.2:
# Constants built in registers:
#   xmm2 = bit pattern of 1.0 (0x3FF0 in the top 16-bit word)
#   xmm7 = single-precision 0x3EDE0000 = 0.43359375 (same value as LOG10_E[0])
#   xmm3 = double whose top word is 0x77F0; OR-ed onto the mantissa of x below
#   xmm4 = 0x8000, rounding increment added to the bits of the reciprocal
66 xorpd %xmm2, %xmm2
67 movl $16368, %eax
68 pinsrw $3, %eax, %xmm2
69 movl $1054736384, %ecx
70 movd %ecx, %xmm7
71 xorpd %xmm3, %xmm3
72 movl $30704, %edx
73 pinsrw $3, %edx, %xmm3
74 movq %xmm0, %xmm1
75 movl $32768, %edx
76 movd %edx, %xmm4
77 movapd HIGHSIGMASK(%rip), %xmm5
# eax = top 16 bits of x: sign, 11 exponent bits, 4 mantissa bits.
78 pextrw $3, %xmm0, %eax
79 orpd %xmm2, %xmm0
# ecx = 0x3FE0, subtracted from the masked exponent word to form 16*k.
80 movl $16352, %ecx
# Shift the leading bits of x down into single-precision position (27 + 2
# = 29 bits) and take an approximate reciprocal: B0 in the description.
81 psrlq $27, %xmm0
82 movq LOG10_E(%rip), %xmm2
83 psrld $2, %xmm0
84 rcpps %xmm0, %xmm0
# xmm1 = mantissa field of x only (sign and exponent shifted out).
85 psllq $12, %xmm1
# xmm6 = HIGHSIGMASK with its two quadwords swapped.
86 pshufd $78, %xmm5, %xmm6
87 psrlq $12, %xmm1
# One unsigned compare filters out everything that is not a positive normal
# number: a top word below 16 (zero or subnormal) wraps around after the
# subtraction, and a top word of 0x7FF0 or more (Inf, NaN, or sign bit set)
# is still >= 32736 (0x7FE0).
88 subl $16, %eax
89 cmpl $32736, %eax
90 jae .L_2TAG_PACKET_0.0.2
91.L_2TAG_PACKET_1.0.2:
# Main path; also entered from the subnormal fixup with a rescaled x.
# Register roles established by the interleaved instructions below:
#   xmm0 = B: reciprocal * LH, rounded (+0x8000) and truncated by xmm6
#   xmm5 = high part of mx (selected by HIGHSIGMASK), xmm1 = low remainder
#   xmm7 = 16*k as a double (eax holds the exponent field shifted left by 4)
#   edx  = byte offset into L_tbl, from bits 16..23 of the rounded single
#   r11  = address of L_tbl
# The 0x77F0 exponent OR-ed into xmm1 is 0x380 above the bias 0x3FF, which
# matches the difference between the double and single exponent biases
# (1023 - 127 = 896) that B carries after being shifted back up by 29 bits.
92 mulss %xmm7, %xmm0
93 orpd %xmm3, %xmm1
94 lea L_tbl(%rip), %r11
95 andpd %xmm1, %xmm5
96 paddd %xmm4, %xmm0
97 subsd %xmm5, %xmm1
98 movd %xmm0, %edx
99 psllq $29, %xmm0
100 andpd %xmm6, %xmm0
101 andl $32752, %eax
102 subl %ecx, %eax
103 cvtsi2sd %eax, %xmm7
104 mulpd %xmm0, %xmm5
105 mulsd %xmm0, %xmm1
106 movq log2(%rip), %xmm6
107 movapd coeff(%rip), %xmm3
# xmm5 = B*mx_hi - LH (high part of the reduced argument).
108 subsd %xmm2, %xmm5
# Table lookup: index bits * 16 bytes per entry.  The displacement -1504 is
# -94*16.  NOTE(review): presumably 94 is the smallest index B can produce,
# so that it lands on the first of the 129 entries -- confirm.
109 andl $16711680, %edx
110 shrl $12, %edx
111 movapd -1504(%r11,%rdx), %xmm0
112 movapd 16+coeff(%rip), %xmm4
# xmm1 = r = (B*mx_hi - LH) + B*mx_lo, the reduced argument.
113 addsd %xmm5, %xmm1
114 movapd 32+coeff(%rip), %xmm2
115 mulsd %xmm7, %xmm6
# xmm5 = r duplicated into both lanes for the packed polynomial steps.
116 pshufd $68, %xmm1, %xmm5
117 mulsd 8+log2(%rip), %xmm7
# From here to the final addsd two computations are interleaved:
#   - the coeff polynomial in r, evaluated two lanes at a time in xmm3/xmm4
#     with powers of r kept in xmm5;
#   - the head sum xmm0 = (table_hi + 16k*log2[0]) + r, whose rounding
#     residue (old sum - new sum + r) is recovered into xmm1 together with
#     r*LOG10_E[1], table_lo + 16k*log2[1] and both polynomial lanes.
118 mulsd %xmm1, %xmm3
119 addsd %xmm6, %xmm0
120 mulpd %xmm5, %xmm4
121 movq 8+LOG10_E(%rip), %xmm6
122 mulpd %xmm5, %xmm5
123 addpd %xmm2, %xmm4
124 mulpd %xmm5, %xmm3
125 pshufd $228, %xmm0, %xmm2
126 addsd %xmm1, %xmm0
127 mulsd %xmm1, %xmm4
128 subsd %xmm0, %xmm2
129 mulsd %xmm1, %xmm6
130 addsd %xmm2, %xmm1
131 pshufd $238, %xmm0, %xmm2
132 mulsd %xmm5, %xmm5
133 addsd %xmm2, %xmm7
134 addsd %xmm6, %xmm1
135 addpd %xmm3, %xmm4
136 addsd %xmm7, %xmm1
137 mulpd %xmm5, %xmm4
138 addsd %xmm4, %xmm1
139 pshufd $238, %xmm4, %xmm5
140 addsd %xmm5, %xmm1
# Result = high part + accumulated low part.
141 addsd %xmm1, %xmm0
142 jmp ..B1.5
143.L_2TAG_PACKET_0.0.2:
# Special-input dispatch.  Reload x and undo the -16 so eax is again the
# raw top word: sign bit set -> PACKET_2, below 16 (zero or subnormal) ->
# PACKET_3, otherwise the exponent is all ones (+Inf or positive NaN).
144 movq (%rsp), %xmm0
145 movq (%rsp), %xmm1
146 addl $16, %eax
147 cmpl $32768, %eax
148 jae .L_2TAG_PACKET_2.0.2
149 cmpl $16, %eax
150 jb .L_2TAG_PACKET_3.0.2
151.L_2TAG_PACKET_4.0.2:
# Inf or NaN input: return x + x.
152 addsd %xmm0, %xmm0
153 jmp ..B1.5
154.L_2TAG_PACKET_5.0.2:
# Negative input with all-ones exponent.  Flags still come from the cmpl in
# PACKET_2: above means high mantissa bits are set; then a nonzero low word
# also means NaN.  Otherwise the input is -Inf and takes the invalid path.
155 ja .L_2TAG_PACKET_4.0.2
156 cmpl $0, %edx
157 ja .L_2TAG_PACKET_4.0.2
158 jmp .L_2TAG_PACKET_6.0.2
159.L_2TAG_PACKET_3.0.2:
# Zero or subnormal.  Compute 0 + x and test all 64 bits of the sum; a zero
# sum goes to the divide-by-zero path.
160 xorpd %xmm1, %xmm1
161 addsd %xmm0, %xmm1
162 movd %xmm1, %edx
163 psrlq $32, %xmm1
164 movd %xmm1, %ecx
165 orl %ecx, %edx
166 cmpl $0, %edx
167 je .L_2TAG_PACKET_7.0.2
# Subnormal: multiply x by 2^128 (top word 0x47F0) and repeat the setup done
# at ..B1.2.  ecx is 0x47F0 here instead of 0x3FE0 so that the scaling is
# removed again when 16*k is formed; eax is used without the -16 adjustment.
# xmm3, xmm4, xmm5 and xmm7 still hold the values set up at ..B1.2.
168 xorpd %xmm1, %xmm1
169 movl $18416, %eax
170 pinsrw $3, %eax, %xmm1
171 mulsd %xmm1, %xmm0
172 xorpd %xmm2, %xmm2
173 movl $16368, %eax
174 pinsrw $3, %eax, %xmm2
175 movq %xmm0, %xmm1
176 pextrw $3, %xmm0, %eax
177 orpd %xmm2, %xmm0
178 movl $18416, %ecx
179 psrlq $27, %xmm0
180 movq LOG10_E(%rip), %xmm2
181 psrld $2, %xmm0
182 rcpps %xmm0, %xmm0
183 psllq $12, %xmm1
184 pshufd $78, %xmm5, %xmm6
185 psrlq $12, %xmm1
186 jmp .L_2TAG_PACKET_1.0.2
187.L_2TAG_PACKET_2.0.2:
# Sign bit set.  edx = low 32 bits of x, ecx = high 32 bits doubled (which
# drops the sign).  ecx >= 0xFFE00000 means the exponent is all ones.
# Otherwise ecx|edx == 0 identifies -0 (divide-by-zero path) and anything
# else is a negative finite number (invalid path).
188 movd %xmm1, %edx
189 psrlq $32, %xmm1
190 movd %xmm1, %ecx
191 addl %ecx, %ecx
192 cmpl $-2097152, %ecx
193 jae .L_2TAG_PACKET_5.0.2
194 orl %ecx, %edx
195 cmpl $0, %edx
196 je .L_2TAG_PACKET_7.0.2
197.L_2TAG_PACKET_6.0.2:
# Invalid input: produce NaN as 0 * Inf (top word 0x7FF0) and store code 9
# at 16(%rsp).
198 xorpd %xmm1, %xmm1
199 xorpd %xmm0, %xmm0
200 movl $32752, %eax
201 pinsrw $3, %eax, %xmm1
202 mulsd %xmm1, %xmm0
203 movl $9, 16(%rsp)
204 jmp .L_2TAG_PACKET_8.0.2
205.L_2TAG_PACKET_7.0.2:
# Zero input: produce -Inf as -1.0 (top word 0xBFF0) divided by 0 and store
# code 8 at 16(%rsp).
206 xorpd %xmm1, %xmm1
207 xorpd %xmm0, %xmm0
208 movl $49136, %eax
209 pinsrw $3, %eax, %xmm0
210 divsd %xmm1, %xmm0
211 movl $8, 16(%rsp)
212.L_2TAG_PACKET_8.0.2:
# The special result makes a round trip through 8(%rsp) before returning.
213 movq %xmm0, 8(%rsp)
214..B1.3:
215 movq 8(%rsp), %xmm0
216.L_2TAG_PACKET_9.0.2:
217..B1.5:
# Common exit: release the 24-byte frame and return with the result in xmm0.
218 addq $24, %rsp
219..___tag_value_log10.4:
220 ret
221..___tag_value_log10.5:
222END(log10)
223# -- End log10
224 .section .rodata, "a"
225 .align 16
226 .align 16
# HIGHSIGMASK: two 64-bit AND masks, loaded as one 16-byte vector by log10.
#   quadword 0 = 0xFFFFFFFFF8000000: clears the low 27 bits of a double;
#                applied to mx to split off its high part.
#   quadword 1 = 0xFFFFE00000000000: keeps sign, exponent and the top 7
#                mantissa bits; after the quadword swap (pshufd $78) in log10
#                it truncates B.
227HIGHSIGMASK:
228 .long 4160749568
229 .long 4294967295
230 .long 0
231 .long 4294959104
232 .type HIGHSIGMASK,@object
233 .size HIGHSIGMASK,16
234 .align 16
# LOG10_E: two doubles.
#   [0] = 0x3FDBC00000000000 = 0.43359375, the short approximation LH of
#         log10(e) from the algorithm description; subtracted from B*mx_hi.
#   [1] = 0x3F5A7A6CBF2E4108; log10 multiplies it by the reduced argument r
#         and adds the product to the low part of the result.
#         NOTE(review): looks like (log10(e) - LH) / LH -- confirm.
235LOG10_E:
236 .long 0
237 .long 1071366144
238 .long 3207479560
239 .long 1062894188
240 .type LOG10_E,@object
241 .size LOG10_E,16
242 .align 16
# L_tbl: 129 entries of 16 bytes each (2064 bytes).  Every entry is a pair of
# doubles fetched with a single movapd in log10: the first double is added
# into the high part of the result, the second into the low-order part.  The
# algorithm description calls these the high and low parts of -log(B).
# The first doubles decrease from about 0.30103 (top word 0x3FD34413) in the
# first entry to exactly 0 in the last entry.  log10 addresses the table as
# -1504(L_tbl + index*16), so the index of the first entry is 94.
243L_tbl:
244 .long 1352628224
245 .long 1070810131
246 .long 521319256
247 .long 1025503025
248 .long 2150839296
249 .long 1070801944
250 .long 3329350096
251 .long 3170190015
252 .long 1360613376
253 .long 1070793794
254 .long 2024059075
255 .long 1024991594
256 .long 1875350528
257 .long 1070785680
258 .long 2163882141
259 .long 3163564137
260 .long 2312126464
261 .long 1070777602
262 .long 1975711076
263 .long 1023674196
264 .long 1306336256
265 .long 1070769560
266 .long 3524899523
267 .long 3170508164
268 .long 1806334976
269 .long 1070761553
270 .long 4254777025
271 .long 1025238739
272 .long 2483193856
273 .long 1070753581
274 .long 3800671317
275 .long 3172916830
276 .long 2025350144
277 .long 1070745644
278 .long 1731514745
279 .long 1025501083
280 .long 3433285632
281 .long 1070737741
282 .long 2551857336
283 .long 3169662186
284 .long 1134317568
285 .long 1070729873
286 .long 3426297655
287 .long 3172637891
288 .long 2457152512
289 .long 1070722038
290 .long 63549415
291 .long 1025415416
292 .long 1861803008
293 .long 1070714237
294 .long 1910171636
295 .long 1023977580
296 .long 2414140416
297 .long 1070706469
298 .long 4002514337
299 .long 3170841618
300 .long 2900726784
301 .long 1070698734
302 .long 3268064083
303 .long 1022459609
304 .long 2123517952
305 .long 1070691032
306 .long 1767031218
307 .long 1022448156
308 .long 3194569728
309 .long 1070683362
310 .long 3402332618
311 .long 3171671160
312 .long 650882048
313 .long 1070675725
314 .long 4146023905
315 .long 3171023038
316 .long 1928988672
317 .long 1070668119
318 .long 1438617867
319 .long 1016360491
320 .long 1594908672
321 .long 1070660545
322 .long 971389377
323 .long 1024763979
324 .long 2818746368
325 .long 1070653002
326 .long 3555925341
327 .long 3172434821
328 .long 194584576
329 .long 1070645491
330 .long 943919215
331 .long 3172950063
332 .long 1215096832
333 .long 1070638010
334 .long 2283358588
335 .long 1022335098
336 .long 501519360
337 .long 1070630560
338 .long 480904295
339 .long 1024437959
340 .long 1278266368
341 .long 1070623140
342 .long 2755806066
343 .long 3172342012
344 .long 2487812096
345 .long 1070615750
346 .long 2489653202
347 .long 3172481099
348 .long 3085451264
349 .long 1070608390
350 .long 3759184951
351 .long 3172574892
352 .long 2039090176
353 .long 1070601060
354 .long 1361176676
355 .long 3172355319
356 .long 953057280
357 .long 1070591423
358 .long 1176587546
359 .long 3166422018
360 .long 3370524672
361 .long 1070576879
362 .long 3669570051
363 .long 1025376630
364 .long 749742080
365 .long 1070562394
366 .long 707700964
367 .long 3170814058
368 .long 4008353792
369 .long 1070547965
370 .long 3247327652
371 .long 1022431400
372 .long 2612455424
373 .long 1070533594
374 .long 2453457344
375 .long 3172322969
376 .long 3230920704
377 .long 1070519279
378 .long 1296781801
379 .long 1025115335
380 .long 3965253632
381 .long 1070505020
382 .long 373075289
383 .long 1017938528
384 .long 2593157120
385 .long 1070476669
386 .long 1068054086
387 .long 1021616576
388 .long 925962240
389 .long 1070448537
390 .long 850121213
391 .long 1023928989
392 .long 1732556800
393 .long 1070420620
394 .long 1305206740
395 .long 3172665570
396 .long 3815630848
397 .long 1070392915
398 .long 192642943
399 .long 3172699907
400 .long 2001758208
401 .long 1070365420
402 .long 2820786683
403 .long 1024704867
404 .long 16746496
405 .long 1070338131
406 .long 1399573110
407 .long 3171372773
408 .long 1886492672
409 .long 1070311044
410 .long 3621428075
411 .long 3172974358
412 .long 3338196992
413 .long 1070284157
414 .long 3793882035
415 .long 1025124701
416 .long 381769728
417 .long 1070257468
418 .long 3877933342
419 .long 3170195490
420 .long 2186491904
421 .long 1070230972
422 .long 1838687089
423 .long 1017927292
424 .long 1008330752
425 .long 1070204668
426 .long 2228321664
427 .long 1025352196
428 .long 2247065600
429 .long 1070178552
430 .long 1413900906
431 .long 3170902532
432 .long 2964070400
433 .long 1070152622
434 .long 3590454629
435 .long 1025016844
436 .long 465154048
437 .long 1070126876
438 .long 2079688550
439 .long 3172268183
440 .long 883615744
441 .long 1070101310
442 .long 989244452
443 .long 3171900485
444 .long 1993768960
445 .long 1070075922
446 .long 1124327841
447 .long 3172964992
448 .long 1794471936
449 .long 1070050710
450 .long 1140575046
451 .long 1022673726
452 .long 2797932544
453 .long 1070025671
454 .long 1894836933
455 .long 3172544059
456 .long 3433797632
457 .long 1070000803
458 .long 3221831166
459 .long 3171921685
460 .long 2338371584
461 .long 1069976104
462 .long 3732461053
463 .long 3164513518
464 .long 2644013056
465 .long 1069951571
466 .long 2519460462
467 .long 3172548740
468 .long 3383814144
469 .long 1069927202
470 .long 2290997657
471 .long 1025499649
472 .long 3781380096
473 .long 1069902995
474 .long 380479405
475 .long 1025184136
476 .long 3245785088
477 .long 1069878948
478 .long 1096398261
479 .long 3169885192
480 .long 1366712320
481 .long 1069855059
482 .long 2218343715
483 .long 3170281628
484 .long 2204717056
485 .long 1069831325
486 .long 2668334011
487 .long 1025264524
488 .long 1401772032
489 .long 1069807745
490 .long 4103993159
491 .long 1022925721
492 .long 3356721152
493 .long 1069784316
494 .long 3573790772
495 .long 3172186527
496 .long 4041148416
497 .long 1069761037
498 .long 4027691910
499 .long 3171276990
500 .long 3880151040
501 .long 1069737906
502 .long 4087118786
503 .long 3172710734
504 .long 3453364224
505 .long 1069714921
506 .long 99014299
507 .long 3172003077
508 .long 3491092480
509 .long 1069692080
510 .long 3801836701
511 .long 3172989287
512 .long 575580160
513 .long 1069669382
514 .long 1920406012
515 .long 3170874125
516 .long 22282240
517 .long 1069646824
518 .long 964193370
519 .long 1019363159
520 .long 2991429632
521 .long 1069624404
522 .long 3372589890
523 .long 1023425053
524 .long 2189645824
525 .long 1069602122
526 .long 2610503872
527 .long 1023652442
528 .long 3341467648
529 .long 1069579975
530 .long 1190292004
531 .long 1022425665
532 .long 3711293440
533 .long 1069557962
534 .long 1104795356
535 .long 1023625829
536 .long 1380401152
537 .long 1069524644
538 .long 1156998217
539 .long 1025100499
540 .long 765710336
541 .long 1069481144
542 .long 1736649113
543 .long 1024999439
544 .long 849412096
545 .long 1069437902
546 .long 2618178330
547 .long 3170853629
548 .long 1433104384
549 .long 1069394915
550 .long 43477267
551 .long 3170378811
552 .long 2548596736
553 .long 1069352180
554 .long 3967367063
555 .long 1025246584
556 .long 157577216
557 .long 1069309695
558 .long 100402533
559 .long 3172825502
560 .long 3326238720
561 .long 1069267455
562 .long 1176892909
563 .long 1025464099
564 .long 4155494400
565 .long 1069225459
566 .long 3713707617
567 .long 3172630046
568 .long 3545804800
569 .long 1069183704
570 .long 857007315
571 .long 1024965777
572 .long 2602520576
573 .long 1069142187
574 .long 2588758347
575 .long 1022463131
576 .long 2631196672
577 .long 1069100905
578 .long 2118424235
579 .long 1022490989
580 .long 838135808
581 .long 1069059856
582 .long 4117002727
583 .long 1024874520
584 .long 3210903552
585 .long 1069019036
586 .long 650070125
587 .long 3172012966
588 .long 3039211520
589 .long 1068978444
590 .long 438055812
591 .long 1017743757
592 .long 2385633280
593 .long 1068938077
594 .long 3011990369
595 .long 3171312044
596 .long 3491618816
597 .long 1068897932
598 .long 712813818
599 .long 3172720400
600 .long 183644160
601 .long 1068858008
602 .long 4287006742
603 .long 1022379728
604 .long 3639214080
605 .long 1068818300
606 .long 353762279
607 .long 3172980009
608 .long 3728416768
609 .long 1068778808
610 .long 1851367730
611 .long 1025486574
612 .long 3370094592
613 .long 1068739529
614 .long 4046594913
615 .long 3172567047
616 .long 1348407296
617 .long 1068700461
618 .long 143189675
619 .long 1025397632
620 .long 899403776
621 .long 1068661601
622 .long 3753687842
623 .long 3170772772
624 .long 1117708288
625 .long 1068622947
626 .long 1857340812
627 .long 3170782678
628 .long 1248276480
629 .long 1068584497
630 .long 1289858203
631 .long 1025222289
632 .long 683237376
633 .long 1068546249
634 .long 2356679608
635 .long 3171629170
636 .long 3253764096
637 .long 1068508200
638 .long 3267136556
639 .long 1018554987
640 .long 94478336
641 .long 1068441756
642 .long 1927868814
643 .long 3169378180
644 .long 3233144832
645 .long 1068366445
646 .long 2682188854
647 .long 1023964004
648 .long 2940297216
649 .long 1068291522
650 .long 275301289
651 .long 1023944679
652 .long 3677708288
653 .long 1068216982
654 .long 302658771
655 .long 1024465567
656 .long 1576968192
657 .long 1068142822
658 .long 3672035940
659 .long 3172254610
660 .long 1614069760
661 .long 1068069037
662 .long 480052905
663 .long 3172692062
664 .long 424435712
665 .long 1067995624
666 .long 2207869657
667 .long 3170965436
668 .long 3477782528
669 .long 1067922578
670 .long 2980661858
671 .long 3164990018
672 .long 3598401536
673 .long 1067849897
674 .long 1974393034
675 .long 3171357083
676 .long 2435235840
677 .long 1067777577
678 .long 1385289011
679 .long 1024615823
680 .long 1867333632
681 .long 1067705614
682 .long 3442236633
683 .long 1025334384
684 .long 3999301632
685 .long 1067634004
686 .long 3506472073
687 .long 1025132546
688 .long 2566971392
689 .long 1067562745
690 .long 1425757592
691 .long 3172358463
692 .long 112943104
693 .long 1067491833
694 .long 1693407156
695 .long 3172426603
696 .long 3079929856
697 .long 1067392159
698 .long 3999942455
699 .long 1018549369
700 .long 2443837440
701 .long 1067251701
702 .long 974534460
703 .long 1023963412
704 .long 359366656
705 .long 1067111917
706 .long 2204915018
707 .long 1013514416
708 .long 3564519424
709 .long 1066972799
710 .long 3977441659
711 .long 3170879860
712 .long 2011086848
713 .long 1066834343
714 .long 590145514
715 .long 1025390011
716 .long 3216982016
717 .long 1066696541
718 .long 3629120110
719 .long 1024330313
720 .long 2194128896
721 .long 1066559388
722 .long 2367098512
723 .long 3172260338
724 .long 2916220928
725 .long 1066422877
726 .long 2262431886
727 .long 1021229446
728 .long 2263941120
729 .long 1066172214
730 .long 3118507287
731 .long 1021484970
732 .long 3076292608
733 .long 1065901726
734 .long 1411737803
735 .long 3172957147
736 .long 1186136064
737 .long 1065632488
738 .long 3109349337
739 .long 1025397383
740 .long 3085303808
741 .long 1065364487
742 .long 584715031
743 .long 3172596519
744 .long 1821048832
745 .long 1064842211
746 .long 2182246895
747 .long 3172536214
748 .long 697368576
749 .long 1064311094
750 .long 3157561765
751 .long 3172716357
752 .long 894042112
753 .long 1063260131
754 .long 3237958154
755 .long 3172587292
756 .long 0
757 .long 0
758 .long 0
759 .long 0
760 .type L_tbl,@object
761 .size L_tbl,2064
762 .align 16
# log2: two doubles, both multiplied in log10 by the exponent term 16*k.
#   [0] goes into the high part of the result, [1] into the low part.
# The top word of [0] is 0x3F934413, i.e. the top word of the first L_tbl
# entry (0x3FD34413, about 0.30103) with the exponent lowered by 4, which
# matches the factor 16 carried by the exponent term.  The same holds for [1]
# against the second double of that entry.
763log2:
764 .long 1352628224
765 .long 1066615827
766 .long 521319256
767 .long 1021308721
768 .type log2,@object
769 .size log2,16
770 .align 16
# coeff: six doubles (48 bytes) holding coefficients of the polynomial p(r)
# from the algorithm description.  log10 loads them as three 16-byte pairs:
# coeff+0 into xmm3, coeff+16 into xmm4 and coeff+32 into xmm2, and combines
# them two lanes at a time.
# NOTE(review): which power of r each entry belongs to is not spelled out in
# this file -- confirm against the generator before changing any value.
771coeff:
772 .long 3248877870
773 .long 1077250164
774 .long 1691676429
775 .long 3221787401
776 .long 945132465
777 .long 3223701783
778 .long 3700831335
779 .long 1073506818
780 .long 2141010593
781 .long 1075227551
782 .long 3698831637
783 .long 3220339442
784 .type coeff,@object
785 .size coeff,48
# Nothing is emitted into .data; the section is switched again right away.
786 .data
# Empty .note.GNU-stack section: the conventional marker that this object
# does not need an executable stack.
787 .section .note.GNU-stack, ""
788// -- Begin DWARF2 SEGMENT .eh_frame
# Hand-encoded call-frame information for log10 (one CIE followed by one FDE).
789 .section .eh_frame,"a",@progbits
790.eh_frame_seg:
791 .align 1
# CIE: length 0x14; id 0, version 1, augmentation "zR"; code alignment 1,
# data alignment -8, return-address register 16; one augmentation byte 0x1b
# (pc-relative 4-byte pointers); initial rules: CFA = r7 + 8, r16 saved at
# CFA-8; two padding bytes.
792 .4byte 0x00000014
793 .8byte 0x00527a0100000000
794 .8byte 0x08070c1b01107801
795 .4byte 0x00000190
# FDE: length 0x1c, CIE pointer 0x1c, then the pc-relative start of log10 and
# its size in bytes.
796 .4byte 0x0000001c
797 .4byte 0x0000001c
798 .4byte ..___tag_value_log10.1-.
799 .4byte ..___tag_value_log10.5-..___tag_value_log10.1
# Bytes 0x00, 0x04: empty augmentation data, then DW_CFA_advance_loc4 up to
# the address just after the subq $24, %rsp.
800 .2byte 0x0400
801 .4byte ..___tag_value_log10.3-..___tag_value_log10.1
# DW_CFA_def_cfa_offset 32 (8 for the return address + the 24-byte frame).
802 .2byte 0x200e
# DW_CFA_advance_loc4 up to the address just after the addq $24, %rsp.
803 .byte 0x04
804 .4byte ..___tag_value_log10.4-..___tag_value_log10.3
# DW_CFA_def_cfa_offset 8, then one DW_CFA_nop as padding.
805 .2byte 0x080e
806 .byte 0x00
807# End