| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | | | 
|  | 2 | |	stan.sa 3.3 7/29/91 | 
|  | 3 | | | 
|  | 4 | |	The entry point stan computes the tangent of | 
|  | 5 | |	an input argument; | 
|  | 6 | |	stand does the same except for denormalized input. | 
|  | 7 | | | 
|  | 8 | |	Input: Double-extended number X in location pointed to | 
|  | 9 | |		by address register a0. | 
|  | 10 | | | 
|  | 11 | |	Output: The value tan(X) returned in floating-point register Fp0. | 
|  | 12 | | | 
|  | 13 | |	Accuracy and Monotonicity: The returned result is within 3 ulp in | 
|  | 14 | |		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the | 
|  | 15 | |		result is subsequently rounded to double precision. The | 
|  | 16 | |		result is provably monotonic in double precision. | 
|  | 17 | | | 
|  | 18 | |	Speed: The program sTAN takes approximately 170 cycles for | 
|  | 19 | |		input argument X such that |X| < 15Pi, which is the usual | 
|  | 20 | |		situation. | 
|  | 21 | | | 
|  | 22 | |	Algorithm: | 
|  | 23 | | | 
|  | 24 | |	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. | 
|  | 25 | | | 
|  | 26 | |	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let | 
|  | 27 | |		k = N mod 2, so in particular, k = 0 or 1. | 
|  | 28 | | | 
|  | 29 | |	3. If k is odd, go to 5. | 
|  | 30 | | | 
|  | 31 | |	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a | 
|  | 32 | |		rational function U/V where | 
|  | 33 | |		U = r + r*s*(P1 + s*(P2 + s*P3)), and | 
|  | 34 | |		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r. | 
|  | 35 | |		Exit. | 
|  | 36 | | | 
|  | 37 | |	4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a | 
|  | 38 | |		rational function U/V where | 
|  | 39 | |		U = r + r*s*(P1 + s*(P2 + s*P3)), and | 
|  | 40 | |		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, | 
|  | 41 | |		-Cot(r) = -V/U. Exit. | 
|  | 42 | | | 
|  | 43 | |	6. If |X| > 1, go to 8. | 
|  | 44 | | | 
|  | 45 | |	7. (|X|<2**(-40)) Tan(X) = X. Exit. | 
|  | 46 | | | 
|  | 47 | |	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. | 
|  | 48 | | | 
|  | 49 |  | 
|  | 50 | |		Copyright (C) Motorola, Inc. 1990 | 
|  | 51 | |			All Rights Reserved | 
|  | 52 | | | 
| Matt Waddel | e00d82d | 2006-02-11 17:55:48 -0800 | [diff] [blame] | 53 | |       For details on the license for this file, please see the | 
|  | 54 | |       file, README, in this same directory. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 55 |  | 
|  | 56 | |STAN	idnt	2,1 | Motorola 040 Floating Point Software Package | 
|  | 57 |  | 
|  | 58 | |section	8 | 
|  | 59 |  | 
|  | 60 | #include "fpsp.h" | 
|  | 61 |  | 
|  | 62 | BOUNDS1:	.long 0x3FD78000,0x4004BC7E | 
|  | 63 | TWOBYPI:	.long 0x3FE45F30,0x6DC9C883 | 
|  | 64 |  | 
|  | 65 | TANQ4:	.long 0x3EA0B759,0xF50F8688 | 
|  | 66 | TANP3:	.long 0xBEF2BAA5,0xA8924F04 | 
|  | 67 |  | 
|  | 68 | TANQ3:	.long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 | 
|  | 69 |  | 
|  | 70 | TANP2:	.long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 | 
|  | 71 |  | 
|  | 72 | TANQ2:	.long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 | 
|  | 73 |  | 
|  | 74 | TANP1:	.long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 | 
|  | 75 |  | 
|  | 76 | TANQ1:	.long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 | 
|  | 77 |  | 
|  | 78 | INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 | 
|  | 79 |  | 
|  | 80 | TWOPI1:	.long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 | 
|  | 81 | TWOPI2:	.long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 | 
|  | 82 |  | 
|  | 83 | |--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING | 
|  | 84 | |--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT | 
|  | 85 | |--MOST 69 BITS LONG. | 
|  | 86 | .global	PITBL | 
|  | 87 | PITBL: | 
|  | 88 | .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 | 
|  | 89 | .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 | 
|  | 90 | .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 | 
|  | 91 | .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000 | 
|  | 92 | .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 | 
|  | 93 | .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 | 
|  | 94 | .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 | 
|  | 95 | .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 | 
|  | 96 | .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 | 
|  | 97 | .long  0xC0040000,0x90836524,0x88034B96,0x20B00000 | 
|  | 98 | .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 | 
|  | 99 | .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 | 
|  | 100 | .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 | 
|  | 101 | .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 | 
|  | 102 | .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 | 
|  | 103 | .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 | 
|  | 104 | .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 | 
|  | 105 | .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 | 
|  | 106 | .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 | 
|  | 107 | .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 | 
|  | 108 | .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 | 
|  | 109 | .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 | 
|  | 110 | .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 | 
|  | 111 | .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 | 
|  | 112 | .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 | 
|  | 113 | .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 | 
|  | 114 | .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 | 
|  | 115 | .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 | 
|  | 116 | .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 | 
|  | 117 | .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 | 
|  | 118 | .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 | 
|  | 119 | .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 | 
|  | 120 | .long  0x00000000,0x00000000,0x00000000,0x00000000 | 
|  | 121 | .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 | 
|  | 122 | .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 | 
|  | 123 | .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 | 
|  | 124 | .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 | 
|  | 125 | .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 | 
|  | 126 | .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 | 
|  | 127 | .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 | 
|  | 128 | .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 | 
|  | 129 | .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 | 
|  | 130 | .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 | 
|  | 131 | .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000 | 
|  | 132 | .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 | 
|  | 133 | .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 | 
|  | 134 | .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 | 
|  | 135 | .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 | 
|  | 136 | .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 | 
|  | 137 | .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 | 
|  | 138 | .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 | 
|  | 139 | .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 | 
|  | 140 | .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 | 
|  | 141 | .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 | 
|  | 142 | .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000 | 
|  | 143 | .long  0x40040000,0x90836524,0x88034B96,0xA0B00000 | 
|  | 144 | .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 | 
|  | 145 | .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 | 
|  | 146 | .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 | 
|  | 147 | .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 | 
|  | 148 | .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 | 
|  | 149 | .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000 | 
|  | 150 | .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 | 
|  | 151 | .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 | 
|  | 152 | .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 | 
|  | 153 |  | 
|  | 154 | .set	INARG,FP_SCR4 | 
|  | 155 |  | 
|  | 156 | .set	TWOTO63,L_SCR1 | 
|  | 157 | .set	ENDFLAG,L_SCR2 | 
|  | 158 | .set	N,L_SCR3 | 
|  | 159 |  | 
|  | 160 | | xref	t_frcinx | 
|  | 161 | |xref	t_extdnrm | 
|  | 162 |  | 
|  | 163 | .global	stand | 
|  | 164 | stand: | 
|  | 165 | |--TAN(X) = X FOR DENORMALIZED X | 
|  | 166 |  | 
|  | 167 | bra		t_extdnrm | 
|  | 168 |  | 
|  | 169 | .global	stan | 
|  | 170 | stan: | 
|  | 171 | fmovex		(%a0),%fp0	| ...LOAD INPUT | 
|  | 172 |  | 
|  | 173 | movel		(%a0),%d0 | 
|  | 174 | movew		4(%a0),%d0 | 
|  | 175 | andil		#0x7FFFFFFF,%d0 | 
|  | 176 |  | 
|  | 177 | cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)? | 
|  | 178 | bges		TANOK1 | 
|  | 179 | bra		TANSM | 
|  | 180 | TANOK1: | 
|  | 181 | cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI? | 
|  | 182 | blts		TANMAIN | 
|  | 183 | bra		REDUCEX | 
|  | 184 |  | 
|  | 185 |  | 
|  | 186 | TANMAIN: | 
|  | 187 | |--THIS IS THE USUAL CASE, |X| <= 15 PI. | 
|  | 188 | |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | 
|  | 189 | fmovex		%fp0,%fp1 | 
|  | 190 | fmuld		TWOBYPI,%fp1	| ...X*2/PI | 
|  | 191 |  | 
|  | 192 | |--HIDE THE NEXT TWO INSTRUCTIONS | 
|  | 193 | leal		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 | 
|  | 194 |  | 
|  | 195 | |--FP1 IS NOW READY | 
|  | 196 | fmovel		%fp1,%d0		| ...CONVERT TO INTEGER | 
|  | 197 |  | 
|  | 198 | asll		#4,%d0 | 
|  | 199 | addal		%d0,%a1		| ...ADDRESS N*PIBY2 IN Y1, Y2 | 
|  | 200 |  | 
|  | 201 | fsubx		(%a1)+,%fp0	| ...X-Y1 | 
|  | 202 | |--HIDE THE NEXT ONE | 
|  | 203 |  | 
|  | 204 | fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2 | 
|  | 205 |  | 
|  | 206 | rorl		#5,%d0 | 
|  | 207 | andil		#0x80000000,%d0	| ...D0 WAS ODD IFF D0 < 0 | 
|  | 208 |  | 
|  | 209 | TANCONT: | 
|  | 210 |  | 
|  | 211 | cmpil		#0,%d0 | 
|  | 212 | blt		NODD | 
|  | 213 |  | 
|  | 214 | fmovex		%fp0,%fp1 | 
|  | 215 | fmulx		%fp1,%fp1		| ...S = R*R | 
|  | 216 |  | 
|  | 217 | fmoved		TANQ4,%fp3 | 
|  | 218 | fmoved		TANP3,%fp2 | 
|  | 219 |  | 
|  | 220 | fmulx		%fp1,%fp3		| ...SQ4 | 
|  | 221 | fmulx		%fp1,%fp2		| ...SP3 | 
|  | 222 |  | 
|  | 223 | faddd		TANQ3,%fp3	| ...Q3+SQ4 | 
|  | 224 | faddx		TANP2,%fp2	| ...P2+SP3 | 
|  | 225 |  | 
|  | 226 | fmulx		%fp1,%fp3		| ...S(Q3+SQ4) | 
|  | 227 | fmulx		%fp1,%fp2		| ...S(P2+SP3) | 
|  | 228 |  | 
|  | 229 | faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4) | 
|  | 230 | faddx		TANP1,%fp2	| ...P1+S(P2+SP3) | 
|  | 231 |  | 
|  | 232 | fmulx		%fp1,%fp3		| ...S(Q2+S(Q3+SQ4)) | 
|  | 233 | fmulx		%fp1,%fp2		| ...S(P1+S(P2+SP3)) | 
|  | 234 |  | 
|  | 235 | faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4)) | 
|  | 236 | fmulx		%fp0,%fp2		| ...RS(P1+S(P2+SP3)) | 
|  | 237 |  | 
|  | 238 | fmulx		%fp3,%fp1		| ...S(Q1+S(Q2+S(Q3+SQ4))) | 
|  | 239 |  | 
|  | 240 |  | 
|  | 241 | faddx		%fp2,%fp0		| ...R+RS(P1+S(P2+SP3)) | 
|  | 242 |  | 
|  | 243 |  | 
|  | 244 | fadds		#0x3F800000,%fp1	| ...1+S(Q1+...) | 
|  | 245 |  | 
|  | 246 | fmovel		%d1,%fpcr		|restore users exceptions | 
|  | 247 | fdivx		%fp1,%fp0		|last inst - possible exception set | 
|  | 248 |  | 
|  | 249 | bra		t_frcinx | 
|  | 250 |  | 
|  | 251 | NODD: | 
|  | 252 | fmovex		%fp0,%fp1 | 
|  | 253 | fmulx		%fp0,%fp0		| ...S = R*R | 
|  | 254 |  | 
|  | 255 | fmoved		TANQ4,%fp3 | 
|  | 256 | fmoved		TANP3,%fp2 | 
|  | 257 |  | 
|  | 258 | fmulx		%fp0,%fp3		| ...SQ4 | 
|  | 259 | fmulx		%fp0,%fp2		| ...SP3 | 
|  | 260 |  | 
|  | 261 | faddd		TANQ3,%fp3	| ...Q3+SQ4 | 
|  | 262 | faddx		TANP2,%fp2	| ...P2+SP3 | 
|  | 263 |  | 
|  | 264 | fmulx		%fp0,%fp3		| ...S(Q3+SQ4) | 
|  | 265 | fmulx		%fp0,%fp2		| ...S(P2+SP3) | 
|  | 266 |  | 
|  | 267 | faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4) | 
|  | 268 | faddx		TANP1,%fp2	| ...P1+S(P2+SP3) | 
|  | 269 |  | 
|  | 270 | fmulx		%fp0,%fp3		| ...S(Q2+S(Q3+SQ4)) | 
|  | 271 | fmulx		%fp0,%fp2		| ...S(P1+S(P2+SP3)) | 
|  | 272 |  | 
|  | 273 | faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4)) | 
|  | 274 | fmulx		%fp1,%fp2		| ...RS(P1+S(P2+SP3)) | 
|  | 275 |  | 
|  | 276 | fmulx		%fp3,%fp0		| ...S(Q1+S(Q2+S(Q3+SQ4))) | 
|  | 277 |  | 
|  | 278 |  | 
|  | 279 | faddx		%fp2,%fp1		| ...R+RS(P1+S(P2+SP3)) | 
|  | 280 | fadds		#0x3F800000,%fp0	| ...1+S(Q1+...) | 
|  | 281 |  | 
|  | 282 |  | 
|  | 283 | fmovex		%fp1,-(%sp) | 
|  | 284 | eoril		#0x80000000,(%sp) | 
|  | 285 |  | 
|  | 286 | fmovel		%d1,%fpcr		|restore users exceptions | 
|  | 287 | fdivx		(%sp)+,%fp0	|last inst - possible exception set | 
|  | 288 |  | 
|  | 289 | bra		t_frcinx | 
|  | 290 |  | 
|  | 291 | TANBORS: | 
|  | 292 | |--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. | 
|  | 293 | |--IF |X| < 2**(-40), RETURN X OR 1. | 
|  | 294 | cmpil		#0x3FFF8000,%d0 | 
|  | 295 | bgts		REDUCEX | 
|  | 296 |  | 
|  | 297 | TANSM: | 
|  | 298 |  | 
|  | 299 | fmovex		%fp0,-(%sp) | 
|  | 300 | fmovel		%d1,%fpcr		 |restore users exceptions | 
|  | 301 | fmovex		(%sp)+,%fp0	|last inst - possible exception set | 
|  | 302 |  | 
|  | 303 | bra		t_frcinx | 
|  | 304 |  | 
|  | 305 |  | 
|  | 306 | REDUCEX: | 
|  | 307 | |--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. | 
|  | 308 | |--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING | 
|  | 309 | |--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. | 
|  | 310 |  | 
|  | 311 | fmovemx	%fp2-%fp5,-(%a7)	| ...save FP2 through FP5 | 
|  | 312 | movel		%d2,-(%a7) | 
|  | 313 | fmoves         #0x00000000,%fp1 | 
|  | 314 |  | 
|  | 315 | |--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that | 
|  | 316 | |--there is a danger of unwanted overflow in first LOOP iteration.  In this | 
|  | 317 | |--case, reduce argument by one remainder step to make subsequent reduction | 
|  | 318 | |--safe. | 
|  | 319 | cmpil	#0x7ffeffff,%d0		|is argument dangerously large? | 
|  | 320 | bnes	LOOP | 
|  | 321 | movel	#0x7ffe0000,FP_SCR2(%a6)	|yes | 
|  | 322 | |					;create 2**16383*PI/2 | 
|  | 323 | movel	#0xc90fdaa2,FP_SCR2+4(%a6) | 
|  | 324 | clrl	FP_SCR2+8(%a6) | 
|  | 325 | ftstx	%fp0			|test sign of argument | 
|  | 326 | movel	#0x7fdc0000,FP_SCR3(%a6)	|create low half of 2**16383* | 
|  | 327 | |					;PI/2 at FP_SCR3 | 
|  | 328 | movel	#0x85a308d3,FP_SCR3+4(%a6) | 
|  | 329 | clrl   FP_SCR3+8(%a6) | 
|  | 330 | fblt	red_neg | 
|  | 331 | orw	#0x8000,FP_SCR2(%a6)	|positive arg | 
|  | 332 | orw	#0x8000,FP_SCR3(%a6) | 
|  | 333 | red_neg: | 
|  | 334 | faddx  FP_SCR2(%a6),%fp0		|high part of reduction is exact | 
|  | 335 | fmovex  %fp0,%fp1		|save high result in fp1 | 
|  | 336 | faddx  FP_SCR3(%a6),%fp0		|low part of reduction | 
|  | 337 | fsubx  %fp0,%fp1			|determine low component of result | 
|  | 338 | faddx  FP_SCR3(%a6),%fp1		|fp0/fp1 are reduced argument. | 
|  | 339 |  | 
|  | 340 | |--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. | 
|  | 341 | |--integer quotient will be stored in N | 
|  | 342 | |--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) | 
|  | 343 |  | 
|  | 344 | LOOP: | 
|  | 345 | fmovex		%fp0,INARG(%a6)	| ...+-2**K * F, 1 <= F < 2 | 
|  | 346 | movew		INARG(%a6),%d0 | 
|  | 347 | movel          %d0,%a1		| ...save a copy of D0 | 
|  | 348 | andil		#0x00007FFF,%d0 | 
|  | 349 | subil		#0x00003FFF,%d0	| ...D0 IS K | 
|  | 350 | cmpil		#28,%d0 | 
|  | 351 | bles		LASTLOOP | 
|  | 352 | CONTLOOP: | 
|  | 353 | subil		#27,%d0	 | ...D0 IS L := K-27 | 
|  | 354 | movel		#0,ENDFLAG(%a6) | 
|  | 355 | bras		WORK | 
|  | 356 | LASTLOOP: | 
|  | 357 | clrl		%d0		| ...D0 IS L := 0 | 
|  | 358 | movel		#1,ENDFLAG(%a6) | 
|  | 359 |  | 
|  | 360 | WORK: | 
|  | 361 | |--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN | 
|  | 362 | |--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29. | 
|  | 363 |  | 
|  | 364 | |--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), | 
|  | 365 | |--2**L * (PIby2_1), 2**L * (PIby2_2) | 
|  | 366 |  | 
|  | 367 | movel		#0x00003FFE,%d2	| ...BIASED EXPO OF 2/PI | 
|  | 368 | subl		%d0,%d2		| ...BIASED EXPO OF 2**(-L)*(2/PI) | 
|  | 369 |  | 
|  | 370 | movel		#0xA2F9836E,FP_SCR1+4(%a6) | 
|  | 371 | movel		#0x4E44152A,FP_SCR1+8(%a6) | 
|  | 372 | movew		%d2,FP_SCR1(%a6)	| ...FP_SCR1 is 2**(-L)*(2/PI) | 
|  | 373 |  | 
|  | 374 | fmovex		%fp0,%fp2 | 
|  | 375 | fmulx		FP_SCR1(%a6),%fp2 | 
|  | 376 | |--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN | 
|  | 377 | |--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N | 
|  | 378 | |--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT | 
|  | 379 | |--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE | 
|  | 380 | |--US THE DESIRED VALUE IN FLOATING POINT. | 
|  | 381 |  | 
|  | 382 | |--HIDE SIX CYCLES OF INSTRUCTION | 
|  | 383 | movel		%a1,%d2 | 
|  | 384 | swap		%d2 | 
|  | 385 | andil		#0x80000000,%d2 | 
|  | 386 | oril		#0x5F000000,%d2	| ...D2 IS SIGN(INARG)*2**63 IN SGL | 
|  | 387 | movel		%d2,TWOTO63(%a6) | 
|  | 388 |  | 
|  | 389 | movel		%d0,%d2 | 
|  | 390 | addil		#0x00003FFF,%d2	| ...BIASED EXPO OF 2**L * (PI/2) | 
|  | 391 |  | 
|  | 392 | |--FP2 IS READY | 
|  | 393 | fadds		TWOTO63(%a6),%fp2	| ...THE FRACTIONAL PART OF FP1 IS ROUNDED | 
|  | 394 |  | 
|  | 395 | |--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2 | 
|  | 396 | movew		%d2,FP_SCR2(%a6) | 
|  | 397 | clrw           FP_SCR2+2(%a6) | 
|  | 398 | movel		#0xC90FDAA2,FP_SCR2+4(%a6) | 
|  | 399 | clrl		FP_SCR2+8(%a6)		| ...FP_SCR2 is  2**(L) * Piby2_1 | 
|  | 400 |  | 
|  | 401 | |--FP2 IS READY | 
|  | 402 | fsubs		TWOTO63(%a6),%fp2		| ...FP2 is N | 
|  | 403 |  | 
|  | 404 | addil		#0x00003FDD,%d0 | 
|  | 405 | movew		%d0,FP_SCR3(%a6) | 
|  | 406 | clrw           FP_SCR3+2(%a6) | 
|  | 407 | movel		#0x85A308D3,FP_SCR3+4(%a6) | 
|  | 408 | clrl		FP_SCR3+8(%a6)		| ...FP_SCR3 is 2**(L) * Piby2_2 | 
|  | 409 |  | 
|  | 410 | movel		ENDFLAG(%a6),%d0 | 
|  | 411 |  | 
|  | 412 | |--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and | 
|  | 413 | |--P2 = 2**(L) * Piby2_2 | 
|  | 414 | fmovex		%fp2,%fp4 | 
|  | 415 | fmulx		FP_SCR2(%a6),%fp4		| ...W = N*P1 | 
|  | 416 | fmovex		%fp2,%fp5 | 
|  | 417 | fmulx		FP_SCR3(%a6),%fp5		| ...w = N*P2 | 
|  | 418 | fmovex		%fp4,%fp3 | 
|  | 419 | |--we want P+p = W+w  but  |p| <= half ulp of P | 
|  | 420 | |--Then, we need to compute  A := R-P   and  a := r-p | 
|  | 421 | faddx		%fp5,%fp3			| ...FP3 is P | 
|  | 422 | fsubx		%fp3,%fp4			| ...W-P | 
|  | 423 |  | 
|  | 424 | fsubx		%fp3,%fp0			| ...FP0 is A := R - P | 
|  | 425 | faddx		%fp5,%fp4			| ...FP4 is p = (W-P)+w | 
|  | 426 |  | 
|  | 427 | fmovex		%fp0,%fp3			| ...FP3 A | 
|  | 428 | fsubx		%fp4,%fp1			| ...FP1 is a := r - p | 
|  | 429 |  | 
|  | 430 | |--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but | 
|  | 431 | |--|r| <= half ulp of R. | 
|  | 432 | faddx		%fp1,%fp0			| ...FP0 is R := A+a | 
|  | 433 | |--No need to calculate r if this is the last loop | 
|  | 434 | cmpil		#0,%d0 | 
|  | 435 | bgt		RESTORE | 
|  | 436 |  | 
|  | 437 | |--Need to calculate r | 
|  | 438 | fsubx		%fp0,%fp3			| ...A-R | 
|  | 439 | faddx		%fp3,%fp1			| ...FP1 is r := (A-R)+a | 
|  | 440 | bra		LOOP | 
|  | 441 |  | 
|  | 442 | RESTORE: | 
|  | 443 | fmovel		%fp2,N(%a6) | 
|  | 444 | movel		(%a7)+,%d2 | 
|  | 445 | fmovemx	(%a7)+,%fp2-%fp5 | 
|  | 446 |  | 
|  | 447 |  | 
|  | 448 | movel		N(%a6),%d0 | 
|  | 449 | rorl		#1,%d0 | 
|  | 450 |  | 
|  | 451 |  | 
|  | 452 | bra		TANCONT | 
|  | 453 |  | 
|  | 454 | |end |