| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | | | 
|  | 2 | |	round.sa 3.4 7/29/91 | 
|  | 3 | | | 
|  | 4 | |	handle rounding and normalization tasks | 
|  | 5 | | | 
|  | 6 | | | 
|  | 7 | | | 
|  | 8 | |		Copyright (C) Motorola, Inc. 1990 | 
|  | 9 | |			All Rights Reserved | 
|  | 10 | | | 
| Matt Waddel | e00d82d | 2006-02-11 17:55:48 -0800 | [diff] [blame] | 11 | |       For details on the license for this file, please see the | 
|  | 12 | |       file, README, in this same directory. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 13 |  | 
|  | 14 | |ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package | 
|  | 15 |  | 
|  | 16 | |section	8 | 
|  | 17 |  | 
|  | 18 | #include "fpsp.h" | 
|  | 19 |  | 
|
|	round --- round the operand at (a0) according to precision/mode
|
|	Entry:
|	  a0		points to the operand in the internal extended format
|	  d1 (high word)	rounding precision, used as a table index:
|			  0 = ext, 1 = sgl, 2 = dbl
|	  d1 (low word)	rounding mode, used as an index into mode_tab:
|			  0 = RN, 1 = RZ, 2 = RM, 3 = RP
|	  d0{31:29}	g,r,s bits of the (extended) operand
|
|	Exit:
|	  The value at (a0) has been rounded in place; it is still in the
|	  internal extended format and a0 itself is not modified.
|	  inx2a_mask is or-ed into USER_FPSR(a6) when any of the g-r-s
|	  bits computed by ext_grs is set, i.e. when the result is inexact.
|	  d0, d1 and a1 are used as scratch (d1 is left word-swapped).
|	  The result is not typed - the tag field is invalid.
|
|	NOTE(review): the historical header states that the g-r-s bits in
|	d0 are cleared on return; none of the paths reached from here
|	clears d0 explicitly - confirm before relying on that.
|
	.global	round
round:
	bsrs	ext_grs			|d0 = g,r,s for the selected precision
	tstl	%d0			|anything below the rounding point?
	beq	rnd_exact		|no: exact, just clear the tail bits

rnd_cont:
|
| Inexact: record it, then dispatch on the rounding mode in d1.w.
|
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	lea	mode_tab,%a1
	movel	(%a1,%d1.w*4),%a1	|a1 = handler for this rounding mode
	jmp	(%a1)

rnd_exact:
	swap	%d1			|truncate expects the precision in d1.w
	bra	truncate
|
| Rounding mode handlers, indexed by the rounding mode in d1.w.
| Every handler is entered with grs != 0.
|
mode_tab:
	.long	rnd_near
	.long	rnd_zero
	.long	rnd_mnus
	.long	rnd_plus
|
|	ROUND PLUS INFINITY
|
|	A negative operand (msb of the LOCAL_SGN byte set) is truncated.
|	Otherwise 1 is added to the l-bit of the selected precision.
|
rnd_plus:
	swap	%d1			|d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		|negative operand?
	bmi	truncate		|yes: truncating rounds it toward +inf
	lea	add_to_l,%a1
	moveql	#-1,%d0			|d0 non-zero: add routine keeps the l-bit
	movel	(%a1,%d1.w*4),%a1	|a1 = add routine for this precision
	jmp	(%a1)
|
|	ROUND MINUS INFINITY
|
|	A positive operand (msb of the LOCAL_SGN byte clear) is truncated.
|	Otherwise 1 is added to the l-bit of the selected precision.
|
rnd_mnus:
	swap	%d1			|d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		|positive operand?
	bpl	truncate		|yes: truncating rounds it toward -inf
	lea	add_to_l,%a1
	moveql	#-1,%d0			|d0 non-zero: add routine keeps the l-bit
	movel	(%a1,%d1.w*4),%a1	|a1 = add routine for this precision
	jmp	(%a1)
|
|	ROUND ZERO
|
|	Rounding toward zero never increments: always truncate.
|
rnd_zero:
	swap	%d1			|d1.w = rounding precision
	bra	truncate
|
|	ROUND NEAREST
|
|	g = 0: truncate.
|	g = 1: add 1 to the l-bit.  d0 is left holding r and s, and the
|	add routine clears the l-bit again when r = s = 0, so that a tie
|	is rounded to even.
|
rnd_near:
	swap	%d1			|d1.w = rounding precision
	addl	%d0,%d0			|g -> carry; r,s remain in d0
	bcc	truncate		|g = 0: below one half, truncate
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1	|a1 = add routine for this precision
	jmp	(%a1)
|  | 122 |  | 
|
|	ext_grs --- extract guard, round and sticky bits
|
| Input:	a0 =		pointer to the operand (internal format)
|		d1 =		PREC:ROUND (precision in the high word)
|		d0 =		g,r,s of the extended precision operand
| Output:	d0{31:29} =	guard, round, sticky for the precision
|
| Extended precision (PREC = 0): d0 is returned unchanged.
| Single precision (PREC = 1): g,r are the two bits at bit field
|	offset 24 of LOCAL_HI; sticky is set when any of the low 6
|	bits of LOCAL_HI, any bit of LOCAL_LO or any bit of the
|	incoming d0 is set.
| Any other PREC is handled as double: g,r are the two bits at bit
|	field offset 21 of LOCAL_LO; sticky is set when any of the low
|	9 bits of LOCAL_LO or any bit of the incoming d0 is set.
|
| d1 is swapped to reach the precision and swapped back before the
| return; d2/d3 are saved on the stack.  Only d0 is changed.
|
ext_grs:
	swap	%d1			|d1.w = rounding precision
	tstw	%d1
	beqs	end_ext_grs		|ext: incoming g,r,s are already right

sgl_or_dbl:
	moveml	%d2/%d3,-(%a7)		|temp registers
	cmpiw	#1,%d1
	bnes	grs_dbl
grs_sgl:
	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|g,r just below the sgl l-bit
	rorl	#2,%d3			|move them to d3{31:30}
	movel	LOCAL_HI(%a0),%d2
	andil	#0x0000003f,%d2		|bits of LOCAL_HI to the right of g-r
	bnes	st_stky
	tstl	LOCAL_LO(%a0)		|whole lower mantissa
	bnes	st_stky
	tstl	%d0			|original g,r,s
	beqs	end_sd			|nothing set: sticky stays clear
	bras	st_stky
grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|g,r just below the dbl l-bit
	rorl	#2,%d3			|move them to d3{31:30}
	movel	LOCAL_LO(%a0),%d2
	andil	#0x000001ff,%d2		|bits of LOCAL_LO to the right of g-r
	bnes	st_stky
	tstl	%d0			|original g,r,s
	beqs	end_sd			|nothing set: sticky stays clear
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			|return the new g,r,s in d0
	moveml	(%a7)+,%d2/%d3		|restore temp registers
end_ext_grs:
	swap	%d1			|restore d1 to PREC:ROUND
	rts
|  | 177 |  | 
|*******************  Local Equates
	.set	ad_1_sgl,0x00000100	|l-bit of a sgl result within LOCAL_HI
	.set	ad_1_dbl,0x00000800	|l-bit of a dbl result within LOCAL_LO

|
| Routines that add 1 to the l-bit, indexed by rounding precision.
| Index 3 is not one of the documented precisions; it is mapped to
| the double precision routine.
|
add_to_l:
	.long	add_ext
	.long	add_sgl
	.long	add_dbl
	.long	add_dbl
|
|	ADD SINGLE
|
|	Adds 1 to the single precision l-bit of the operand at (a0).
|	d0 = 0 on entry means r = s = 0 (a tie): the l-bit is then
|	cleared so that the result is even.
|	sgl_done is also entered directly through the trunct table.
|
add_sgl:
	addil	#ad_1_sgl,LOCAL_HI(%a0)
	bccs	scc_clr			|no carry out of the mantissa
	roxrw	LOCAL_HI(%a0)		|rotate the carry back in: upper word,
	roxrw	LOCAL_HI+2(%a0)		|then lower word of LOCAL_HI
	addqw	#1,LOCAL_EX(%a0)	|and bump the exponent
scc_clr:
	tstl	%d0			|r = s = 0 ?
	bnes	sgl_done
	bclrb	#0,LOCAL_HI+2(%a0)	|tie: clear the l-bit (0x100 of LOCAL_HI)
sgl_done:
	andil	#0xffffff00,LOCAL_HI(%a0) |drop everything below the sgl l-bit
	clrl	LOCAL_LO(%a0)
	rts
|  | 207 |  | 
|
|	ADD EXTENDED
|
|	Adds 1 to the least significant bit of the 64-bit mantissa at
|	LOCAL_HI:LOCAL_LO.  When the carry runs out of LOCAL_HI it is
|	rotated back in through all four mantissa words and the exponent
|	is incremented.  d0 = 0 on entry means r = s = 0 (a tie): the
|	l-bit is then cleared so that the result is even.
|
add_ext:
	addql	#1,LOCAL_LO(%a0)	|add 1 to the l-bit
	bccs	xcc_clr			|no carry into LOCAL_HI
	addql	#1,LOCAL_HI(%a0)	|propagate the carry
	bccs	xcc_clr			|no carry out of the mantissa
	roxrw	LOCAL_HI(%a0)		|carry out: rotate it back in,
	roxrw	LOCAL_HI+2(%a0)		|one word at a time, from the most
	roxrw	LOCAL_LO(%a0)		|significant word down to the
	roxrw	LOCAL_LO+2(%a0)		|least significant one
	addqw	#1,LOCAL_EX(%a0)	|and bump the exponent
xcc_clr:
	tstl	%d0			|r = s = 0 ?
	bnes	add_ext_done
	andib	#0xfe,LOCAL_LO+3(%a0)	|tie: clear the l-bit
add_ext_done:
	rts
|
|	ADD DOUBLE
|
|	Adds 1 to the double precision l-bit (ad_1_dbl in LOCAL_LO) of
|	the operand at (a0), propagating the carry into LOCAL_HI and, on
|	mantissa overflow, rotating it back in and incrementing the
|	exponent.  d0 = 0 on entry means r = s = 0 (a tie): the l-bit is
|	then cleared so that the result is even.
|	dbl_done is also entered directly through the trunct table.
|
add_dbl:
	addil	#ad_1_dbl,LOCAL_LO(%a0)
	bccs	dcc_clr			|no carry into LOCAL_HI
	addql	#1,LOCAL_HI(%a0)	|propagate the carry
	bccs	dcc_clr			|no carry out of the mantissa
	roxrw	LOCAL_HI(%a0)		|carry out: rotate it back in,
	roxrw	LOCAL_HI+2(%a0)		|one word at a time, from the most
	roxrw	LOCAL_LO(%a0)		|significant word down to the
	roxrw	LOCAL_LO+2(%a0)		|least significant one
	addqw	#1,LOCAL_EX(%a0)	|and bump the exponent
dcc_clr:
	tstl	%d0			|r = s = 0 ?
	bnes	dbl_done
	bclrb	#3,LOCAL_LO+2(%a0)	|tie: clear the l-bit (0x800 of LOCAL_LO)

dbl_done:
	andil	#0xfffff800,LOCAL_LO(%a0) |drop everything below the dbl l-bit
	rts

| Not referenced anywhere in this file.
error:
	rts
|
|	truncate --- clear the mantissa bits beyond the rounding precision
|
|	Entered with the rounding precision in d1.w.  Extended precision
|	has nothing to clear and returns at once; single and double jump
|	to the masking tails of the add routines (sgl_done / dbl_done).
|	Index 3 is mapped to double.  a1 is used as scratch.
|
trunct:
	.long	end_rnd			|ext
	.long	sgl_done		|sgl
	.long	dbl_done		|dbl
	.long	dbl_done		|index 3: treated as dbl

truncate:
	lea	trunct,%a1
	movel	(%a1,%d1.w*4),%a1	|a1 = tail for this precision
	jmp	(%a1)

end_rnd:
	rts
|  | 267 |  | 
|
|	NORMALIZE
|
| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
| is done by shifting the mantissa left while decrementing the
| exponent.
|
| NRM_SET shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb of LOCAL_HI).
|
| NRM_ZERO does the same unless this would make the exponent go below
| 0.  In that case the number becomes a denorm: the mantissa is only
| shifted by the value of the exponent and the exponent is set to 0.
|
| Note that both routines have been optimized (for the worst case) and
| therefore do not have the easy to follow decrement/shift loop.
|
|	NRM_ZERO
|
|	Distance to first 1 bit in mantissa = X
|	Distance to 0 from exponent = Y
|	If X <= Y
|	Then
|	  nrm_set
|	Else
|	  shift mantissa by Y
|	  set exponent = 0
|
| Input:
|	a0	points to the operand (LOCAL_EX, LOCAL_HI, LOCAL_LO)
| Output:
|	the operand at (a0) is updated in place
|	d0, d1 are used as scratch; d2/d3/d5/d6 are preserved
|
| The historical header also lists FP_SCR1 and L_SCR1{4}; the code
| below only touches the operand addressed by a0.
|
| Fix: when the upper mantissa word is zero and 32 <= Y < 64, the
| shift count is 32 or more.  The register shifts use the count
| modulo 64, so the old code shifted every bit out (and computed the
| carry-over with a count of 32 - Y < 0) and stored a zero mantissa.
| Such counts are now handled by moving LOCAL_LO into LOCAL_HI and
| shifting by Y - 32.
|
	.global	nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw	#64,%d0			|exp below 64?
	bmis	d0_less
	bsr	nrm_set			|exp >= 64: a full normalize shifts by
	rts				|at most 64, exp cannot go below 0
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3		|X = distance to the first 1 in ms mant
	beqs	ms_clr			|ms mant is all zeros
	cmpw	%d3,%d0			|if X > Y
	bmis	greater			|then a full normalize would push the
|					;exponent below 0
	bsr	nrm_set			|else normalize completely
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
greater:
	cmpw	#32,%d0			|shift by a whole longword or more?
	bges	gt_long
	movel	%d2,%d6			|keep a copy of ls mant
	lsll	%d0,%d2			|shift ls mant by Y
	lsll	%d0,%d1			|shift ms mant by Y
	movel	#32,%d5
	subl	%d0,%d5			|only the low 6 bits of d5 are used
	lsrl	%d5,%d6			|bits of ls mant that cross into ms mant
	orl	%d6,%d1
	bras	gt_store
gt_long:
	movew	%d0,%d5
	subw	#32,%d5			|d5 = Y - 32 (0..31)
	movel	%d2,%d1			|ls mant moves up a whole longword
	lsll	%d5,%d1			|and is shifted by the remainder
	clrl	%d2			|nothing is left in ls mant
gt_store:
	movel	#0,%d0			|same as if exp had been decremented to
|					;0 while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
ms_clr:
	bfffo	%d2{#0:#32},%d3		|first 1 in ls mant
	beqs	all_clr			|the whole mantissa is zero
	addw	#32,%d3			|X counts the 32 zero bits of ms mant
	cmpw	%d3,%d0			|if X > Y
	bmis	greater			|then shift by Y only
	bsr	nrm_set			|else normalize completely
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
|	NRM_SET
|
|	Shifts the 64-bit mantissa at LOCAL_HI:LOCAL_LO left until its
|	most significant bit is set and subtracts the shift count from
|	LOCAL_EX.  No check is made for the exponent going below 0.
|
|	a0 points to the operand.  d0 and d1 are used as scratch and are
|	not preserved; d6 and d7 are saved on the stack and restored.
|
	.global	nrm_set
nrm_set:
	movel	%d7,-(%a7)
	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |d7 = offset of the first 1 in ms mant
	beqs	lower			|ms mant is all 0's

	movel	%d6,-(%a7)

	subw	%d7,LOCAL_EX(%a0)	|exponent -= shift count
	movel	LOCAL_LO(%a0),%d1	|d1 = ls mant
	movel	LOCAL_HI(%a0),%d0	|d0 = ms mant
	movel	%d1,%d6			|d6 = working copy of ls mant

	lsll	%d7,%d0			|first 1 moves to the msb of ms mant
	lsll	%d7,%d6			|ls mant shifted by the same count
	movel	%d6,LOCAL_LO(%a0)	|store new ls mant
	moveql	#32,%d6
	subl	%d7,%d6			|d6 = 32 - count
	lsrl	%d6,%d1			|bits of ls mant that cross into ms mant
	orl	%d1,%d0
	movel	%d0,LOCAL_HI(%a0)	|store new ms mant
	moveml	(%a7)+,%d6/%d7		|restore registers
	rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
|
lower:
	movel	LOCAL_LO(%a0),%d1	|d1 = ls mant
	movew	LOCAL_EX(%a0),%d0	|d0 = exponent
	bfffo	%d1{#0:#32},%d7		|d7 = offset of the first 1 in ls mant
	subw	#32,%d0			|account for ms mant being all zeros
	subw	%d7,%d0			|and for the shift inside ls mant
	lsll	%d7,%d1			|first 1 moves to the msb
	movew	%d0,LOCAL_EX(%a0)	|store exp
	movel	%d1,LOCAL_HI(%a0)	|old ls mant becomes the ms mant
	clrl	LOCAL_LO(%a0)		|ls mant is now empty
	movel	(%a7)+,%d7
	rts
|
|	denorm --- denormalize an intermediate result
|
|	Used by underflow.
|
| Input:
|	a0	points to the operand to be denormalized
|		(in the internal extended format)
|	d0	rounding precision: 0 = ext, 1 = sgl, anything else is
|		handled as dbl
| Output:
|	a0	points to the denormalized result
|		(in the internal extended format)
|	d0	guard,round,sticky
|
| The denormalization threshold is 0 for extended precision and
| sgl_thresh / dbl_thresh otherwise.  For sgl and dbl, when the
| threshold is 67 or more above the exponent (mantissa plus g-r-s
| bits) every bit would be shifted off: the mantissa is then cleared
| directly, the exponent is set to the threshold and only the sticky
| bit is reported.  In every other case dnrm_lp does the shifting.
| inx2a_mask is or-ed into USER_FPSR(a6) when bits were lost.
|
| NOTE(review): the first precision test looks at d0.b only, the
| second one at the whole of d0 - presumably callers pass a clean
| longword; confirm.
|
	.global	denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	|exponents between $7fff-$4000
	beqs	no_sgn_ext
	bsetb	#7,LOCAL_EX(%a0)	|are sign extended
no_sgn_ext:

	tstb	%d0			|extended precision?
	bnes	not_ext
	clrl	%d1			|yes: the threshold is 0
	bras	dnrm_shift

not_ext:
	movew	#dbl_thresh,%d1		|start from the dbl threshold
	cmpil	#1,%d0			|single precision?
	bnes	thresh_set
	movew	#sgl_thresh,%d1		|yes: use the sgl threshold instead
thresh_set:
	movel	%d1,%d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0			|diff >= 67 (mant + grs bits)?
	bpls	chk_stky		|then everything is shifted off

dnrm_shift:
	clrl	%d0			|start with clear g,r,s
	bsr	dnrm_lp			|denormalize the number
	tstb	%d1			|inexact flag from dnrm_lp
	bnes	dnrm_inex
no_inex:
	rts

chk_stky:
	tstl	LOCAL_HI(%a0)		|any mantissa bit set?
	bnes	set_stky
	tstl	LOCAL_LO(%a0)
	beqs	clr_mant
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	movel	#0x20000000,%d0		|sticky bit in the return value
clr_mant:
	movew	%d1,LOCAL_EX(%a0)	|exponent = threshold
	clrl	LOCAL_HI(%a0)		|ms mantissa = 0
	clrl	LOCAL_LO(%a0)		|ls mantissa = 0
	rts

dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	rts
|  | 483 |  | 
|
|	dnrm_lp --- normalize exponent/mantissa to specified threshold
|
| Input:
|	a0		points to the operand to be denormalized
|	d0{31:29}	initial guard,round,sticky
|	d1{15:0}	denormalization threshold
| Output:
|	a0		points to the denormalized operand
|	d0{31:29}	final guard,round,sticky
|	d1.b		inexact flag:  all ones means inexact result
|
| For a type E3 exception the initial g,r,s are not taken from d0 but
| from the 3-bit field at bit offset 6 of WBTEMP_GRS.
|
| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
| so that bfext can be used to extract the new low part of the mantissa.
| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
| is no LOCAL_GRS scratch word following it on the fsave frame.
|
| The shift count is threshold - exponent; this entry code selects
| no_lp (count <= 0), case_1 (count < 32), case_2 (count < 64) or
| case_3 (count >= 64).
|
	.global	dnrm_lp
dnrm_lp:
	btstb	#E3,E_BYTE(%a6)		|type E3 exception?
	beqs	not_E3
	bfextu	WBTEMP_GRS(%a6){#6:#3},%d0	|yes: fetch g,r,s from WBTEMP
	rorl	#3,%d0			|and move them to d0{31:29}
not_E3:
	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)
	movel	%d1,%d0			|d0 = copy of the denorm threshold
	subw	LOCAL_EX(%a0),%d1	|d1 = threshold - uns exponent
	bles	no_lp			|d1 <= 0
	cmpw	#32,%d1
	blts	case_1			|0 < d1 < 32
	cmpw	#64,%d1
	blts	case_2			|32 <= d1 < 64
	bra	case_3			|d1 >= 64
|
| No normalization necessary
|
no_lp:
	clrb	%d1			|no inex2 to report
	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|hand back the initial g,r,s
	rts
|
| case (0<d1<32)
|
| Entered from dnrm_lp with d0 = threshold and d1 = shift count.
| LOCAL_HI:LOCAL_LO and the scratch copy of LOCAL_LO:LOCAL_GRS are
| re-read through 32-bit bit fields starting 32 - d1 bits into the
| preceding longword.
|
case_1:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)	|exponent = denorm threshold
	moveql	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	|d2 = new LOCAL_HI
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
	movel	%d2,LOCAL_HI(%a0)
	movel	%d1,LOCAL_LO(%a0)
	clrb	%d1			|assume exact
	bftst	%d0{#2:#30}		|anything below g and r?
	beqs	c1nstky
	bsetl	#rnd_stky_bit,%d0	|yes: set sticky
	st	%d1			|and flag the result inexact
c1nstky:
	movel	#0xe0000000,%d2
	andl	FP_SCR2+LOCAL_GRS(%a6),%d2	|initial G,R,S only
	beqs	grs_clear
	orl	#0x20000000,%d0		|they were set: fold into sticky
grs_clear:
	andil	#0xe0000000,%d0		|clear all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| case (32<=d1<64)
|
| Entered from dnrm_lp with d0 = threshold and d1 = shift count.
| The shift is at least one longword: LOCAL_HI becomes 0, the new
| LOCAL_LO and the new g,r,s are extracted with the count reduced
| by 32.
|
case_2:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)	|unsigned exponent = threshold
	subw	#32,%d1			|d1 now 0..31
	moveql	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	|d2 = new LOCAL_LO
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
	bftst	%d1{#2:#30}
	bnes	c2_sstky		|bits below g and r: sticky
	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}
	bnes	c2_sstky		|bits shifted out further down: sticky
	movel	%d1,%d0
	clrb	%d1			|exact
	bras	end_c2
c2_sstky:
	movel	%d1,%d0
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|inexact
end_c2:
	clrl	LOCAL_HI(%a0)		|new LOCAL_HI = 0
	movel	%d2,LOCAL_LO(%a0)	|store new LOCAL_LO
	movel	#0xe0000000,%d2
	andl	FP_SCR2+LOCAL_GRS(%a6),%d2	|initial G,R,S only
	beqs	clear_grs
	orl	#0x20000000,%d0		|they were set: fold into sticky
clear_grs:
	andil	#0xe0000000,%d0		|get rid of all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
| Entered from dnrm_lp with d0 = threshold and d1 = shift count.
| The whole mantissa is shifted out.  Only for a count of 64 or 65
| can bits of LOCAL_HI still land in g (and r); for anything larger
| the result is a zero mantissa with the sticky bit set.
|
case_3:
	movew	%d0,LOCAL_EX(%a0)
	tstw	LOCAL_SGN(%a0)
	bpls	c3con
c3neg:
	bsetb	#7,LOCAL_EX(%a0)	|set the top bit of the longword at LOCAL_EX
c3con:
	cmpw	#64,%d1
	beqs	sixty_four
	cmpw	#65,%d1
	beqs	sixty_five
|
| Shift value is out of range.  Set d1 for inex2 flag and
| return a zero with the given threshold.
|
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	movel	#0x20000000,%d0
	st	%d1
	rts

sixty_four:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#2:#30},%d1		|d1 = everything below the top two bits
	andil	#0xc0000000,%d0		|top two bits become g and r
	bras	c3com

sixty_five:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#1:#31},%d1		|d1 = everything below the top bit
	andil	#0x80000000,%d0
	lsrl	#1,%d0			|shift high bit into R bit

c3com:
	tstl	%d1			|remaining bits of LOCAL_HI
	bnes	c3ssticky
	tstl	LOCAL_LO(%a0)
	bnes	c3ssticky
	tstb	FP_SCR2+LOCAL_GRS(%a6)	|top byte of the initial g,r,s
	bnes	c3ssticky
	clrb	%d1			|nothing lost: exact
	bras	c3end

c3ssticky:
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|inexact
c3end:
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	rts
|  | 647 |  | 
|  | 648 | |end |