| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | | | 
 | 2 | |	round.sa 3.4 7/29/91 | 
 | 3 | | | 
 | 4 | |	handle rounding and normalization tasks | 
 | 5 | | | 
 | 6 | | | 
 | 7 | | | 
 | 8 | |		Copyright (C) Motorola, Inc. 1990 | 
 | 9 | |			All Rights Reserved | 
 | 10 | | | 
| Matt Waddel | e00d82d | 2006-02-11 17:55:48 -0800 | [diff] [blame] | 11 | |       For details on the license for this file, please see the | 
 | 12 | |       file, README, in this same directory. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 13 |  | 
 | 14 | |ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package | 
 | 15 |  | 
 | 16 | 	|section	8 | 
 | 17 |  | 
 | 18 | #include "fpsp.h" | 
 | 19 |  | 
 | 20 | | | 
 | 21 | |	round --- round result according to precision/mode | 
 | 22 | | | 
 | 23 | |	a0 points to the input operand in the internal extended format | 
 | 24 | |	d1(high word) contains rounding precision: | 
 | 25 | |		ext = $0000xxxx | 
 | 26 | |		sgl = $0001xxxx | 
 | 27 | |		dbl = $0002xxxx | 
 | 28 | |	d1(low word) contains rounding mode: | 
 | 29 | |		RN  = $xxxx0000 | 
 | 30 | |		RZ  = $xxxx0001 | 
 | 31 | |		RM  = $xxxx0002 | 
 | 32 | |		RP  = $xxxx0003 | 
 | 33 | |	d0{31:29} contains the g,r,s bits (extended) | 
 | 34 | | | 
 | 35 | |	On return the value pointed to by a0 is correctly rounded, | 
 | 36 | |	a0 is preserved and the g-r-s bits in d0 are cleared. | 
 | 37 | |	The result is not typed - the tag field is invalid.  The | 
 | 38 | |	result is still in the internal extended format. | 
 | 39 | | | 
 | 40 | |	The INEX bit of USER_FPSR will be set if the rounded result was | 
 | 41 | |	inexact (i.e. if any of the g-r-s bits were set). | 
 | 42 | | | 
 | 43 |  | 
 | 44 | 	.global	round | 
 | 45 | round: | 
 | 46 | | If g=r=s=0 the result is exact: skip the mode dispatch and only | 
 | 47 | | truncate to the rounding precision.  Otherwise set the inex flags | 
 | 48 | | in USER_FPSR and round according to the mode in d1.w. | 
 | 49 | 	bsrs	ext_grs			|this subroutine looks at the | 
 | 50 | |					;rounding precision and sets | 
 | 51 | |					;the appropriate g-r-s bits. | 
 | 52 | 	tstl	%d0			|any of g,r,s set => inexact, | 
 | 53 | 	bne	rnd_cont		|so go round by mode | 
 | 54 |  | 
 | 55 | 	swap	%d1			|set up d1.w for round prec. | 
 | 56 | 	bra	truncate		|exact: only clear bits below prec. | 
 | 57 |  | 
 | 58 | rnd_cont: | 
 | 59 | | | 
 | 60 | | Use rounding mode as an index into a jump table for these modes. | 
 | 61 | | | 
 | 62 | 	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | 
 | 63 | 	lea	mode_tab,%a1		|a1 = base of the mode table | 
 | 64 | 	movel	(%a1,%d1.w*4),%a1	|a1 = handler for mode d1.w | 
 | 65 | 	jmp	(%a1)			|jmp, so the handler's rts returns to our caller | 
 | 66 | | | 
 | 67 | | Jump table indexed by rounding mode in d1.w.  All following assumes | 
 | 68 | | grs != 0. | 
 | 69 | | | 
 | 70 | mode_tab: | 
 | 71 | 	.long	rnd_near		|index 0: RN | 
 | 72 | 	.long	rnd_zero		|index 1: RZ | 
 | 73 | 	.long	rnd_mnus		|index 2: RM | 
 | 74 | 	.long	rnd_plus		|index 3: RP | 
 | 75 | | | 
 | 76 | |	ROUND PLUS INFINITY | 
 | 77 | | | 
 | 78 | |	If sign of fp number = 0 (positive), then add 1 to l. | 
 | 79 | | | 
 | 80 | rnd_plus: | 
 | 81 | 	swap	%d1			|set up d1 for round prec. | 
 | 82 | 	tstb	LOCAL_SGN(%a0)		|check for sign | 
 | 83 | 	bmi	truncate		|if negative then truncate | 
 | 84 | 	movel	#0xffffffff,%d0		|force g,r,s to be all f's | 
 | 85 | 	lea	add_to_l,%a1		|positive: add 1 to l using the | 
 | 86 | 	movel	(%a1,%d1.w*4),%a1	|routine for precision d1.w; d0 != 0 | 
 | 87 | 	jmp	(%a1)			|makes it skip its clear-l-bit step | 
 | 88 | | | 
 | 89 | |	ROUND MINUS INFINITY | 
 | 90 | | | 
 | 91 | |	If sign of fp number = 1 (negative), then add 1 to l. | 
 | 92 | | | 
 | 93 | rnd_mnus: | 
 | 94 | 	swap	%d1			|set up d1 for round prec. | 
 | 95 | 	tstb	LOCAL_SGN(%a0)		|check for sign | 
 | 96 | 	bpl	truncate		|if positive then truncate | 
 | 97 | 	movel	#0xffffffff,%d0		|force g,r,s to be all f's | 
 | 98 | 	lea	add_to_l,%a1		|negative: add 1 to l using the | 
 | 99 | 	movel	(%a1,%d1.w*4),%a1	|routine for precision d1.w; d0 != 0 | 
 | 100 | 	jmp	(%a1)			|makes it skip its clear-l-bit step | 
 | 101 | | | 
 | 102 | |	ROUND ZERO | 
 | 103 | | | 
 | 104 | |	Always truncate. | 
 | 105 | rnd_zero: | 
 | 106 | 	swap	%d1			|set up d1 for round prec. | 
 | 107 | 	bra	truncate		|drop everything below that precision | 
 | 108 | | | 
 | 109 | | | 
 | 110 | |	ROUND NEAREST | 
 | 111 | | | 
 | 112 | |	If (g=1), then add 1 to l and if (r=s=0), then clear l | 
 | 113 | |	Note that this will round to even in case of a tie. | 
 | 114 | | | 
 | 115 | rnd_near: | 
 | 116 | 	swap	%d1			|set up d1 for round prec. | 
 | 117 | 	asll	#1,%d0			|shift g-bit to c-bit; d0 keeps r,s | 
 | 118 | 	bcc	truncate		|g=0: below half, just truncate | 
 | 119 | 	lea	add_to_l,%a1		|g=1: add 1 to l; the add routine | 
 | 120 | 	movel	(%a1,%d1.w*4),%a1	|tests d0 (r,s) and clears l when | 
 | 121 | 	jmp	(%a1)			|both are zero (tie -> even) | 
 | 122 |  | 
 | 123 | | | 
 | 124 | |	ext_grs --- extract guard, round and sticky bits | 
 | 125 | | | 
 | 126 | | Input:	d1 =		PREC:ROUND | 
 | 127 | | Output:	d0{31:29}=	guard, round, sticky | 
 | 128 | | | 
 | 129 | | ext_grs extracts the guard/round/sticky bits according to the | 
 | 130 | | selected rounding precision. It is called by the round subroutine | 
 | 131 | | only.  All registers except d0 are kept intact. On return d0{31:29} | 
 | 132 | | holds the updated guard,round,sticky bits. | 
 | 133 | | | 
 | 134 | | Notes: the ext_grs uses the round PREC, and therefore has to swap d1 | 
 | 135 | |	 prior to usage, and needs to restore d1 to original. | 
 | 136 | | | 
 | 137 | ext_grs: | 
 | 138 | 	swap	%d1			|have d1.w point to round precision | 
 | 139 | 	cmpiw	#0,%d1			|precision 0 = ext? | 
 | 140 | 	bnes	sgl_or_dbl		|no - derive g-r-s from the mantissa | 
 | 141 | 	bras	end_ext_grs		|yes - d0 is returned as passed in | 
 | 142 |  | 
 | 143 | sgl_or_dbl: | 
 | 144 | 	moveml	%d2/%d3,-(%a7)		|make some temp registers | 
 | 145 | 	cmpiw	#1,%d1			|precision 1 = sgl? | 
 | 146 | 	bnes	grs_dbl			|anything else is handled as dbl | 
 | 147 | grs_sgl: | 
 | 148 | 	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|sgl prec. g-r are 2 bits right | 
 | 149 | 	movel	#30,%d2			|of the sgl prec. limits | 
 | 150 | 	lsll	%d2,%d3			|shift g-r bits to MSB of d3 | 
 | 151 | 	movel	LOCAL_HI(%a0),%d2		|get ms mantissa for s-bit test | 
 | 152 | 	andil	#0x0000003f,%d2		|s bit is the or of all other | 
 | 153 | 	bnes	st_stky			|bits to the right of g-r | 
 | 154 | 	tstl	LOCAL_LO(%a0)		|test lower mantissa | 
 | 155 | 	bnes	st_stky			|if any are set, set sticky | 
 | 156 | 	tstl	%d0			|test original g,r,s | 
 | 157 | 	bnes	st_stky			|if any are set, set sticky | 
 | 158 | 	bras	end_sd			|nothing below g-r is set, exit | 
 | 159 | grs_dbl: | 
 | 160 | 	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|dbl-prec. g-r are 2 bits right | 
 | 161 | 	movel	#30,%d2			|of the dbl prec. limits | 
 | 162 | 	lsll	%d2,%d3			|shift g-r bits to the MSB of d3 | 
 | 163 | 	movel	LOCAL_LO(%a0),%d2		|get lower mantissa  for s-bit test | 
 | 164 | 	andil	#0x000001ff,%d2		|s bit is the or-ing of all | 
 | 165 | 	bnes	st_stky			|other bits to the right of g-r | 
 | 166 | 	tstl	%d0			|test original g,r,s | 
 | 167 | 	bnes	st_stky			|if any are set, set sticky | 
 | 168 | 	bras	end_sd			|if clear, exit | 
 | 169 | st_stky: | 
 | 170 | 	bset	#rnd_stky_bit,%d3	|rnd_stky_bit is defined outside this file | 
 | 171 | end_sd: | 
 | 172 | 	movel	%d3,%d0			|return grs to d0 | 
 | 173 | 	moveml	(%a7)+,%d2/%d3		|restore scratch registers | 
 | 174 | end_ext_grs: | 
 | 175 | 	swap	%d1			|restore d1 to original | 
 | 176 | 	rts | 
 | 177 |  | 
 | 178 | |*******************  Local Equates | 
 | 179 | 	.set	ad_1_sgl,0x00000100	|  added to LOCAL_HI: 1 in the sgl l-bit (bit 8) | 
 | 180 | 	.set	ad_1_dbl,0x00000800	|  added to LOCAL_LO: 1 in the dbl l-bit (bit 11) | 
 | 181 |  | 
 | 182 |  | 
 | 183 | |Jump table for adding 1 to the l-bit indexed by rnd prec | 
 | 184 |  | 
 | 185 | add_to_l: | 
 | 186 | 	.long	add_ext			|index 0: ext | 
 | 187 | 	.long	add_sgl			|index 1: sgl | 
 | 188 | 	.long	add_dbl			|index 2: dbl | 
 | 189 | 	.long	add_dbl			|index 3: also routed to the dbl routine | 
 | 190 | | | 
 | 191 | |	ADD SINGLE | 
 | 192 | | | 
 | 193 | add_sgl: | 
 | 194 | 	addl	#ad_1_sgl,LOCAL_HI(%a0)	|add 1 at the sgl l-bit | 
 | 195 | 	bccs	scc_clr			|no mantissa overflow | 
 | 196 | 	roxrw  LOCAL_HI(%a0)		|carry out: rotate it back in as msb, | 
 | 197 | 	roxrw  LOCAL_HI+2(%a0)		|one word at a time (ms mant >> 1) | 
 | 198 | 	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent | 
 | 199 | scc_clr: | 
 | 200 | 	tstl	%d0			|test for rs = 0 | 
 | 201 | 	bnes	sgl_done | 
 | 202 | 	andiw  #0xfe00,LOCAL_HI+2(%a0)	|clear the l-bit (and the bits below it) | 
 | 203 | sgl_done: | 
 | 204 | 	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit | 
 | 205 | 	clrl	LOCAL_LO(%a0)		|clear ls mantissa | 
 | 206 | 	rts | 
 | 207 |  | 
 | 208 | | | 
 | 209 | |	ADD EXTENDED | 
 | 210 | | | 
 | 211 | add_ext: | 
 | 212 | 	addql  #1,LOCAL_LO(%a0)		|add 1 to l-bit | 
 | 213 | 	bccs	xcc_clr			|test for carry out | 
 | 214 | 	addql  #1,LOCAL_HI(%a0)		|propagate carry | 
 | 215 | 	bccs	xcc_clr | 
 | 216 | 	roxrw  LOCAL_HI(%a0)		|carry out of the whole mantissa: | 
 | 217 | 	roxrw  LOCAL_HI+2(%a0)		|rotate it back in as the msb, one | 
 | 218 | 	roxrw	LOCAL_LO(%a0)		|word at a time (mantissa >> 1) | 
 | 219 | 	roxrw	LOCAL_LO+2(%a0) | 
 | 220 | 	addw	#0x1,LOCAL_EX(%a0)	|and inc exp | 
 | 221 | xcc_clr: | 
 | 222 | 	tstl	%d0			|test rs = 0 | 
 | 223 | 	bnes	add_ext_done | 
 | 224 | 	andib	#0xfe,LOCAL_LO+3(%a0)	|clear the l bit | 
 | 225 | add_ext_done: | 
 | 226 | 	rts | 
 | 227 | | | 
 | 228 | |	ADD DOUBLE | 
 | 229 | | | 
 | 230 | add_dbl: | 
 | 231 | 	addl	#ad_1_dbl,LOCAL_LO(%a0)	|add 1 at the dbl l-bit | 
 | 232 | 	bccs	dcc_clr | 
 | 233 | 	addql	#1,LOCAL_HI(%a0)		|propagate carry | 
 | 234 | 	bccs	dcc_clr | 
 | 235 | 	roxrw	LOCAL_HI(%a0)		|carry out of the whole mantissa: | 
 | 236 | 	roxrw	LOCAL_HI+2(%a0)		|rotate it back in as the msb, one | 
 | 237 | 	roxrw	LOCAL_LO(%a0)		|word at a time (mantissa >> 1) | 
 | 238 | 	roxrw	LOCAL_LO+2(%a0) | 
 | 239 | 	addw	#0x1,LOCAL_EX(%a0)	|incr exponent | 
 | 240 | dcc_clr: | 
 | 241 | 	tstl	%d0			|test for rs = 0 | 
 | 242 | 	bnes	dbl_done | 
 | 243 | 	andiw	#0xf000,LOCAL_LO+2(%a0)	|clear the l-bit (and the bits below it) | 
 | 244 |  | 
 | 245 | dbl_done: | 
 | 246 | 	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit | 
 | 247 | 	rts | 
 | 248 |  | 
 | 249 | error:				|nothing in this file branches to this label | 
 | 250 | 	rts | 
 | 251 | | | 
 | 252 | | Truncate all other bits | 
 | 253 | | | 
 | 254 | trunct: | 
 | 255 | 	.long	end_rnd			|index 0: ext - nothing to clear | 
 | 256 | 	.long	sgl_done		|index 1: sgl - reuse add_sgl's truncation tail | 
 | 257 | 	.long	dbl_done		|index 2: dbl - reuse add_dbl's truncation tail | 
 | 258 | 	.long	dbl_done		|index 3: also routed to the dbl tail | 
 | 259 |  | 
 | 260 | truncate: | 
 | 261 | 	lea	trunct,%a1		|expects d1.w = rounding precision | 
 | 262 | 	movel	(%a1,%d1.w*4),%a1	|a1 = truncation routine for that precision | 
 | 263 | 	jmp	(%a1) | 
 | 264 |  | 
 | 265 | end_rnd: | 
 | 266 | 	rts | 
 | 267 |  | 
 | 268 | | | 
 | 269 | |	NORMALIZE | 
 | 270 | | | 
 | 271 | | These routines (nrm_zero & nrm_set) normalize the unnorm.  This | 
 | 272 | | is done by shifting the mantissa left while decrementing the | 
 | 273 | | exponent. | 
 | 274 | | | 
 | 275 | | NRM_SET shifts and decrements until there is a 1 set in the integer | 
 | 276 | | bit of the mantissa (msb in d1). | 
 | 277 | | | 
 | 278 | | NRM_ZERO shifts and decrements until there is a 1 set in the integer | 
 | 279 | | bit of the mantissa (msb in d1) unless this would mean the exponent | 
 | 280 | | would go less than 0.  In that case the number becomes a denorm - the | 
 | 281 | | exponent (d0) is set to 0 and the mantissa (d1 & d2) is not | 
 | 282 | | normalized. | 
 | 283 | | | 
 | 284 | | Note that both routines have been optimized (for the worst case) and | 
 | 285 | | therefore do not have the easy to follow decrement/shift loop. | 
 | 286 | | | 
 | 287 | |	NRM_ZERO | 
 | 288 | | | 
 | 289 | |	Distance to first 1 bit in mantissa = X | 
 | 290 | |	Distance to 0 from exponent = Y | 
 | 291 | |	If X < Y | 
 | 292 | |	Then | 
 | 293 | |	  nrm_set | 
 | 294 | |	Else | 
 | 295 | |	  shift mantissa by Y | 
 | 296 | |	  set exponent = 0 | 
 | 297 | | | 
 | 298 | |input: | 
 | 299 | |	FP_SCR1 = exponent, ms mantissa part, ls mantissa part | 
 | 300 | |output: | 
 | 301 | |	L_SCR1{4} = fpte15 or ete15 bit | 
 | 302 | | | 
 | 303 | 	.global	nrm_zero | 
 | 304 | nrm_zero: | 
 | 305 | 	movew	LOCAL_EX(%a0),%d0 | 
 | 306 | 	cmpw   #64,%d0          |see if exp >= 64 | 
 | 307 | 	bmis	d0_less | 
 | 308 | 	bsr	nrm_set		|exp >= 64 so exp won't go below 0 | 
 | 309 | 	rts | 
 | 310 | d0_less: | 
 | 311 | 	moveml	%d2/%d3/%d5/%d6,-(%a7) | 
 | 312 | 	movel	LOCAL_HI(%a0),%d1 | 
 | 313 | 	movel	LOCAL_LO(%a0),%d2 | 
 | 314 |  | 
 | 315 | 	bfffo	%d1{#0:#32},%d3	|get the distance to the first 1 | 
 | 316 | |				;in ms mant | 
 | 317 | 	beqs	ms_clr		|branch if no bits were set | 
 | 318 | 	cmpw	%d3,%d0		|if X>Y | 
 | 319 | 	bmis	greater		|then exp will go past 0 (neg) if | 
 | 320 | |				;it is just shifted | 
 | 321 | 	bsr	nrm_set		|else exp won't go past 0 | 
 | 322 | 	moveml	(%a7)+,%d2/%d3/%d5/%d6 | 
 | 323 | 	rts | 
 | 324 | greater: | 
 | 325 | 	movel	%d2,%d6		|save ls mant in d6 | 
 | 326 | 	lsll	%d0,%d2		|shift ls mant by count (count = exp) | 
 | 327 | 	lsll	%d0,%d1		|shift ms mant by count | 
 | 328 | 	movel	#32,%d5 | 
 | 329 | 	subl	%d0,%d5		|d5 = 32 - count | 
 | 330 | 	lsrl	%d5,%d6		|d6 = ls mant bits that cross into the ms mant | 
 | 331 | |				;NOTE(review): ms_clr can get here with a count of 32..63; both lsll results are then 0 and 32-count is negative, so the ls mant bits look lost - confirm | 
 | 332 | 	orl	%d6,%d1		|shift the ls mant bits into the ms mant | 
 | 333 | 	movel	#0,%d0		|same as if decremented exp to 0 | 
 | 334 | |				;while shifting | 
 | 335 | 	movew	%d0,LOCAL_EX(%a0) | 
 | 336 | 	movel	%d1,LOCAL_HI(%a0) | 
 | 337 | 	movel	%d2,LOCAL_LO(%a0) | 
 | 338 | 	moveml	(%a7)+,%d2/%d3/%d5/%d6 | 
 | 339 | 	rts | 
 | 340 | ms_clr: | 
 | 341 | 	bfffo	%d2{#0:#32},%d3	|check if any bits set in ls mant | 
 | 342 | 	beqs	all_clr		|branch if none set | 
 | 343 | 	addw	#32,%d3		|X = 32 (empty ms mant) + offset in ls mant | 
 | 344 | 	cmpw	%d3,%d0		|if X>Y | 
 | 345 | 	bmis	greater		|then branch | 
 | 346 | 	bsr	nrm_set		|else exp won't go past 0 | 
 | 347 | 	moveml	(%a7)+,%d2/%d3/%d5/%d6 | 
 | 348 | 	rts | 
 | 349 | all_clr: | 
 | 350 | 	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0. | 
 | 351 | 	moveml	(%a7)+,%d2/%d3/%d5/%d6 | 
 | 352 | 	rts | 
 | 353 | | | 
 | 354 | |	NRM_SET | 
 | 355 | | | 
 | 356 | 	.global	nrm_set | 
 | 357 | nrm_set: | 
 | 358 | 	movel	%d7,-(%a7)		|d7 (and d6 below) are saved; d0/d1 are overwritten | 
 | 359 | 	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant (offset to d7) | 
 | 360 | 	beqs	lower		|branch if ms mant is all 0's | 
 | 361 |  | 
 | 362 | 	movel	%d6,-(%a7) | 
 | 363 |  | 
 | 364 | 	subw	%d7,LOCAL_EX(%a0)	|sub exponent by count | 
 | 365 | 	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant | 
 | 366 | 	movel	LOCAL_LO(%a0),%d1 |d1 has ls mant | 
 | 367 |  | 
 | 368 | 	lsll	%d7,%d0		|shift first 1 to j bit position | 
 | 369 | 	movel	%d1,%d6		|copy ls mant into d6 | 
 | 370 | 	lsll	%d7,%d6		|shift ls mant by count | 
 | 371 | 	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory | 
 | 372 | 	moveql	#32,%d6 | 
 | 373 | 	subl	%d7,%d6		|d6 = 32 - count | 
 | 374 | 	lsrl	%d6,%d1		|shift off all bits but those that will | 
 | 375 | |				;be shifted into ms mant | 
 | 376 | 	orl	%d1,%d0		|shift the ls mant bits into the ms mant | 
 | 377 | 	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory | 
 | 378 | 	moveml	(%a7)+,%d7/%d6	|restore registers | 
 | 379 | 	rts | 
 | 380 |  | 
 | 381 | | | 
 | 382 | | We get here if ms mant was = 0, and we assume ls mant has bits | 
 | 383 | | set (otherwise this would have been tagged a zero not a denorm). | 
 | 384 | | Only d7 is on the stack at this point (d6 has not been pushed). | 
 | 385 | lower: | 
 | 386 | 	movew	LOCAL_EX(%a0),%d0	|d0 has exponent | 
 | 387 | 	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant | 
 | 388 | 	subw	#32,%d0		|account for ms mant being all zeros | 
 | 389 | 	bfffo	%d1{#0:#32},%d7	|find first 1 in ls mant (offset to d7) | 
 | 390 | 	subw	%d7,%d0		|subtract shift count from exp | 
 | 391 | 	lsll	%d7,%d1		|shift first 1 to integer bit in ms mant | 
 | 392 | 	movew	%d0,LOCAL_EX(%a0)	|store exp | 
 | 393 | 	movel	%d1,LOCAL_HI(%a0)	|store ms mant | 
 | 394 | 	clrl	LOCAL_LO(%a0)	|clear ls mant | 
 | 395 | 	movel	(%a7)+,%d7 | 
 | 396 | 	rts | 
 | 397 | | | 
 | 398 | |	denorm --- denormalize an intermediate result | 
 | 399 | | | 
 | 400 | |	Used by underflow. | 
 | 401 | | | 
 | 402 | | Input: | 
 | 403 | |	a0	 points to the operand to be denormalized | 
 | 404 | |		 (in the internal extended format) | 
 | 405 | | | 
 | 406 | |	d0:	 rounding precision | 
 | 407 | | Output: | 
 | 408 | |	a0	 points to the denormalized result | 
 | 409 | |		 (in the internal extended format) | 
 | 410 | | | 
 | 411 | |	d0	is guard,round,sticky | 
 | 412 | | | 
 | 413 | | d0 comes into this routine with the rounding precision. It | 
 | 414 | | is then loaded with the denormalized exponent threshold for the | 
 | 415 | | rounding precision. | 
 | 416 | | | 
 | 417 |  | 
 | 418 | 	.global	denorm | 
 | 419 | denorm: | 
 | 420 | 	btstb	#6,LOCAL_EX(%a0)	|check for exponents between $7fff-$4000 | 
 | 421 | 	beqs	no_sgn_ext | 
 | 422 | 	bsetb	#7,LOCAL_EX(%a0)	|sign extend if it is so | 
 | 423 | no_sgn_ext: | 
 | 424 |  | 
 | 425 | 	cmpib	#0,%d0		|if 0 then extended precision | 
 | 426 | 	bnes	not_ext		|else branch | 
 | 427 |  | 
 | 428 | 	clrl	%d1		|load d1 with ext threshold | 
 | 429 | 	clrl	%d0		|start with g,r,s = 0 | 
 | 430 | 	bsr	dnrm_lp		|denormalize the number | 
 | 431 | 	tstb	%d1		|check for inex | 
 | 432 | 	beq	no_inex		|if clr, no inex | 
 | 433 | 	bras	dnrm_inex	|if set, set inex | 
 | 434 |  | 
 | 435 | not_ext: | 
 | 436 | 	cmpil	#1,%d0		|if 1 then single precision | 
 | 437 | 	beqs	load_sgl	|else must be 2, double prec | 
 | 438 | | NOTE(review): the ext test above compares d0.b, this one compares all of d0 - confirm callers clear the upper bits | 
 | 439 | load_dbl: | 
 | 440 | 	movew	#dbl_thresh,%d1	|put copy of threshold in d1 | 
 | 441 | 	movel	%d1,%d0		|copy d1 into d0 | 
 | 442 | 	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp | 
 | 443 | 	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits) | 
 | 444 | 	bpls	chk_stky	|then branch (all bits would be | 
 | 445 | |				; shifted off in denorm routine) | 
 | 446 | 	clrl	%d0		|else start with g,r,s = 0 | 
 | 447 | 	bsr	dnrm_lp		|denormalize the number | 
 | 448 | 	tstb	%d1		|check flag | 
 | 449 | 	beqs	no_inex		|if clr, no inex | 
 | 450 | 	bras	dnrm_inex	|if set, set inex | 
 | 451 |  | 
 | 452 | load_sgl: | 
 | 453 | 	movew	#sgl_thresh,%d1	|put copy of threshold in d1 | 
 | 454 | 	movel	%d1,%d0		|copy d1 into d0 | 
 | 455 | 	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp | 
 | 456 | 	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits) | 
 | 457 | 	bpls	chk_stky	|then branch (all bits would be | 
 | 458 | |				; shifted off in denorm routine) | 
 | 459 | 	clrl	%d0		|else start with g,r,s = 0 | 
 | 460 | 	bsr	dnrm_lp		|denormalize the number | 
 | 461 | 	tstb	%d1		|check flag | 
 | 462 | 	beqs	no_inex		|if clr, no inex | 
 | 463 | 	bras	dnrm_inex	|if set, set inex | 
 | 464 |  | 
 | 465 | chk_stky: | 
 | 466 | 	tstl	LOCAL_HI(%a0)	|check for any bits set | 
 | 467 | 	bnes	set_stky | 
 | 468 | 	tstl	LOCAL_LO(%a0)	|check for any bits set | 
 | 469 | 	bnes	set_stky | 
 | 470 | 	bras	clr_mant	|NOTE(review): d0 still holds threshold-exp here, not g,r,s - confirm | 
 | 471 | set_stky: | 
 | 472 | 	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | 
 | 473 | 	movel	#0x20000000,%d0	|set sticky bit in return value | 
 | 474 | clr_mant: | 
 | 475 | 	movew	%d1,LOCAL_EX(%a0)		|load exp with threshold | 
 | 476 | 	movel	#0,LOCAL_HI(%a0)	|ms mantissa = 0 | 
 | 477 | 	movel	#0,LOCAL_LO(%a0)		|ls mantissa = 0 | 
 | 478 | 	rts | 
 | 479 | dnrm_inex: | 
 | 480 | 	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | 
 | 481 | no_inex: | 
 | 482 | 	rts | 
 | 483 |  | 
 | 484 | | | 
 | 485 | |	dnrm_lp --- denormalize exponent/mantissa to specified threshold | 
 | 486 | | | 
 | 487 | | Input: | 
 | 488 | |	a0		points to the operand to be denormalized | 
 | 489 | |	d0{31:29}	initial guard,round,sticky | 
 | 490 | |	d1{15:0}	denormalization threshold | 
 | 491 | | Output: | 
 | 492 | |	a0		points to the denormalized operand | 
 | 493 | |	d0{31:29}	final guard,round,sticky | 
 | 494 | |	d1.b		inexact flag:  all ones means inexact result | 
 | 495 | | | 
 | 496 | | The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2 | 
 | 497 | | so that bfext can be used to extract the new low part of the mantissa. | 
 | 498 | | Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there | 
 | 499 | | is no LOCAL_GRS scratch word following it on the fsave frame. | 
 | 500 | | | 
 | 501 | 	.global	dnrm_lp | 
 | 502 | dnrm_lp: | 
 | 503 | 	movel	%d2,-(%sp)		|save d2 for temp use | 
 | 504 | 	btstb	#E3,E_BYTE(%a6)		|test for type E3 exception | 
 | 505 | 	beqs	not_E3			|not type E3 exception | 
 | 506 | 	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|extract guard,round, sticky  bit | 
 | 507 | 	movel	#29,%d0 | 
 | 508 | 	lsll	%d0,%d2			|shift g,r,s to their positions | 
 | 509 | 	movel	%d2,%d0			|E3: these replace the g,r,s passed in d0 | 
 | 510 | not_E3: | 
 | 511 | 	movel	(%sp)+,%d2		|restore d2 | 
 | 512 | 	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)	|scratch copy of ls mant | 
 | 513 | 	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)	|scratch copy of initial g,r,s | 
 | 514 | 	movel	%d1,%d0			|copy the denorm threshold | 
 | 515 | 	subw	LOCAL_EX(%a0),%d1		|d1 = threshold - uns exponent | 
 | 516 | 	bles	no_lp			|d1 <= 0 | 
 | 517 | 	cmpw	#32,%d1 | 
 | 518 | 	blts	case_1			|0 < d1 < 32 | 
 | 519 | 	cmpw	#64,%d1 | 
 | 520 | 	blts	case_2			|32 <= d1 < 64 | 
 | 521 | 	bra	case_3			|d1 >= 64 | 
 | 522 | | | 
 | 523 | | No denormalization necessary: exponent is already at or above threshold | 
 | 524 | | | 
 | 525 | no_lp: | 
 | 526 | 	clrb	%d1			|set no inex2 reported | 
 | 527 | 	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|restore original g,r,s | 
 | 528 | 	rts | 
 | 529 | | | 
 | 530 | | case (0<d1<32) | 
 | 531 | | | 
 | 532 | case_1: | 
 | 533 | 	movel	%d2,-(%sp)		|d2 is scratch here | 
 | 534 | 	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold | 
 | 535 | 	movel	#32,%d0 | 
 | 536 | 	subw	%d1,%d0			|d0 = 32 - d1 | 
 | 537 | 	bfextu	LOCAL_EX(%a0){%d0:#32},%d2	|32 bits starting d0 bits into the long at LOCAL_EX | 
 | 538 | 	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_HI | 
 | 539 | 	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO | 
 | 540 | 	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S | 
 | 541 | 	movel	%d2,LOCAL_HI(%a0)		|store new LOCAL_HI | 
 | 542 | 	movel	%d1,LOCAL_LO(%a0)		|store new LOCAL_LO | 
 | 543 | 	clrb	%d1			|assume exact until shown otherwise | 
 | 544 | 	bftst	%d0{#2:#30}		|anything at or below the sticky position? | 
 | 545 | 	beqs	c1nstky | 
 | 546 | 	bsetl	#rnd_stky_bit,%d0	|yes: set sticky | 
 | 547 | 	st	%d1			|and flag inexact (d1.b = all ones) | 
 | 548 | c1nstky: | 
 | 549 | 	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s | 
 | 550 | 	andil	#0xe0000000,%d2		|clear all but G,R,S | 
 | 551 | 	tstl	%d2			|test if original G,R,S are clear | 
 | 552 | 	beqs	grs_clear | 
 | 553 | 	orl	#0x20000000,%d0		|set sticky bit in d0 | 
 | 554 | grs_clear: | 
 | 555 | 	andil	#0xe0000000,%d0		|clear all but G,R,S | 
 | 556 | 	movel	(%sp)+,%d2 | 
 | 557 | 	rts | 
 | 558 | | | 
 | 559 | | case (32<=d1<64) | 
 | 560 | | | 
 | 561 | case_2: | 
 | 562 | 	movel	%d2,-(%sp)		|d2 is scratch here | 
 | 563 | 	movew	%d0,LOCAL_EX(%a0)		|unsigned exponent = threshold | 
 | 564 | 	subw	#32,%d1			|d1 now between 0 and 31 | 
 | 565 | 	movel	#32,%d0 | 
 | 566 | 	subw	%d1,%d0			|d0 = 32 - d1 | 
 | 567 | 	bfextu	LOCAL_EX(%a0){%d0:#32},%d2	|32 bits starting d0 bits into the long at LOCAL_EX | 
 | 568 | 	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_LO | 
 | 569 | 	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S | 
 | 570 | 	bftst	%d1{#2:#30}		|anything at or below the sticky position? | 
 | 571 | 	bnes	c2_sstky		|bra if sticky bit to be set | 
 | 572 | 	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}	|anything further down (saved ls mant copy)? | 
 | 573 | 	bnes	c2_sstky		|bra if sticky bit to be set | 
 | 574 | 	movel	%d1,%d0			|d0 = new g,r,s | 
 | 575 | 	clrb	%d1			|report exact | 
 | 576 | 	bras	end_c2 | 
 | 577 | c2_sstky: | 
 | 578 | 	movel	%d1,%d0			|d0 = new g,r,s | 
 | 579 | 	bsetl	#rnd_stky_bit,%d0	|with sticky set | 
 | 580 | 	st	%d1			|flag inexact (d1.b = all ones) | 
 | 581 | end_c2: | 
 | 582 | 	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0 | 
 | 583 | 	movel	%d2,LOCAL_LO(%a0)		|store LOCAL_LO | 
 | 584 | 	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s | 
 | 585 | 	andil	#0xe0000000,%d2		|clear all but G,R,S | 
 | 586 | 	tstl	%d2			|test if original G,R,S are clear | 
 | 587 | 	beqs	clear_grs | 
 | 588 | 	orl	#0x20000000,%d0		|set sticky bit in d0 | 
 | 589 | clear_grs: | 
 | 590 | 	andil	#0xe0000000,%d0		|get rid of all but G,R,S | 
 | 591 | 	movel	(%sp)+,%d2 | 
 | 592 | 	rts | 
 | 593 | | | 
 | 594 | | d1 >= 64 Force the exponent to be the denorm threshold with the | 
 | 595 | | correct sign. | 
 | 596 | | | 
 | 597 | case_3: | 
 | 598 | 	movew	%d0,LOCAL_EX(%a0)	|exponent = denorm threshold | 
 | 599 | 	tstw	LOCAL_SGN(%a0)		|word at LOCAL_SGN negative? | 
 | 600 | 	bges	c3con | 
 | 601 | c3neg: | 
 | 602 | 	orl	#0x80000000,LOCAL_EX(%a0)	|yes: set the msb of the long at LOCAL_EX | 
 | 603 | c3con: | 
 | 604 | 	cmpw	#64,%d1 | 
 | 605 | 	beqs	sixty_four | 
 | 606 | 	cmpw	#65,%d1 | 
 | 607 | 	beqs	sixty_five | 
 | 608 | | | 
 | 609 | | Shift value is out of range.  Set d1 for inex2 flag and | 
 | 610 | | return a zero with the given threshold. | 
 | 611 | | | 
 | 612 | 	clrl	LOCAL_HI(%a0) | 
 | 613 | 	clrl	LOCAL_LO(%a0) | 
 | 614 | 	movel	#0x20000000,%d0		|g,r,s = sticky only | 
 | 615 | 	st	%d1			|flag inexact (d1.b = all ones) | 
 | 616 | 	rts | 
 | 617 |  | 
 | 618 | sixty_four: | 
 | 619 | 	movel	LOCAL_HI(%a0),%d0 | 
 | 620 | 	bfextu	%d0{#2:#30},%d1		|d1 = ms mant bits below the top two | 
 | 621 | 	andil	#0xc0000000,%d0		|top two ms mant bits become g,r | 
 | 622 | 	bras	c3com | 
 | 623 |  | 
 | 624 | sixty_five: | 
 | 625 | 	movel	LOCAL_HI(%a0),%d0 | 
 | 626 | 	bfextu	%d0{#1:#31},%d1		|d1 = ms mant bits below the top one | 
 | 627 | 	andil	#0x80000000,%d0 | 
 | 628 | 	lsrl	#1,%d0			|shift high bit into R bit | 
 | 629 |  | 
 | 630 | c3com: | 
 | 631 | 	tstl	%d1			|sticky if any remaining ms mant bit, | 
 | 632 | 	bnes	c3ssticky | 
 | 633 | 	tstl	LOCAL_LO(%a0)		|any ls mant bit, | 
 | 634 | 	bnes	c3ssticky | 
 | 635 | 	tstb	FP_SCR2+LOCAL_GRS(%a6)	|or any bit in the saved g,r,s byte is set | 
 | 636 | 	bnes	c3ssticky | 
 | 637 | 	clrb	%d1			|none set: report exact | 
 | 638 | 	bras	c3end | 
 | 639 |  | 
 | 640 | c3ssticky: | 
 | 641 | 	bsetl	#rnd_stky_bit,%d0 | 
 | 642 | 	st	%d1			|flag inexact (d1.b = all ones) | 
 | 643 | c3end: | 
 | 644 | 	clrl	LOCAL_HI(%a0)		|everything was shifted out of the mantissa | 
 | 645 | 	clrl	LOCAL_LO(%a0) | 
 | 646 | 	rts | 
 | 647 |  | 
 | 648 | 	|end |