| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | | | 
|  | 2 | |	x_unfl.sa 3.4 7/1/91 | 
|  | 3 | | | 
|  | 4 | |	fpsp_unfl --- FPSP handler for underflow exception | 
|  | 5 | | | 
|  | 6 | | Trap disabled results | 
|  | 7 | |	For 881/2 compatibility, sw must denormalize the intermediate | 
|  | 8 | | result, then store the result.  Denormalization is accomplished | 
|  | 9 | | by taking the intermediate result (which is always normalized) and | 
|  | 10 | | shifting the mantissa right while incrementing the exponent until | 
|  | 11 | | it is equal to the denormalized exponent for the destination | 
|  | 12 | | format.  After denormalization, the result is rounded to the | 
|  | 13 | | destination format. | 
|  | 14 | | | 
|  | 15 | | Trap enabled results | 
|  | 16 | |	All trap disabled code applies.	In addition the exceptional | 
|  | 17 | | operand needs to be made available to the user with a bias of $6000 | 
|  | 18 | | added to the exponent. | 
|  | 19 | | | 
|  | 20 |  | 
|  | 21 | |		Copyright (C) Motorola, Inc. 1990 | 
|  | 22 | |			All Rights Reserved | 
|  | 23 | | | 
| Matt Waddel | e00d82d | 2006-02-11 17:55:48 -0800 | [diff] [blame] | 24 | |       For details on the license for this file, please see the | 
|  | 25 | |       file, README, in this same directory. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 26 |  | 
|  | 27 | X_UNFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package | 
|  | 28 |  | 
|  | 29 | |section	8 | 
|  | 30 |  | 
|  | 31 | #include "fpsp.h" | 
|  | 32 |  | 
|  | 33 | |xref	denorm | 
|  | 34 | |xref	round | 
|  | 35 | |xref	store | 
|  | 36 | |xref	g_rndpr | 
|  | 37 | |xref	g_opcls | 
|  | 38 | |xref	g_dfmtou | 
|  | 39 | |xref	real_unfl | 
|  | 40 | |xref	real_inex | 
|  | 41 | |xref	fpsp_done | 
|  | 42 | |xref	b1238_fix | 
|  | 43 | | fpsp_unfl --- entry point: saves state, calls unf_res, then exits via real_unfl or ck_inex | 
|  | 44 | .global	fpsp_unfl | 
|  | 45 | fpsp_unfl: | 
|  | 46 | link		%a6,#-LOCAL_SIZE | 
|  | 47 | fsave		-(%a7)	|push the FPU state frame | 
|  | 48 | moveml		%d0-%d1/%a0-%a1,USER_DA(%a6) | 
|  | 49 | fmovemx	%fp0-%fp3,USER_FP0(%a6) | 
|  | 50 | fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6) | 
|  | 51 |  | 
|  | 52 | | The result is built first; the exit path is chosen afterwards. | 
|  | 53 | bsrl		unf_res	|denormalize, round & store interm op | 
|  | 54 | | | 
|  | 55 | | If underflow exceptions are not enabled, check for inexact | 
|  | 56 | | exception | 
|  | 57 | | | 
|  | 58 | btstb		#unfl_bit,FPCR_ENABLE(%a6) | 
|  | 59 | beqs		ck_inex	|unfl trap not enabled | 
|  | 60 | | Underflow trap is enabled.  For an E3 exception, fix up the frame first. | 
|  | 61 | btstb		#E3,E_BYTE(%a6) | 
|  | 62 | beqs		no_e3_1	|E3 clear: skip the fixup | 
|  | 63 | | | 
|  | 64 | | Clear dirty bit on dest register in the frame before branching | 
|  | 65 | | to b1238_fix. | 
|  | 66 | | | 
|  | 67 | bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no | 
|  | 68 | bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit | 
|  | 69 | bsrl		b1238_fix		|test for bug1238 case | 
|  | 70 | movel		USER_FPSR(%a6),FPSR_SHADOW(%a6) | 
|  | 71 | orl		#sx_mask,E_BYTE(%a6) | 
|  | 72 | no_e3_1: | 
|  | 73 | moveml		USER_DA(%a6),%d0-%d1/%a0-%a1 | 
|  | 74 | fmovemx	USER_FP0(%a6),%fp0-%fp3 | 
|  | 75 | fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | 
|  | 76 | frestore	(%a7)+	|pop the fsave frame back into the FPU | 
|  | 77 | unlk		%a6 | 
|  | 78 | bral		real_unfl	|state restored; continue in real_unfl | 
|  | 79 | | | 
|  | 80 | | It is possible to have either inex2 or inex1 exceptions with the | 
|  | 81 | | unfl.  If the inex enable bit is set in the FPCR, and either | 
|  | 82 | | inex2 or inex1 occurred, we must clean up and branch to the | 
|  | 83 | | real inex handler. | 
|  | 84 | | | 
|  | 85 | ck_inex: | 
|  | 86 | moveb		FPCR_ENABLE(%a6),%d0	|d0 = enable byte | 
|  | 87 | andb		FPSR_EXCEPT(%a6),%d0	|keep bits that are enabled and reported | 
|  | 88 | andib		#0x3,%d0	|keep only the low two bits (the inex bits) | 
|  | 89 | beqs		unfl_done	|no enabled inexact was reported | 
|  | 90 |  | 
|  | 91 | | | 
|  | 92 | | Inexact enabled and reported, and we must take an inexact exception | 
|  | 93 | | | 
|  | 94 | take_inex: | 
|  | 95 | btstb		#E3,E_BYTE(%a6) | 
|  | 96 | beqs		no_e3_2	|E3 clear: skip the fixup | 
|  | 97 | | | 
|  | 98 | | Clear dirty bit on dest register in the frame before branching | 
|  | 99 | | to b1238_fix. | 
|  | 100 | | | 
|  | 101 | bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no | 
|  | 102 | bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit | 
|  | 103 | bsrl		b1238_fix		|test for bug1238 case | 
|  | 104 | movel		USER_FPSR(%a6),FPSR_SHADOW(%a6) | 
|  | 105 | orl		#sx_mask,E_BYTE(%a6) | 
|  | 106 | no_e3_2: | 
|  | 107 | moveb		#INEX_VEC,EXC_VEC+1(%a6)	|store the inexact vector byte (presumably so real_inex sees an inex frame; confirm in fpsp.h) | 
|  | 108 | moveml         USER_DA(%a6),%d0-%d1/%a0-%a1 | 
|  | 109 | fmovemx        USER_FP0(%a6),%fp0-%fp3 | 
|  | 110 | fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | 
|  | 111 | frestore        (%a7)+ | 
|  | 112 | unlk            %a6 | 
|  | 113 | bral		real_inex	|state restored; continue in real_inex | 
|  | 114 | | unfl_done --- no trap is taken: clean up and leave through fpsp_done | 
|  | 115 | unfl_done: | 
|  | 116 | bclrb		#E3,E_BYTE(%a6)	|test E3, then clear it | 
|  | 117 | beqs		e1_set		|E3 was clear (E1 case), so branch | 
|  | 118 | | | 
|  | 119 | | Clear dirty bit on dest register in the frame before branching | 
|  | 120 | | to b1238_fix. | 
|  | 121 | | | 
|  | 122 | bfextu		CMDREG3B(%a6){#6:#3},%d0		|get dest reg no | 
|  | 123 | bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit | 
|  | 124 | bsrl		b1238_fix		|test for bug1238 case | 
|  | 125 | movel		USER_FPSR(%a6),FPSR_SHADOW(%a6) | 
|  | 126 | orl		#sx_mask,E_BYTE(%a6) | 
|  | 127 | moveml		USER_DA(%a6),%d0-%d1/%a0-%a1 | 
|  | 128 | fmovemx	USER_FP0(%a6),%fp0-%fp3 | 
|  | 129 | fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | 
|  | 130 | frestore	(%a7)+ | 
|  | 131 | unlk		%a6 | 
|  | 132 | bral		fpsp_done | 
|  | 133 | e1_set:	|E3 was not set: restore registers only | 
|  | 134 | moveml		USER_DA(%a6),%d0-%d1/%a0-%a1 | 
|  | 135 | fmovemx	USER_FP0(%a6),%fp0-%fp3 | 
|  | 136 | fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | 
|  | 137 | unlk		%a6	|no frestore on this path; unlk reloads a7 from a6, dropping the fsave frame | 
|  | 138 | bral		fpsp_done | 
|  | 139 | | | 
|  | 140 | |	unf_res --- underflow result calculation | 
|  | 141 | |	Picks the exceptional operand, calls denorm and round, then falls into unf_store. | 
|  | 142 | unf_res: | 
|  | 143 | bsrl		g_rndpr		|returns RND_PREC in d0 0=ext, | 
|  | 144 | |					;1=sgl, 2=dbl | 
|  | 145 | |					;we need the RND_PREC in the | 
|  | 146 | |					;upper word for round | 
|  | 147 | movew		#0,-(%a7)	|lower word of the PREC long = 0 | 
|  | 148 | movew		%d0,-(%a7)	|copy RND_PREC to stack | 
|  | 149 | | | 
|  | 150 | | | 
|  | 151 | | If the exception bit set is E3, the exceptional operand from the | 
|  | 152 | | fpu is in WBTEMP; else it is in FPTEMP. | 
|  | 153 | | | 
|  | 154 | btstb		#E3,E_BYTE(%a6) | 
|  | 155 | beqs		unf_E1 | 
|  | 156 | unf_E3: | 
|  | 157 | lea		WBTEMP(%a6),%a0	|a0 now points to operand | 
|  | 158 | | | 
|  | 159 | | Test for fsgldiv and fsglmul.  If the inst was one of these, then | 
|  | 160 | | force the precision to extended for the denorm routine and to | 
|  | 161 | | single for the round routine; otherwise keep g_rndpr's value. | 
|  | 162 | | | 
|  | 163 | movew		CMDREG3B(%a6),%d1	|check for fsgldiv or fsglmul | 
|  | 164 | andiw		#0x7f,%d1 | 
|  | 165 | cmpiw		#0x30,%d1		|check for sgldiv | 
|  | 166 | beqs		unf_sgl | 
|  | 167 | cmpiw		#0x33,%d1		|check for sglmul | 
|  | 168 | bnes		unf_cont	|if not, use fpcr prec in round | 
|  | 169 | unf_sgl: | 
|  | 170 | clrl		%d0	|d0 = 0 (ext) for denorm | 
|  | 171 | movew		#0x1,(%a7)	|override g_rndpr precision | 
|  | 172 | |					;force single | 
|  | 173 | bras		unf_cont | 
|  | 174 | unf_E1: | 
|  | 175 | lea		FPTEMP(%a6),%a0	|a0 now points to operand | 
|  | 176 | unf_cont: | 
|  | 177 | bclrb		#sign_bit,LOCAL_EX(%a0)	|clear sign bit | 
|  | 178 | sne		LOCAL_SGN(%a0)		|store sign ($ff if the bit was set, else 0) | 
|  | 179 |  | 
|  | 180 | bsrl		denorm		|returns denorm, a0 points to it | 
|  | 181 | | | 
|  | 182 | | WARNING: | 
|  | 183 | |				;d0 has guard,round sticky bit | 
|  | 184 | |				;make sure that it is not corrupted | 
|  | 185 | |				;before it reaches the round subroutine | 
|  | 186 | |				;also ensure that a0 isn't corrupted | 
|  | 187 |  | 
|  | 188 | | | 
|  | 189 | | Set up d1 for round subroutine d1 contains the PREC/MODE | 
|  | 190 | | information respectively on upper/lower register halves. | 
|  | 191 | | | 
|  | 192 | bfextu		FPCR_MODE(%a6){#2:#2},%d1	|get mode from FPCR | 
|  | 193 | |						;mode in lower d1 | 
|  | 194 | addl		(%a7)+,%d1		|merge PREC/MODE, popping the PREC long | 
|  | 195 | | | 
|  | 196 | | WARNING: a0 and d0 are assumed to be intact between the denorm and | 
|  | 197 | | round subroutines. All code between these two subroutines | 
|  | 198 | | must not corrupt a0 and d0. | 
|  | 199 | | | 
|  | 200 | | | 
|  | 201 | | Perform Round | 
|  | 202 | |	Input:		a0 points to input operand | 
|  | 203 | |			d0{31:29} has guard, round, sticky | 
|  | 204 | |			d1{01:00} has rounding mode | 
|  | 205 | |			d1{17:16} has rounding precision | 
|  | 206 | |	Output:		a0 points to rounded operand | 
|  | 207 | | | 
|  | 208 |  | 
|  | 209 | bsrl		round		|returns rounded denorm at (a0) | 
|  | 210 | | | 
|  | 211 | | Differentiate between store to memory vs. store to register | 
|  | 212 | | | 
|  | 213 | unf_store: | 
|  | 214 | bsrl		g_opcls		|returns opclass in d0{2:0} | 
|  | 215 | cmpib		#0x3,%d0	|opclass 011? | 
|  | 216 | bnes		not_opc011 | 
|  | 217 | | | 
|  | 218 | | At this point, a store to memory is pending | 
|  | 219 | | | 
|  | 220 | opc011: | 
|  | 221 | bsrl		g_dfmtou	|result in d0; zero is treated as extended below | 
|  | 222 | tstb		%d0 | 
|  | 223 | beqs		ext_opc011	|If extended, do not subtract | 
|  | 224 | |				;If destination format is sgl/dbl, | 
|  | 225 | tstb		LOCAL_HI(%a0)	|If rounded result is normal,don't | 
|  | 226 | |					;subtract | 
|  | 227 | bmis		ext_opc011	|top mantissa bit set: normal | 
|  | 228 | subqw		#1,LOCAL_EX(%a0)	|account for denorm bias vs. | 
|  | 229 | |				;normalized bias | 
|  | 230 | |				;          normalized   denormalized | 
|  | 231 | |				;single       $7f           $7e | 
|  | 232 | |				;double       $3ff          $3fe | 
|  | 233 | | | 
|  | 234 | ext_opc011: | 
|  | 235 | bsrl		store		|stores to memory | 
|  | 236 | bras		unf_done	|finish up | 
|  | 237 |  | 
|  | 238 | | | 
|  | 239 | | At this point, a store to a float register is pending | 
|  | 240 | | | 
|  | 241 | not_opc011: | 
|  | 242 | bsrl		store	|stores to float register | 
|  | 243 | |				;a0 is not corrupted on a store to a | 
|  | 244 | |				;float register. | 
|  | 245 | | | 
|  | 246 | | Set the condition codes according to result | 
|  | 247 | | | 
|  | 248 | tstl		LOCAL_HI(%a0)	|check upper mantissa | 
|  | 249 | bnes		ck_sgn | 
|  | 250 | tstl		LOCAL_LO(%a0)	|check lower mantissa | 
|  | 251 | bnes		ck_sgn | 
|  | 252 | bsetb		#z_bit,FPSR_CC(%a6) |set condition codes if zero | 
|  | 253 | ck_sgn: | 
|  | 254 | btstb		#sign_bit,LOCAL_EX(%a0)	|check the sign bit | 
|  | 255 | beqs		unf_done | 
|  | 256 | bsetb		#neg_bit,FPSR_CC(%a6)	|sign bit set: flag negative | 
|  | 257 |  | 
|  | 258 | | | 
|  | 259 | | Finish: if inex2 is set in FPSR_EXCEPT, also set aunfl in FPSR_AEXCEPT. | 
|  | 260 | | | 
|  | 261 | unf_done: | 
|  | 262 | btstb		#inex2_bit,FPSR_EXCEPT(%a6) | 
|  | 263 | beqs		no_aunfl | 
|  | 264 | bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6) | 
|  | 265 | no_aunfl: | 
|  | 266 | rts | 
|  | 267 |  | 
|  | 268 | |end |