| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | *  linux/arch/arm/vfp/vfpdouble.c | 
|  | 3 | * | 
|  | 4 | * This code is derived in part from John R. Housers softfloat library, which | 
|  | 5 | * carries the following notice: | 
|  | 6 | * | 
|  | 7 | * =========================================================================== | 
|  | 8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | 
|  | 9 | * Arithmetic Package, Release 2. | 
|  | 10 | * | 
|  | 11 | * Written by John R. Hauser.  This work was made possible in part by the | 
|  | 12 | * International Computer Science Institute, located at Suite 600, 1947 Center | 
|  | 13 | * Street, Berkeley, California 94704.  Funding was partially provided by the | 
|  | 14 | * National Science Foundation under grant MIP-9311980.  The original version | 
|  | 15 | * of this code was written as part of a project to build a fixed-point vector | 
|  | 16 | * processor in collaboration with the University of California at Berkeley, | 
|  | 17 | * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information | 
|  | 18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | 
|  | 19 | * arithmetic/softfloat.html'. | 
|  | 20 | * | 
|  | 21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort | 
|  | 22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | 
|  | 23 | * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO | 
|  | 24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | 
|  | 25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | 
|  | 26 | * | 
|  | 27 | * Derivative works are acceptable, even for commercial purposes, so long as | 
|  | 28 | * (1) they include prominent notice that the work is derivative, and (2) they | 
|  | 29 | * include prominent notice akin to these three paragraphs for those parts of | 
|  | 30 | * this code that are retained. | 
|  | 31 | * =========================================================================== | 
|  | 32 | */ | 
|  | 33 | #include <linux/kernel.h> | 
|  | 34 | #include <linux/bitops.h> | 
| Russell King | 438a761 | 2005-06-29 23:01:02 +0100 | [diff] [blame] | 35 |  | 
|  | 36 | #include <asm/div64.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 37 | #include <asm/ptrace.h> | 
|  | 38 | #include <asm/vfp.h> | 
|  | 39 |  | 
|  | 40 | #include "vfpinstr.h" | 
|  | 41 | #include "vfp.h" | 
|  | 42 |  | 
|  | 43 | static struct vfp_double vfp_double_default_qnan = { | 
|  | 44 | .exponent	= 2047, | 
|  | 45 | .sign		= 0, | 
|  | 46 | .significand	= VFP_DOUBLE_SIGNIFICAND_QNAN, | 
|  | 47 | }; | 
|  | 48 |  | 
|  | 49 | static void vfp_double_dump(const char *str, struct vfp_double *d) | 
|  | 50 | { | 
|  | 51 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", | 
|  | 52 | str, d->sign != 0, d->exponent, d->significand); | 
|  | 53 | } | 
|  | 54 |  | 
|  | 55 | static void vfp_double_normalise_denormal(struct vfp_double *vd) | 
|  | 56 | { | 
|  | 57 | int bits = 31 - fls(vd->significand >> 32); | 
|  | 58 | if (bits == 31) | 
|  | 59 | bits = 62 - fls(vd->significand); | 
|  | 60 |  | 
|  | 61 | vfp_double_dump("normalise_denormal: in", vd); | 
|  | 62 |  | 
|  | 63 | if (bits) { | 
|  | 64 | vd->exponent -= bits - 1; | 
|  | 65 | vd->significand <<= bits; | 
|  | 66 | } | 
|  | 67 |  | 
|  | 68 | vfp_double_dump("normalise_denormal: out", vd); | 
|  | 69 | } | 
|  | 70 |  | 
|  | 71 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | 
|  | 72 | { | 
|  | 73 | u64 significand, incr; | 
|  | 74 | int exponent, shift, underflow; | 
|  | 75 | u32 rmode; | 
|  | 76 |  | 
|  | 77 | vfp_double_dump("pack: in", vd); | 
|  | 78 |  | 
|  | 79 | /* | 
|  | 80 | * Infinities and NaNs are a special case. | 
|  | 81 | */ | 
|  | 82 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) | 
|  | 83 | goto pack; | 
|  | 84 |  | 
|  | 85 | /* | 
|  | 86 | * Special-case zero. | 
|  | 87 | */ | 
|  | 88 | if (vd->significand == 0) { | 
|  | 89 | vd->exponent = 0; | 
|  | 90 | goto pack; | 
|  | 91 | } | 
|  | 92 |  | 
|  | 93 | exponent = vd->exponent; | 
|  | 94 | significand = vd->significand; | 
|  | 95 |  | 
|  | 96 | shift = 32 - fls(significand >> 32); | 
|  | 97 | if (shift == 32) | 
|  | 98 | shift = 64 - fls(significand); | 
|  | 99 | if (shift) { | 
|  | 100 | exponent -= shift; | 
|  | 101 | significand <<= shift; | 
|  | 102 | } | 
|  | 103 |  | 
|  | 104 | #ifdef DEBUG | 
|  | 105 | vd->exponent = exponent; | 
|  | 106 | vd->significand = significand; | 
|  | 107 | vfp_double_dump("pack: normalised", vd); | 
|  | 108 | #endif | 
|  | 109 |  | 
|  | 110 | /* | 
|  | 111 | * Tiny number? | 
|  | 112 | */ | 
|  | 113 | underflow = exponent < 0; | 
|  | 114 | if (underflow) { | 
|  | 115 | significand = vfp_shiftright64jamming(significand, -exponent); | 
|  | 116 | exponent = 0; | 
|  | 117 | #ifdef DEBUG | 
|  | 118 | vd->exponent = exponent; | 
|  | 119 | vd->significand = significand; | 
|  | 120 | vfp_double_dump("pack: tiny number", vd); | 
|  | 121 | #endif | 
|  | 122 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) | 
|  | 123 | underflow = 0; | 
|  | 124 | } | 
|  | 125 |  | 
|  | 126 | /* | 
|  | 127 | * Select rounding increment. | 
|  | 128 | */ | 
|  | 129 | incr = 0; | 
|  | 130 | rmode = fpscr & FPSCR_RMODE_MASK; | 
|  | 131 |  | 
|  | 132 | if (rmode == FPSCR_ROUND_NEAREST) { | 
|  | 133 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; | 
|  | 134 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) | 
|  | 135 | incr -= 1; | 
|  | 136 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 
|  | 137 | incr = 0; | 
|  | 138 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) | 
|  | 139 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; | 
|  | 140 |  | 
|  | 141 | pr_debug("VFP: rounding increment = 0x%08llx\n", incr); | 
|  | 142 |  | 
|  | 143 | /* | 
|  | 144 | * Is our rounding going to overflow? | 
|  | 145 | */ | 
|  | 146 | if ((significand + incr) < significand) { | 
|  | 147 | exponent += 1; | 
|  | 148 | significand = (significand >> 1) | (significand & 1); | 
|  | 149 | incr >>= 1; | 
|  | 150 | #ifdef DEBUG | 
|  | 151 | vd->exponent = exponent; | 
|  | 152 | vd->significand = significand; | 
|  | 153 | vfp_double_dump("pack: overflow", vd); | 
|  | 154 | #endif | 
|  | 155 | } | 
|  | 156 |  | 
|  | 157 | /* | 
|  | 158 | * If any of the low bits (which will be shifted out of the | 
|  | 159 | * number) are non-zero, the result is inexact. | 
|  | 160 | */ | 
|  | 161 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) | 
|  | 162 | exceptions |= FPSCR_IXC; | 
|  | 163 |  | 
|  | 164 | /* | 
|  | 165 | * Do our rounding. | 
|  | 166 | */ | 
|  | 167 | significand += incr; | 
|  | 168 |  | 
|  | 169 | /* | 
|  | 170 | * Infinity? | 
|  | 171 | */ | 
|  | 172 | if (exponent >= 2046) { | 
|  | 173 | exceptions |= FPSCR_OFC | FPSCR_IXC; | 
|  | 174 | if (incr == 0) { | 
|  | 175 | vd->exponent = 2045; | 
|  | 176 | vd->significand = 0x7fffffffffffffffULL; | 
|  | 177 | } else { | 
|  | 178 | vd->exponent = 2047;		/* infinity */ | 
|  | 179 | vd->significand = 0; | 
|  | 180 | } | 
|  | 181 | } else { | 
|  | 182 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) | 
|  | 183 | exponent = 0; | 
|  | 184 | if (exponent || significand > 0x8000000000000000ULL) | 
|  | 185 | underflow = 0; | 
|  | 186 | if (underflow) | 
|  | 187 | exceptions |= FPSCR_UFC; | 
|  | 188 | vd->exponent = exponent; | 
|  | 189 | vd->significand = significand >> 1; | 
|  | 190 | } | 
|  | 191 |  | 
|  | 192 | pack: | 
|  | 193 | vfp_double_dump("pack: final", vd); | 
|  | 194 | { | 
|  | 195 | s64 d = vfp_double_pack(vd); | 
|  | 196 | pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, | 
|  | 197 | dd, d, exceptions); | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 198 | vfp_put_double(d, dd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 199 | } | 
| Russell King | 928bd1b | 2006-04-25 20:41:27 +0100 | [diff] [blame] | 200 | return exceptions; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 201 | } | 
|  | 202 |  | 
|  | 203 | /* | 
|  | 204 | * Propagate the NaN, setting exceptions if it is signalling. | 
|  | 205 | * 'n' is always a NaN.  'm' may be a number, NaN or infinity. | 
|  | 206 | */ | 
|  | 207 | static u32 | 
|  | 208 | vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, | 
|  | 209 | struct vfp_double *vdm, u32 fpscr) | 
|  | 210 | { | 
|  | 211 | struct vfp_double *nan; | 
|  | 212 | int tn, tm = 0; | 
|  | 213 |  | 
|  | 214 | tn = vfp_double_type(vdn); | 
|  | 215 |  | 
|  | 216 | if (vdm) | 
|  | 217 | tm = vfp_double_type(vdm); | 
|  | 218 |  | 
|  | 219 | if (fpscr & FPSCR_DEFAULT_NAN) | 
|  | 220 | /* | 
|  | 221 | * Default NaN mode - always returns a quiet NaN | 
|  | 222 | */ | 
|  | 223 | nan = &vfp_double_default_qnan; | 
|  | 224 | else { | 
|  | 225 | /* | 
|  | 226 | * Contemporary mode - select the first signalling | 
|  | 227 | * NAN, or if neither are signalling, the first | 
|  | 228 | * quiet NAN. | 
|  | 229 | */ | 
|  | 230 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | 
|  | 231 | nan = vdn; | 
|  | 232 | else | 
|  | 233 | nan = vdm; | 
|  | 234 | /* | 
|  | 235 | * Make the NaN quiet. | 
|  | 236 | */ | 
|  | 237 | nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | 
|  | 238 | } | 
|  | 239 |  | 
|  | 240 | *vdd = *nan; | 
|  | 241 |  | 
|  | 242 | /* | 
|  | 243 | * If one was a signalling NAN, raise invalid operation. | 
|  | 244 | */ | 
|  | 245 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | 
|  | 246 | } | 
|  | 247 |  | 
|  | 248 | /* | 
|  | 249 | * Extended operations | 
|  | 250 | */ | 
|  | 251 | static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) | 
|  | 252 | { | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 253 | vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 254 | return 0; | 
|  | 255 | } | 
|  | 256 |  | 
|  | 257 | static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) | 
|  | 258 | { | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 259 | vfp_put_double(vfp_get_double(dm), dd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 260 | return 0; | 
|  | 261 | } | 
|  | 262 |  | 
|  | 263 | static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) | 
|  | 264 | { | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 265 | vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 266 | return 0; | 
|  | 267 | } | 
|  | 268 |  | 
|  | 269 | static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr) | 
|  | 270 | { | 
|  | 271 | struct vfp_double vdm, vdd; | 
|  | 272 | int ret, tm; | 
|  | 273 |  | 
|  | 274 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 275 | tm = vfp_double_type(&vdm); | 
|  | 276 | if (tm & (VFP_NAN|VFP_INFINITY)) { | 
|  | 277 | struct vfp_double *vdp = &vdd; | 
|  | 278 |  | 
|  | 279 | if (tm & VFP_NAN) | 
|  | 280 | ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); | 
|  | 281 | else if (vdm.sign == 0) { | 
|  | 282 | sqrt_copy: | 
|  | 283 | vdp = &vdm; | 
|  | 284 | ret = 0; | 
|  | 285 | } else { | 
|  | 286 | sqrt_invalid: | 
|  | 287 | vdp = &vfp_double_default_qnan; | 
|  | 288 | ret = FPSCR_IOC; | 
|  | 289 | } | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 290 | vfp_put_double(vfp_double_pack(vdp), dd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 291 | return ret; | 
|  | 292 | } | 
|  | 293 |  | 
|  | 294 | /* | 
|  | 295 | * sqrt(+/- 0) == +/- 0 | 
|  | 296 | */ | 
|  | 297 | if (tm & VFP_ZERO) | 
|  | 298 | goto sqrt_copy; | 
|  | 299 |  | 
|  | 300 | /* | 
|  | 301 | * Normalise a denormalised number | 
|  | 302 | */ | 
|  | 303 | if (tm & VFP_DENORMAL) | 
|  | 304 | vfp_double_normalise_denormal(&vdm); | 
|  | 305 |  | 
|  | 306 | /* | 
|  | 307 | * sqrt(<0) = invalid | 
|  | 308 | */ | 
|  | 309 | if (vdm.sign) | 
|  | 310 | goto sqrt_invalid; | 
|  | 311 |  | 
|  | 312 | vfp_double_dump("sqrt", &vdm); | 
|  | 313 |  | 
|  | 314 | /* | 
|  | 315 | * Estimate the square root. | 
|  | 316 | */ | 
|  | 317 | vdd.sign = 0; | 
|  | 318 | vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; | 
|  | 319 | vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; | 
|  | 320 |  | 
|  | 321 | vfp_double_dump("sqrt estimate1", &vdd); | 
|  | 322 |  | 
|  | 323 | vdm.significand >>= 1 + (vdm.exponent & 1); | 
|  | 324 | vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); | 
|  | 325 |  | 
|  | 326 | vfp_double_dump("sqrt estimate2", &vdd); | 
|  | 327 |  | 
|  | 328 | /* | 
|  | 329 | * And now adjust. | 
|  | 330 | */ | 
|  | 331 | if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { | 
|  | 332 | if (vdd.significand < 2) { | 
|  | 333 | vdd.significand = ~0ULL; | 
|  | 334 | } else { | 
|  | 335 | u64 termh, terml, remh, reml; | 
|  | 336 | vdm.significand <<= 2; | 
|  | 337 | mul64to128(&termh, &terml, vdd.significand, vdd.significand); | 
|  | 338 | sub128(&remh, &reml, vdm.significand, 0, termh, terml); | 
|  | 339 | while ((s64)remh < 0) { | 
|  | 340 | vdd.significand -= 1; | 
|  | 341 | shift64left(&termh, &terml, vdd.significand); | 
|  | 342 | terml |= 1; | 
|  | 343 | add128(&remh, &reml, remh, reml, termh, terml); | 
|  | 344 | } | 
|  | 345 | vdd.significand |= (remh | reml) != 0; | 
|  | 346 | } | 
|  | 347 | } | 
|  | 348 | vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); | 
|  | 349 |  | 
|  | 350 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt"); | 
|  | 351 | } | 
|  | 352 |  | 
|  | 353 | /* | 
|  | 354 | * Equal	:= ZC | 
|  | 355 | * Less than	:= N | 
|  | 356 | * Greater than	:= C | 
|  | 357 | * Unordered	:= CV | 
|  | 358 | */ | 
|  | 359 | static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) | 
|  | 360 | { | 
|  | 361 | s64 d, m; | 
|  | 362 | u32 ret = 0; | 
|  | 363 |  | 
|  | 364 | m = vfp_get_double(dm); | 
|  | 365 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | 
|  | 366 | ret |= FPSCR_C | FPSCR_V; | 
|  | 367 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 
|  | 368 | /* | 
|  | 369 | * Signalling NaN, or signalling on quiet NaN | 
|  | 370 | */ | 
|  | 371 | ret |= FPSCR_IOC; | 
|  | 372 | } | 
|  | 373 |  | 
|  | 374 | d = vfp_get_double(dd); | 
|  | 375 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | 
|  | 376 | ret |= FPSCR_C | FPSCR_V; | 
|  | 377 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 
|  | 378 | /* | 
|  | 379 | * Signalling NaN, or signalling on quiet NaN | 
|  | 380 | */ | 
|  | 381 | ret |= FPSCR_IOC; | 
|  | 382 | } | 
|  | 383 |  | 
|  | 384 | if (ret == 0) { | 
|  | 385 | if (d == m || vfp_double_packed_abs(d | m) == 0) { | 
|  | 386 | /* | 
|  | 387 | * equal | 
|  | 388 | */ | 
|  | 389 | ret |= FPSCR_Z | FPSCR_C; | 
|  | 390 | } else if (vfp_double_packed_sign(d ^ m)) { | 
|  | 391 | /* | 
|  | 392 | * different signs | 
|  | 393 | */ | 
|  | 394 | if (vfp_double_packed_sign(d)) | 
|  | 395 | /* | 
|  | 396 | * d is negative, so d < m | 
|  | 397 | */ | 
|  | 398 | ret |= FPSCR_N; | 
|  | 399 | else | 
|  | 400 | /* | 
|  | 401 | * d is positive, so d > m | 
|  | 402 | */ | 
|  | 403 | ret |= FPSCR_C; | 
|  | 404 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | 
|  | 405 | /* | 
|  | 406 | * d < m | 
|  | 407 | */ | 
|  | 408 | ret |= FPSCR_N; | 
|  | 409 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | 
|  | 410 | /* | 
|  | 411 | * d > m | 
|  | 412 | */ | 
|  | 413 | ret |= FPSCR_C; | 
|  | 414 | } | 
|  | 415 | } | 
|  | 416 |  | 
|  | 417 | return ret; | 
|  | 418 | } | 
|  | 419 |  | 
|  | 420 | static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) | 
|  | 421 | { | 
|  | 422 | return vfp_compare(dd, 0, dm, fpscr); | 
|  | 423 | } | 
|  | 424 |  | 
|  | 425 | static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) | 
|  | 426 | { | 
|  | 427 | return vfp_compare(dd, 1, dm, fpscr); | 
|  | 428 | } | 
|  | 429 |  | 
|  | 430 | static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) | 
|  | 431 | { | 
|  | 432 | return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); | 
|  | 433 | } | 
|  | 434 |  | 
|  | 435 | static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) | 
|  | 436 | { | 
|  | 437 | return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); | 
|  | 438 | } | 
|  | 439 |  | 
|  | 440 | static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) | 
|  | 441 | { | 
|  | 442 | struct vfp_double vdm; | 
|  | 443 | struct vfp_single vsd; | 
|  | 444 | int tm; | 
|  | 445 | u32 exceptions = 0; | 
|  | 446 |  | 
|  | 447 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 448 |  | 
|  | 449 | tm = vfp_double_type(&vdm); | 
|  | 450 |  | 
|  | 451 | /* | 
|  | 452 | * If we have a signalling NaN, signal invalid operation. | 
|  | 453 | */ | 
|  | 454 | if (tm == VFP_SNAN) | 
|  | 455 | exceptions = FPSCR_IOC; | 
|  | 456 |  | 
|  | 457 | if (tm & VFP_DENORMAL) | 
|  | 458 | vfp_double_normalise_denormal(&vdm); | 
|  | 459 |  | 
|  | 460 | vsd.sign = vdm.sign; | 
|  | 461 | vsd.significand = vfp_hi64to32jamming(vdm.significand); | 
|  | 462 |  | 
|  | 463 | /* | 
|  | 464 | * If we have an infinity or a NaN, the exponent must be 255 | 
|  | 465 | */ | 
|  | 466 | if (tm & (VFP_INFINITY|VFP_NAN)) { | 
|  | 467 | vsd.exponent = 255; | 
| Daniel Jacobowitz | b53a2b4 | 2006-08-27 12:42:14 +0100 | [diff] [blame] | 468 | if (tm == VFP_QNAN) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 469 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | 
|  | 470 | goto pack_nan; | 
|  | 471 | } else if (tm & VFP_ZERO) | 
|  | 472 | vsd.exponent = 0; | 
|  | 473 | else | 
|  | 474 | vsd.exponent = vdm.exponent - (1023 - 127); | 
|  | 475 |  | 
|  | 476 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); | 
|  | 477 |  | 
|  | 478 | pack_nan: | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 479 | vfp_put_float(vfp_single_pack(&vsd), sd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 480 | return exceptions; | 
|  | 481 | } | 
|  | 482 |  | 
|  | 483 | static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) | 
|  | 484 | { | 
|  | 485 | struct vfp_double vdm; | 
|  | 486 | u32 m = vfp_get_float(dm); | 
|  | 487 |  | 
|  | 488 | vdm.sign = 0; | 
|  | 489 | vdm.exponent = 1023 + 63 - 1; | 
|  | 490 | vdm.significand = (u64)m; | 
|  | 491 |  | 
|  | 492 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); | 
|  | 493 | } | 
|  | 494 |  | 
|  | 495 | static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) | 
|  | 496 | { | 
|  | 497 | struct vfp_double vdm; | 
|  | 498 | u32 m = vfp_get_float(dm); | 
|  | 499 |  | 
|  | 500 | vdm.sign = (m & 0x80000000) >> 16; | 
|  | 501 | vdm.exponent = 1023 + 63 - 1; | 
|  | 502 | vdm.significand = vdm.sign ? -m : m; | 
|  | 503 |  | 
|  | 504 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); | 
|  | 505 | } | 
|  | 506 |  | 
|  | 507 | static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) | 
|  | 508 | { | 
|  | 509 | struct vfp_double vdm; | 
|  | 510 | u32 d, exceptions = 0; | 
|  | 511 | int rmode = fpscr & FPSCR_RMODE_MASK; | 
|  | 512 | int tm; | 
|  | 513 |  | 
|  | 514 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 515 |  | 
|  | 516 | /* | 
|  | 517 | * Do we have a denormalised number? | 
|  | 518 | */ | 
|  | 519 | tm = vfp_double_type(&vdm); | 
|  | 520 | if (tm & VFP_DENORMAL) | 
|  | 521 | exceptions |= FPSCR_IDC; | 
|  | 522 |  | 
|  | 523 | if (tm & VFP_NAN) | 
|  | 524 | vdm.sign = 0; | 
|  | 525 |  | 
|  | 526 | if (vdm.exponent >= 1023 + 32) { | 
|  | 527 | d = vdm.sign ? 0 : 0xffffffff; | 
|  | 528 | exceptions = FPSCR_IOC; | 
|  | 529 | } else if (vdm.exponent >= 1023 - 1) { | 
|  | 530 | int shift = 1023 + 63 - vdm.exponent; | 
|  | 531 | u64 rem, incr = 0; | 
|  | 532 |  | 
|  | 533 | /* | 
|  | 534 | * 2^0 <= m < 2^32-2^8 | 
|  | 535 | */ | 
|  | 536 | d = (vdm.significand << 1) >> shift; | 
|  | 537 | rem = vdm.significand << (65 - shift); | 
|  | 538 |  | 
|  | 539 | if (rmode == FPSCR_ROUND_NEAREST) { | 
|  | 540 | incr = 0x8000000000000000ULL; | 
|  | 541 | if ((d & 1) == 0) | 
|  | 542 | incr -= 1; | 
|  | 543 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 
|  | 544 | incr = 0; | 
|  | 545 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | 
|  | 546 | incr = ~0ULL; | 
|  | 547 | } | 
|  | 548 |  | 
|  | 549 | if ((rem + incr) < rem) { | 
|  | 550 | if (d < 0xffffffff) | 
|  | 551 | d += 1; | 
|  | 552 | else | 
|  | 553 | exceptions |= FPSCR_IOC; | 
|  | 554 | } | 
|  | 555 |  | 
|  | 556 | if (d && vdm.sign) { | 
|  | 557 | d = 0; | 
|  | 558 | exceptions |= FPSCR_IOC; | 
|  | 559 | } else if (rem) | 
|  | 560 | exceptions |= FPSCR_IXC; | 
|  | 561 | } else { | 
|  | 562 | d = 0; | 
|  | 563 | if (vdm.exponent | vdm.significand) { | 
|  | 564 | exceptions |= FPSCR_IXC; | 
|  | 565 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | 
|  | 566 | d = 1; | 
|  | 567 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { | 
|  | 568 | d = 0; | 
|  | 569 | exceptions |= FPSCR_IOC; | 
|  | 570 | } | 
|  | 571 | } | 
|  | 572 | } | 
|  | 573 |  | 
|  | 574 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | 
|  | 575 |  | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 576 | vfp_put_float(d, sd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 577 |  | 
|  | 578 | return exceptions; | 
|  | 579 | } | 
|  | 580 |  | 
|  | 581 | static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) | 
|  | 582 | { | 
|  | 583 | return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); | 
|  | 584 | } | 
|  | 585 |  | 
|  | 586 | static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) | 
|  | 587 | { | 
|  | 588 | struct vfp_double vdm; | 
|  | 589 | u32 d, exceptions = 0; | 
|  | 590 | int rmode = fpscr & FPSCR_RMODE_MASK; | 
| Catalin Marinas | 1320a80 | 2006-04-10 21:32:39 +0100 | [diff] [blame] | 591 | int tm; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 592 |  | 
|  | 593 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 594 | vfp_double_dump("VDM", &vdm); | 
|  | 595 |  | 
|  | 596 | /* | 
|  | 597 | * Do we have denormalised number? | 
|  | 598 | */ | 
| Catalin Marinas | 1320a80 | 2006-04-10 21:32:39 +0100 | [diff] [blame] | 599 | tm = vfp_double_type(&vdm); | 
|  | 600 | if (tm & VFP_DENORMAL) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 601 | exceptions |= FPSCR_IDC; | 
|  | 602 |  | 
| Catalin Marinas | 1320a80 | 2006-04-10 21:32:39 +0100 | [diff] [blame] | 603 | if (tm & VFP_NAN) { | 
|  | 604 | d = 0; | 
|  | 605 | exceptions |= FPSCR_IOC; | 
|  | 606 | } else if (vdm.exponent >= 1023 + 32) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 607 | d = 0x7fffffff; | 
|  | 608 | if (vdm.sign) | 
|  | 609 | d = ~d; | 
|  | 610 | exceptions |= FPSCR_IOC; | 
|  | 611 | } else if (vdm.exponent >= 1023 - 1) { | 
|  | 612 | int shift = 1023 + 63 - vdm.exponent;	/* 58 */ | 
|  | 613 | u64 rem, incr = 0; | 
|  | 614 |  | 
|  | 615 | d = (vdm.significand << 1) >> shift; | 
|  | 616 | rem = vdm.significand << (65 - shift); | 
|  | 617 |  | 
|  | 618 | if (rmode == FPSCR_ROUND_NEAREST) { | 
|  | 619 | incr = 0x8000000000000000ULL; | 
|  | 620 | if ((d & 1) == 0) | 
|  | 621 | incr -= 1; | 
|  | 622 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 
|  | 623 | incr = 0; | 
|  | 624 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | 
|  | 625 | incr = ~0ULL; | 
|  | 626 | } | 
|  | 627 |  | 
|  | 628 | if ((rem + incr) < rem && d < 0xffffffff) | 
|  | 629 | d += 1; | 
|  | 630 | if (d > 0x7fffffff + (vdm.sign != 0)) { | 
|  | 631 | d = 0x7fffffff + (vdm.sign != 0); | 
|  | 632 | exceptions |= FPSCR_IOC; | 
|  | 633 | } else if (rem) | 
|  | 634 | exceptions |= FPSCR_IXC; | 
|  | 635 |  | 
|  | 636 | if (vdm.sign) | 
|  | 637 | d = -d; | 
|  | 638 | } else { | 
|  | 639 | d = 0; | 
|  | 640 | if (vdm.exponent | vdm.significand) { | 
|  | 641 | exceptions |= FPSCR_IXC; | 
|  | 642 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | 
|  | 643 | d = 1; | 
|  | 644 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) | 
|  | 645 | d = -1; | 
|  | 646 | } | 
|  | 647 | } | 
|  | 648 |  | 
|  | 649 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | 
|  | 650 |  | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 651 | vfp_put_float((s32)d, sd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 652 |  | 
|  | 653 | return exceptions; | 
|  | 654 | } | 
|  | 655 |  | 
|  | 656 | static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) | 
|  | 657 | { | 
|  | 658 | return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); | 
|  | 659 | } | 
|  | 660 |  | 
|  | 661 |  | 
|  | 662 | static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = { | 
|  | 663 | [FEXT_TO_IDX(FEXT_FCPY)]	= vfp_double_fcpy, | 
|  | 664 | [FEXT_TO_IDX(FEXT_FABS)]	= vfp_double_fabs, | 
|  | 665 | [FEXT_TO_IDX(FEXT_FNEG)]	= vfp_double_fneg, | 
|  | 666 | [FEXT_TO_IDX(FEXT_FSQRT)]	= vfp_double_fsqrt, | 
|  | 667 | [FEXT_TO_IDX(FEXT_FCMP)]	= vfp_double_fcmp, | 
|  | 668 | [FEXT_TO_IDX(FEXT_FCMPE)]	= vfp_double_fcmpe, | 
|  | 669 | [FEXT_TO_IDX(FEXT_FCMPZ)]	= vfp_double_fcmpz, | 
|  | 670 | [FEXT_TO_IDX(FEXT_FCMPEZ)]	= vfp_double_fcmpez, | 
|  | 671 | [FEXT_TO_IDX(FEXT_FCVT)]	= vfp_double_fcvts, | 
|  | 672 | [FEXT_TO_IDX(FEXT_FUITO)]	= vfp_double_fuito, | 
|  | 673 | [FEXT_TO_IDX(FEXT_FSITO)]	= vfp_double_fsito, | 
|  | 674 | [FEXT_TO_IDX(FEXT_FTOUI)]	= vfp_double_ftoui, | 
|  | 675 | [FEXT_TO_IDX(FEXT_FTOUIZ)]	= vfp_double_ftouiz, | 
|  | 676 | [FEXT_TO_IDX(FEXT_FTOSI)]	= vfp_double_ftosi, | 
|  | 677 | [FEXT_TO_IDX(FEXT_FTOSIZ)]	= vfp_double_ftosiz, | 
|  | 678 | }; | 
|  | 679 |  | 
|  | 680 |  | 
|  | 681 |  | 
|  | 682 |  | 
|  | 683 | static u32 | 
|  | 684 | vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, | 
|  | 685 | struct vfp_double *vdm, u32 fpscr) | 
|  | 686 | { | 
|  | 687 | struct vfp_double *vdp; | 
|  | 688 | u32 exceptions = 0; | 
|  | 689 | int tn, tm; | 
|  | 690 |  | 
|  | 691 | tn = vfp_double_type(vdn); | 
|  | 692 | tm = vfp_double_type(vdm); | 
|  | 693 |  | 
|  | 694 | if (tn & tm & VFP_INFINITY) { | 
|  | 695 | /* | 
|  | 696 | * Two infinities.  Are they different signs? | 
|  | 697 | */ | 
|  | 698 | if (vdn->sign ^ vdm->sign) { | 
|  | 699 | /* | 
|  | 700 | * different signs -> invalid | 
|  | 701 | */ | 
|  | 702 | exceptions = FPSCR_IOC; | 
|  | 703 | vdp = &vfp_double_default_qnan; | 
|  | 704 | } else { | 
|  | 705 | /* | 
|  | 706 | * same signs -> valid | 
|  | 707 | */ | 
|  | 708 | vdp = vdn; | 
|  | 709 | } | 
|  | 710 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | 
|  | 711 | /* | 
|  | 712 | * One infinity and one number -> infinity | 
|  | 713 | */ | 
|  | 714 | vdp = vdn; | 
|  | 715 | } else { | 
|  | 716 | /* | 
|  | 717 | * 'n' is a NaN of some type | 
|  | 718 | */ | 
|  | 719 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | 
|  | 720 | } | 
|  | 721 | *vdd = *vdp; | 
|  | 722 | return exceptions; | 
|  | 723 | } | 
|  | 724 |  | 
|  | 725 | static u32 | 
|  | 726 | vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, | 
|  | 727 | struct vfp_double *vdm, u32 fpscr) | 
|  | 728 | { | 
|  | 729 | u32 exp_diff; | 
|  | 730 | u64 m_sig; | 
|  | 731 |  | 
|  | 732 | if (vdn->significand & (1ULL << 63) || | 
|  | 733 | vdm->significand & (1ULL << 63)) { | 
|  | 734 | pr_info("VFP: bad FP values in %s\n", __func__); | 
|  | 735 | vfp_double_dump("VDN", vdn); | 
|  | 736 | vfp_double_dump("VDM", vdm); | 
|  | 737 | } | 
|  | 738 |  | 
|  | 739 | /* | 
|  | 740 | * Ensure that 'n' is the largest magnitude number.  Note that | 
|  | 741 | * if 'n' and 'm' have equal exponents, we do not swap them. | 
|  | 742 | * This ensures that NaN propagation works correctly. | 
|  | 743 | */ | 
|  | 744 | if (vdn->exponent < vdm->exponent) { | 
|  | 745 | struct vfp_double *t = vdn; | 
|  | 746 | vdn = vdm; | 
|  | 747 | vdm = t; | 
|  | 748 | } | 
|  | 749 |  | 
|  | 750 | /* | 
|  | 751 | * Is 'n' an infinity or a NaN?  Note that 'm' may be a number, | 
|  | 752 | * infinity or a NaN here. | 
|  | 753 | */ | 
|  | 754 | if (vdn->exponent == 2047) | 
|  | 755 | return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); | 
|  | 756 |  | 
|  | 757 | /* | 
|  | 758 | * We have two proper numbers, where 'vdn' is the larger magnitude. | 
|  | 759 | * | 
|  | 760 | * Copy 'n' to 'd' before doing the arithmetic. | 
|  | 761 | */ | 
|  | 762 | *vdd = *vdn; | 
|  | 763 |  | 
|  | 764 | /* | 
|  | 765 | * Align 'm' with the result. | 
|  | 766 | */ | 
|  | 767 | exp_diff = vdn->exponent - vdm->exponent; | 
|  | 768 | m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); | 
|  | 769 |  | 
|  | 770 | /* | 
|  | 771 | * If the signs are different, we are really subtracting. | 
|  | 772 | */ | 
|  | 773 | if (vdn->sign ^ vdm->sign) { | 
|  | 774 | m_sig = vdn->significand - m_sig; | 
|  | 775 | if ((s64)m_sig < 0) { | 
|  | 776 | vdd->sign = vfp_sign_negate(vdd->sign); | 
|  | 777 | m_sig = -m_sig; | 
| Catalin Marinas | 7b1fbf2 | 2005-08-03 19:53:25 +0100 | [diff] [blame] | 778 | } else if (m_sig == 0) { | 
|  | 779 | vdd->sign = (fpscr & FPSCR_RMODE_MASK) == | 
|  | 780 | FPSCR_ROUND_MINUSINF ? 0x8000 : 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 781 | } | 
|  | 782 | } else { | 
|  | 783 | m_sig += vdn->significand; | 
|  | 784 | } | 
|  | 785 | vdd->significand = m_sig; | 
|  | 786 |  | 
|  | 787 | return 0; | 
|  | 788 | } | 
|  | 789 |  | 
|  | 790 | static u32 | 
|  | 791 | vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, | 
|  | 792 | struct vfp_double *vdm, u32 fpscr) | 
|  | 793 | { | 
|  | 794 | vfp_double_dump("VDN", vdn); | 
|  | 795 | vfp_double_dump("VDM", vdm); | 
|  | 796 |  | 
|  | 797 | /* | 
|  | 798 | * Ensure that 'n' is the largest magnitude number.  Note that | 
|  | 799 | * if 'n' and 'm' have equal exponents, we do not swap them. | 
|  | 800 | * This ensures that NaN propagation works correctly. | 
|  | 801 | */ | 
|  | 802 | if (vdn->exponent < vdm->exponent) { | 
|  | 803 | struct vfp_double *t = vdn; | 
|  | 804 | vdn = vdm; | 
|  | 805 | vdm = t; | 
|  | 806 | pr_debug("VFP: swapping M <-> N\n"); | 
|  | 807 | } | 
|  | 808 |  | 
|  | 809 | vdd->sign = vdn->sign ^ vdm->sign; | 
|  | 810 |  | 
|  | 811 | /* | 
|  | 812 | * If 'n' is an infinity or NaN, handle it.  'm' may be anything. | 
|  | 813 | */ | 
|  | 814 | if (vdn->exponent == 2047) { | 
|  | 815 | if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) | 
|  | 816 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | 
|  | 817 | if ((vdm->exponent | vdm->significand) == 0) { | 
|  | 818 | *vdd = vfp_double_default_qnan; | 
|  | 819 | return FPSCR_IOC; | 
|  | 820 | } | 
|  | 821 | vdd->exponent = vdn->exponent; | 
|  | 822 | vdd->significand = 0; | 
|  | 823 | return 0; | 
|  | 824 | } | 
|  | 825 |  | 
|  | 826 | /* | 
|  | 827 | * If 'm' is zero, the result is always zero.  In this case, | 
|  | 828 | * 'n' may be zero or a number, but it doesn't matter which. | 
|  | 829 | */ | 
|  | 830 | if ((vdm->exponent | vdm->significand) == 0) { | 
|  | 831 | vdd->exponent = 0; | 
|  | 832 | vdd->significand = 0; | 
|  | 833 | return 0; | 
|  | 834 | } | 
|  | 835 |  | 
|  | 836 | /* | 
|  | 837 | * We add 2 to the destination exponent for the same reason | 
|  | 838 | * as the addition case - though this time we have +1 from | 
|  | 839 | * each input operand. | 
|  | 840 | */ | 
|  | 841 | vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; | 
|  | 842 | vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); | 
|  | 843 |  | 
|  | 844 | vfp_double_dump("VDD", vdd); | 
|  | 845 | return 0; | 
|  | 846 | } | 
|  | 847 |  | 
|  | 848 | #define NEG_MULTIPLY	(1 << 0) | 
|  | 849 | #define NEG_SUBTRACT	(1 << 1) | 
|  | 850 |  | 
|  | 851 | static u32 | 
|  | 852 | vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) | 
|  | 853 | { | 
|  | 854 | struct vfp_double vdd, vdp, vdn, vdm; | 
|  | 855 | u32 exceptions; | 
|  | 856 |  | 
|  | 857 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 
|  | 858 | if (vdn.exponent == 0 && vdn.significand) | 
|  | 859 | vfp_double_normalise_denormal(&vdn); | 
|  | 860 |  | 
|  | 861 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 862 | if (vdm.exponent == 0 && vdm.significand) | 
|  | 863 | vfp_double_normalise_denormal(&vdm); | 
|  | 864 |  | 
|  | 865 | exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); | 
|  | 866 | if (negate & NEG_MULTIPLY) | 
|  | 867 | vdp.sign = vfp_sign_negate(vdp.sign); | 
|  | 868 |  | 
|  | 869 | vfp_double_unpack(&vdn, vfp_get_double(dd)); | 
|  | 870 | if (negate & NEG_SUBTRACT) | 
|  | 871 | vdn.sign = vfp_sign_negate(vdn.sign); | 
|  | 872 |  | 
|  | 873 | exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); | 
|  | 874 |  | 
|  | 875 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); | 
|  | 876 | } | 
|  | 877 |  | 
|  | 878 | /* | 
|  | 879 | * Standard operations | 
|  | 880 | */ | 
|  | 881 |  | 
|  | 882 | /* | 
|  | 883 | * sd = sd + (sn * sm) | 
|  | 884 | */ | 
|  | 885 | static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) | 
|  | 886 | { | 
|  | 887 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); | 
|  | 888 | } | 
|  | 889 |  | 
|  | 890 | /* | 
|  | 891 | * sd = sd - (sn * sm) | 
|  | 892 | */ | 
|  | 893 | static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) | 
|  | 894 | { | 
|  | 895 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); | 
|  | 896 | } | 
|  | 897 |  | 
|  | 898 | /* | 
|  | 899 | * sd = -sd + (sn * sm) | 
|  | 900 | */ | 
|  | 901 | static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) | 
|  | 902 | { | 
|  | 903 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); | 
|  | 904 | } | 
|  | 905 |  | 
|  | 906 | /* | 
|  | 907 | * sd = -sd - (sn * sm) | 
|  | 908 | */ | 
|  | 909 | static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) | 
|  | 910 | { | 
|  | 911 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | 
|  | 912 | } | 
|  | 913 |  | 
|  | 914 | /* | 
|  | 915 | * sd = sn * sm | 
|  | 916 | */ | 
|  | 917 | static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) | 
|  | 918 | { | 
|  | 919 | struct vfp_double vdd, vdn, vdm; | 
|  | 920 | u32 exceptions; | 
|  | 921 |  | 
|  | 922 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 
|  | 923 | if (vdn.exponent == 0 && vdn.significand) | 
|  | 924 | vfp_double_normalise_denormal(&vdn); | 
|  | 925 |  | 
|  | 926 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 927 | if (vdm.exponent == 0 && vdm.significand) | 
|  | 928 | vfp_double_normalise_denormal(&vdm); | 
|  | 929 |  | 
|  | 930 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | 
|  | 931 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); | 
|  | 932 | } | 
|  | 933 |  | 
|  | 934 | /* | 
|  | 935 | * sd = -(sn * sm) | 
|  | 936 | */ | 
|  | 937 | static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) | 
|  | 938 | { | 
|  | 939 | struct vfp_double vdd, vdn, vdm; | 
|  | 940 | u32 exceptions; | 
|  | 941 |  | 
|  | 942 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 
|  | 943 | if (vdn.exponent == 0 && vdn.significand) | 
|  | 944 | vfp_double_normalise_denormal(&vdn); | 
|  | 945 |  | 
|  | 946 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 947 | if (vdm.exponent == 0 && vdm.significand) | 
|  | 948 | vfp_double_normalise_denormal(&vdm); | 
|  | 949 |  | 
|  | 950 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | 
|  | 951 | vdd.sign = vfp_sign_negate(vdd.sign); | 
|  | 952 |  | 
|  | 953 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); | 
|  | 954 | } | 
|  | 955 |  | 
|  | 956 | /* | 
|  | 957 | * sd = sn + sm | 
|  | 958 | */ | 
|  | 959 | static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) | 
|  | 960 | { | 
|  | 961 | struct vfp_double vdd, vdn, vdm; | 
|  | 962 | u32 exceptions; | 
|  | 963 |  | 
|  | 964 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 
|  | 965 | if (vdn.exponent == 0 && vdn.significand) | 
|  | 966 | vfp_double_normalise_denormal(&vdn); | 
|  | 967 |  | 
|  | 968 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 969 | if (vdm.exponent == 0 && vdm.significand) | 
|  | 970 | vfp_double_normalise_denormal(&vdm); | 
|  | 971 |  | 
|  | 972 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | 
|  | 973 |  | 
|  | 974 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); | 
|  | 975 | } | 
|  | 976 |  | 
|  | 977 | /* | 
|  | 978 | * sd = sn - sm | 
|  | 979 | */ | 
|  | 980 | static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) | 
|  | 981 | { | 
|  | 982 | struct vfp_double vdd, vdn, vdm; | 
|  | 983 | u32 exceptions; | 
|  | 984 |  | 
|  | 985 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 
|  | 986 | if (vdn.exponent == 0 && vdn.significand) | 
|  | 987 | vfp_double_normalise_denormal(&vdn); | 
|  | 988 |  | 
|  | 989 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 990 | if (vdm.exponent == 0 && vdm.significand) | 
|  | 991 | vfp_double_normalise_denormal(&vdm); | 
|  | 992 |  | 
|  | 993 | /* | 
|  | 994 | * Subtraction is like addition, but with a negated operand. | 
|  | 995 | */ | 
|  | 996 | vdm.sign = vfp_sign_negate(vdm.sign); | 
|  | 997 |  | 
|  | 998 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | 
|  | 999 |  | 
|  | 1000 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); | 
|  | 1001 | } | 
|  | 1002 |  | 
|  | 1003 | /* | 
|  | 1004 | * sd = sn / sm | 
|  | 1005 | */ | 
|  | 1006 | static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) | 
|  | 1007 | { | 
|  | 1008 | struct vfp_double vdd, vdn, vdm; | 
|  | 1009 | u32 exceptions = 0; | 
|  | 1010 | int tm, tn; | 
|  | 1011 |  | 
|  | 1012 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 
|  | 1013 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 
|  | 1014 |  | 
|  | 1015 | vdd.sign = vdn.sign ^ vdm.sign; | 
|  | 1016 |  | 
|  | 1017 | tn = vfp_double_type(&vdn); | 
|  | 1018 | tm = vfp_double_type(&vdm); | 
|  | 1019 |  | 
|  | 1020 | /* | 
|  | 1021 | * Is n a NAN? | 
|  | 1022 | */ | 
|  | 1023 | if (tn & VFP_NAN) | 
|  | 1024 | goto vdn_nan; | 
|  | 1025 |  | 
|  | 1026 | /* | 
|  | 1027 | * Is m a NAN? | 
|  | 1028 | */ | 
|  | 1029 | if (tm & VFP_NAN) | 
|  | 1030 | goto vdm_nan; | 
|  | 1031 |  | 
|  | 1032 | /* | 
|  | 1033 | * If n and m are infinity, the result is invalid | 
|  | 1034 | * If n and m are zero, the result is invalid | 
|  | 1035 | */ | 
|  | 1036 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | 
|  | 1037 | goto invalid; | 
|  | 1038 |  | 
|  | 1039 | /* | 
|  | 1040 | * If n is infinity, the result is infinity | 
|  | 1041 | */ | 
|  | 1042 | if (tn & VFP_INFINITY) | 
|  | 1043 | goto infinity; | 
|  | 1044 |  | 
|  | 1045 | /* | 
|  | 1046 | * If m is zero, raise div0 exceptions | 
|  | 1047 | */ | 
|  | 1048 | if (tm & VFP_ZERO) | 
|  | 1049 | goto divzero; | 
|  | 1050 |  | 
|  | 1051 | /* | 
|  | 1052 | * If m is infinity, or n is zero, the result is zero | 
|  | 1053 | */ | 
|  | 1054 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | 
|  | 1055 | goto zero; | 
|  | 1056 |  | 
|  | 1057 | if (tn & VFP_DENORMAL) | 
|  | 1058 | vfp_double_normalise_denormal(&vdn); | 
|  | 1059 | if (tm & VFP_DENORMAL) | 
|  | 1060 | vfp_double_normalise_denormal(&vdm); | 
|  | 1061 |  | 
|  | 1062 | /* | 
|  | 1063 | * Ok, we have two numbers, we can perform division. | 
|  | 1064 | */ | 
|  | 1065 | vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; | 
|  | 1066 | vdm.significand <<= 1; | 
|  | 1067 | if (vdm.significand <= (2 * vdn.significand)) { | 
|  | 1068 | vdn.significand >>= 1; | 
|  | 1069 | vdd.exponent++; | 
|  | 1070 | } | 
|  | 1071 | vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); | 
|  | 1072 | if ((vdd.significand & 0x1ff) <= 2) { | 
|  | 1073 | u64 termh, terml, remh, reml; | 
|  | 1074 | mul64to128(&termh, &terml, vdm.significand, vdd.significand); | 
|  | 1075 | sub128(&remh, &reml, vdn.significand, 0, termh, terml); | 
|  | 1076 | while ((s64)remh < 0) { | 
|  | 1077 | vdd.significand -= 1; | 
|  | 1078 | add128(&remh, &reml, remh, reml, 0, vdm.significand); | 
|  | 1079 | } | 
|  | 1080 | vdd.significand |= (reml != 0); | 
|  | 1081 | } | 
|  | 1082 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); | 
|  | 1083 |  | 
|  | 1084 | vdn_nan: | 
|  | 1085 | exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); | 
|  | 1086 | pack: | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 1087 | vfp_put_double(vfp_double_pack(&vdd), dd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1088 | return exceptions; | 
|  | 1089 |  | 
|  | 1090 | vdm_nan: | 
|  | 1091 | exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); | 
|  | 1092 | goto pack; | 
|  | 1093 |  | 
|  | 1094 | zero: | 
|  | 1095 | vdd.exponent = 0; | 
|  | 1096 | vdd.significand = 0; | 
|  | 1097 | goto pack; | 
|  | 1098 |  | 
|  | 1099 | divzero: | 
|  | 1100 | exceptions = FPSCR_DZC; | 
|  | 1101 | infinity: | 
|  | 1102 | vdd.exponent = 2047; | 
|  | 1103 | vdd.significand = 0; | 
|  | 1104 | goto pack; | 
|  | 1105 |  | 
|  | 1106 | invalid: | 
| Daniel Jacobowitz | 0355b3e | 2006-08-30 15:06:39 +0100 | [diff] [blame] | 1107 | vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1108 | return FPSCR_IOC; | 
|  | 1109 | } | 
|  | 1110 |  | 
|  | 1111 | static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = { | 
|  | 1112 | [FOP_TO_IDX(FOP_FMAC)]	= vfp_double_fmac, | 
|  | 1113 | [FOP_TO_IDX(FOP_FNMAC)]	= vfp_double_fnmac, | 
|  | 1114 | [FOP_TO_IDX(FOP_FMSC)]	= vfp_double_fmsc, | 
|  | 1115 | [FOP_TO_IDX(FOP_FNMSC)]	= vfp_double_fnmsc, | 
|  | 1116 | [FOP_TO_IDX(FOP_FMUL)]	= vfp_double_fmul, | 
|  | 1117 | [FOP_TO_IDX(FOP_FNMUL)]	= vfp_double_fnmul, | 
|  | 1118 | [FOP_TO_IDX(FOP_FADD)]	= vfp_double_fadd, | 
|  | 1119 | [FOP_TO_IDX(FOP_FSUB)]	= vfp_double_fsub, | 
|  | 1120 | [FOP_TO_IDX(FOP_FDIV)]	= vfp_double_fdiv, | 
|  | 1121 | }; | 
|  | 1122 |  | 
|  | 1123 | #define FREG_BANK(x)	((x) & 0x0c) | 
|  | 1124 | #define FREG_IDX(x)	((x) & 3) | 
|  | 1125 |  | 
|  | 1126 | u32 vfp_double_cpdo(u32 inst, u32 fpscr) | 
|  | 1127 | { | 
|  | 1128 | u32 op = inst & FOP_MASK; | 
|  | 1129 | u32 exceptions = 0; | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1130 | unsigned int dest; | 
| Catalin Marinas | 1356c19 | 2006-04-10 21:32:46 +0100 | [diff] [blame] | 1131 | unsigned int dn = vfp_get_dn(inst); | 
|  | 1132 | unsigned int dm = vfp_get_dm(inst); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1133 | unsigned int vecitr, veclen, vecstride; | 
|  | 1134 | u32 (*fop)(int, int, s32, u32); | 
|  | 1135 |  | 
|  | 1136 | veclen = fpscr & FPSCR_LENGTH_MASK; | 
|  | 1137 | vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2; | 
|  | 1138 |  | 
|  | 1139 | /* | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1140 | * fcvtds takes an sN register number as destination, not dN. | 
|  | 1141 | * It also always operates on scalars. | 
|  | 1142 | */ | 
|  | 1143 | if ((inst & FEXT_MASK) == FEXT_FCVT) { | 
|  | 1144 | veclen = 0; | 
|  | 1145 | dest = vfp_get_sd(inst); | 
|  | 1146 | } else | 
|  | 1147 | dest = vfp_get_dd(inst); | 
|  | 1148 |  | 
|  | 1149 | /* | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1150 | * If destination bank is zero, vector length is always '1'. | 
|  | 1151 | * ARM DDI0100F C5.1.3, C5.3.2. | 
|  | 1152 | */ | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1153 | if (FREG_BANK(dest) == 0) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1154 | veclen = 0; | 
|  | 1155 |  | 
|  | 1156 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | 
|  | 1157 | (veclen >> FPSCR_LENGTH_BIT) + 1); | 
|  | 1158 |  | 
| Catalin Marinas | 1356c19 | 2006-04-10 21:32:46 +0100 | [diff] [blame] | 1159 | fop = (op == FOP_EXT) ? fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)]; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1160 | if (!fop) | 
|  | 1161 | goto invalid; | 
|  | 1162 |  | 
|  | 1163 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | 
|  | 1164 | u32 except; | 
|  | 1165 |  | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1166 | if (op == FOP_EXT && (inst & FEXT_MASK) == FEXT_FCVT) | 
|  | 1167 | pr_debug("VFP: itr%d (s%u) = op[%u] (d%u)\n", | 
|  | 1168 | vecitr >> FPSCR_LENGTH_BIT, | 
|  | 1169 | dest, dn, dm); | 
|  | 1170 | else if (op == FOP_EXT) | 
| Catalin Marinas | 1356c19 | 2006-04-10 21:32:46 +0100 | [diff] [blame] | 1171 | pr_debug("VFP: itr%d (d%u) = op[%u] (d%u)\n", | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1172 | vecitr >> FPSCR_LENGTH_BIT, | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1173 | dest, dn, dm); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1174 | else | 
| Catalin Marinas | 1356c19 | 2006-04-10 21:32:46 +0100 | [diff] [blame] | 1175 | pr_debug("VFP: itr%d (d%u) = (d%u) op[%u] (d%u)\n", | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1176 | vecitr >> FPSCR_LENGTH_BIT, | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1177 | dest, dn, FOP_TO_IDX(op), dm); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1178 |  | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1179 | except = fop(dest, dn, dm, fpscr); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1180 | pr_debug("VFP: itr%d: exceptions=%08x\n", | 
|  | 1181 | vecitr >> FPSCR_LENGTH_BIT, except); | 
|  | 1182 |  | 
|  | 1183 | exceptions |= except; | 
|  | 1184 |  | 
|  | 1185 | /* | 
|  | 1186 | * This ensures that comparisons only operate on scalars; | 
|  | 1187 | * comparisons always return with one FPSCR status bit set. | 
|  | 1188 | */ | 
|  | 1189 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | 
|  | 1190 | break; | 
|  | 1191 |  | 
|  | 1192 | /* | 
|  | 1193 | * CHECK: It appears to be undefined whether we stop when | 
|  | 1194 | * we encounter an exception.  We continue. | 
|  | 1195 | */ | 
|  | 1196 |  | 
| Daniel Jacobowitz | c29ecac | 2006-08-27 12:42:10 +0100 | [diff] [blame] | 1197 | dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 6); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1198 | dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6); | 
|  | 1199 | if (FREG_BANK(dm) != 0) | 
|  | 1200 | dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6); | 
|  | 1201 | } | 
|  | 1202 | return exceptions; | 
|  | 1203 |  | 
|  | 1204 | invalid: | 
|  | 1205 | return ~0; | 
|  | 1206 | } |