| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | *  linux/arch/arm/vfp/vfpsingle.c | 
|  | 3 | * | 
|  | 4 | * This code is derived in part from John R. Housers softfloat library, which | 
|  | 5 | * carries the following notice: | 
|  | 6 | * | 
|  | 7 | * =========================================================================== | 
|  | 8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | 
|  | 9 | * Arithmetic Package, Release 2. | 
|  | 10 | * | 
|  | 11 | * Written by John R. Hauser.  This work was made possible in part by the | 
|  | 12 | * International Computer Science Institute, located at Suite 600, 1947 Center | 
|  | 13 | * Street, Berkeley, California 94704.  Funding was partially provided by the | 
|  | 14 | * National Science Foundation under grant MIP-9311980.  The original version | 
|  | 15 | * of this code was written as part of a project to build a fixed-point vector | 
|  | 16 | * processor in collaboration with the University of California at Berkeley, | 
|  | 17 | * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information | 
|  | 18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | 
|  | 19 | * arithmetic/softfloat.html'. | 
|  | 20 | * | 
|  | 21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort | 
|  | 22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | 
|  | 23 | * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO | 
|  | 24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | 
|  | 25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | 
|  | 26 | * | 
|  | 27 | * Derivative works are acceptable, even for commercial purposes, so long as | 
|  | 28 | * (1) they include prominent notice that the work is derivative, and (2) they | 
|  | 29 | * include prominent notice akin to these three paragraphs for those parts of | 
|  | 30 | * this code that are retained. | 
|  | 31 | * =========================================================================== | 
|  | 32 | */ | 
|  | 33 | #include <linux/kernel.h> | 
|  | 34 | #include <linux/bitops.h> | 
| Russell King | 438a761 | 2005-06-29 23:01:02 +0100 | [diff] [blame] | 35 |  | 
|  | 36 | #include <asm/div64.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 37 | #include <asm/ptrace.h> | 
|  | 38 | #include <asm/vfp.h> | 
|  | 39 |  | 
|  | 40 | #include "vfpinstr.h" | 
|  | 41 | #include "vfp.h" | 
|  | 42 |  | 
/*
 * Default quiet NaN, returned when FPSCR default-NaN mode is enabled or
 * when an operation must generate a NaN from scratch.  In the unpacked
 * form, exponent 255 with a non-zero significand encodes a NaN.
 */
static struct vfp_single vfp_single_default_qnan = {
	.exponent	= 255,
	.sign		= 0,
	.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
};
|  | 48 |  | 
/*
 * Dump an unpacked single-precision value for debug tracing.  Emits
 * output only when pr_debug is active for this file.
 */
static void vfp_single_dump(const char *str, struct vfp_single *s)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
		 str, s->sign != 0, s->exponent, s->significand);
}
|  | 54 |  | 
/*
 * Normalise a denormal: shift the significand up so its leading one
 * reaches bit 30 (the position of the implicit bit in the unpacked
 * format -- see the 0x40000000 pattern checked elsewhere in this file),
 * compensating the exponent.  The asymmetric adjustment
 * (exponent -= bits - 1) accounts for the denormal exponent bias.
 *
 * NOTE(review): with significand == 0 or bit 31 already set, 'bits'
 * would be 31 or negative -- callers are assumed to pass only genuine
 * denormals; confirm at call sites.
 */
static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
	int bits = 31 - fls(vs->significand);

	vfp_single_dump("normalise_denormal: in", vs);

	if (bits) {
		vs->exponent -= bits - 1;
		vs->significand <<= bits;
	}

	vfp_single_dump("normalise_denormal: out", vs);
}
|  | 68 |  | 
/*
 * Round and pack the unpacked value *vs into register sd, honouring the
 * FPSCR rounding mode, and return the accumulated exception flags (with
 * the internal VFP_NAN_FLAG masked off).  On entry the significand is
 * normalised up to bit 31, leaving VFP_SINGLE_LOW_BITS + 1 guard bits
 * below the result's least significant bit.
 *
 * In non-DEBUG builds the trace-only 'func' argument is discarded by
 * the macro below, so callers can always pass a name string.
 */
#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
	u32 significand, incr, rmode;
	int exponent, shift, underflow;

	vfp_single_dump("pack: in", vs);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vs->significand == 0) {
		vs->exponent = 0;
		goto pack;
	}

	exponent = vs->exponent;
	significand = vs->significand;

	/*
	 * Normalise first.  Note that we shift the significand up to
	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
	 * significant bit.
	 */
	shift = 32 - fls(significand);
	if (shift < 32 && shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vs->exponent = exponent;
	vs->significand = significand;
	vfp_single_dump("pack: normalised", vs);
#endif

	/*
	 * Tiny number?  Denormalise, remembering whether any precision
	 * is lost (underflow is cancelled below if the result is exact).
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright32jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: tiny number", vs);
#endif
		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		/* half-ULP increment; when the result's LSB is already
		 * even, knock one off so ties do not round up
		 * (round-half-to-even) */
		incr = 1 << VFP_SINGLE_LOW_BITS;
		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
		/* rounding away from zero for this sign: full-ULP - 1 */
		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		/* shift down, jamming the lost bit into the new LSB */
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			/* not rounding away from zero: clamp to the
			 * largest finite magnitude instead of infinity */
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

 pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
		vfp_put_float(sd, d);
	}

	return exceptions & ~VFP_NAN_FLAG;
}
|  | 208 |  | 
|  | 209 | /* | 
|  | 210 | * Propagate the NaN, setting exceptions if it is signalling. | 
|  | 211 | * 'n' is always a NaN.  'm' may be a number, NaN or infinity. | 
|  | 212 | */ | 
/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 *
 * Returns FPSCR_IOC if either operand was a signalling NaN, otherwise
 * the internal VFP_NAN_FLAG marker.  Note that in non-default-NaN mode
 * the selected source NaN is quietened *in place* (vsn or vsm is
 * modified) before being copied to *vsd.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
		  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *nan;
	int tn, tm = 0;

	tn = vfp_single_type(vsn);

	/* 'm' is optional: unary operations pass NULL */
	if (vsm)
		tm = vfp_single_type(vsm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_single_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vsn;
		else
			nan = vsm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
	}

	*vsd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}
|  | 253 |  | 
|  | 254 |  | 
|  | 255 | /* | 
|  | 256 | * Extended operations | 
|  | 257 | */ | 
|  | 258 | static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) | 
|  | 259 | { | 
|  | 260 | vfp_put_float(sd, vfp_single_packed_abs(m)); | 
|  | 261 | return 0; | 
|  | 262 | } | 
|  | 263 |  | 
|  | 264 | static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) | 
|  | 265 | { | 
|  | 266 | vfp_put_float(sd, m); | 
|  | 267 | return 0; | 
|  | 268 | } | 
|  | 269 |  | 
|  | 270 | static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) | 
|  | 271 | { | 
|  | 272 | vfp_put_float(sd, vfp_single_packed_negate(m)); | 
|  | 273 | return 0; | 
|  | 274 | } | 
|  | 275 |  | 
/*
 * Lookup table used by vfp_estimate_sqrt_significand() for inputs with
 * an odd exponent; indexed by the top four fraction bits.  Values taken
 * from the SoftFloat library's square-root estimate.
 */
static const u16 sqrt_oddadjust[] = {
	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};
|  | 280 |  | 
/*
 * Companion table to sqrt_oddadjust for inputs with an even exponent.
 */
static const u16 sqrt_evenadjust[] = {
	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};
|  | 285 |  | 
/*
 * Estimate the significand of sqrt(significand), derived from the
 * SoftFloat library's estimateSqrt32(): a table-driven initial guess
 * refined by division steps, finishing with a 64-bit division via
 * do_div().  The significand is expected with its top two bits being
 * 01 (i.e. in the 0x40000000..0x7fffffff range); anything else only
 * triggers a warning, the computation proceeds regardless.
 */
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
	int index;
	u32 z, a;

	if ((significand & 0xc0000000) != 0x40000000) {
		printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
	}

	a = significand << 1;
	index = (a >> 27) & 15;
	if (exponent & 1) {
		/* odd exponent path: 16-bit estimate then one refinement */
		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
		z = ((a / z) << 14) + (z << 15);
		a >>= 1;
	} else {
		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
		z = a / z + z;
		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
		/* estimate already at/below the input: early exit */
		if (z <= a)
			return (s32)a >> 1;
	}
	{
		u64 v = (u64)a << 31;
		do_div(v, z);
		return v + (z >> 1);
	}
}
|  | 314 |  | 
/*
 * fsqrt: square root of m into sd, returning exception flags.
 * NaNs are propagated; sqrt(+/-0) is +/-0; sqrt of a negative number
 * (other than -0) is invalid and yields the default qNaN with IOC.
 * Note the backward gotos: sqrt_copy/sqrt_invalid jump into the
 * special-case block at the top of the function.
 */
static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
 sqrt_copy:
			/* +inf (or, via goto, +/-0): result is the input */
			vsp = &vsm;
			ret = 0;
		} else {
 sqrt_invalid:
			/* -inf (or, via goto, any negative): invalid op */
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_float(sd, vfp_single_pack(vsp));
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust: when the estimate's guard bits are small,
	 * correct it exactly by long subtraction of the square.
	 */
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			/* jam any remainder into the sticky bit */
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}
|  | 395 |  | 
|  | 396 | /* | 
|  | 397 | * Equal	:= ZC | 
|  | 398 | * Less than	:= N | 
|  | 399 | * Greater than	:= C | 
|  | 400 | * Unordered	:= CV | 
|  | 401 | */ | 
/*
 * Compare the value in register sd against packed value m, returning
 * FPSCR condition flags:
 *	Equal		:= ZC
 *	Less than	:= N
 *	Greater than	:= C
 *	Unordered	:= CV
 * signal_on_qnan selects fcmpe semantics: quiet NaNs also raise IOC.
 * Signalling NaNs (quiet bit clear in the mantissa) always raise IOC.
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
	s32 d;
	u32 ret = 0;

	d = vfp_get_float(sd);
	/* NaN operand 'm'? -> unordered */
	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	/* NaN operand 'd'? -> unordered */
	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		/* +0 and -0 compare equal: the abs(d|m)==0 test */
		if (d == m || vfp_single_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_single_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_single_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m  (packed floats order like sign-magnitude
			 * integers: for negatives the integer ordering is
			 * reversed, hence the XOR with the sign)
			 */
			ret |= FPSCR_N;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}
	return ret;
}
|  | 460 |  | 
|  | 461 | static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) | 
|  | 462 | { | 
|  | 463 | return vfp_compare(sd, 0, m, fpscr); | 
|  | 464 | } | 
|  | 465 |  | 
|  | 466 | static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) | 
|  | 467 | { | 
|  | 468 | return vfp_compare(sd, 1, m, fpscr); | 
|  | 469 | } | 
|  | 470 |  | 
|  | 471 | static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) | 
|  | 472 | { | 
|  | 473 | return vfp_compare(sd, 0, 0, fpscr); | 
|  | 474 | } | 
|  | 475 |  | 
|  | 476 | static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) | 
|  | 477 | { | 
|  | 478 | return vfp_compare(sd, 1, 0, fpscr); | 
|  | 479 | } | 
|  | 480 |  | 
/*
 * fcvtd: convert single-precision m to double precision into dd.
 * The conversion is exact apart from NaN handling: a signalling NaN
 * raises IOC and is quietened on the way through.  The significand is
 * widened by a straight 32-bit shift and the exponent rebased from
 * bias 127 to bias 1023.
 */
static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	struct vfp_double vdd;
	int tm;
	u32 exceptions = 0;

	vfp_single_unpack(&vsm, m);

	tm = vfp_single_type(&vsm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	vdd.sign = vsm.sign;
	vdd.significand = (u64)vsm.significand << 32;

	/*
	 * If we have an infinity or NaN, the exponent must be 2047.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vdd.exponent = 2047;
		if (tm & VFP_NAN)
			/* force the quiet bit so the result is a qNaN */
			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vdd.exponent = 0;
	else
		vdd.exponent = vsm.exponent + (1023 - 127);

	/*
	 * Technically, if bit 0 of dd is set, this is an invalid
	 * instruction.  However, we ignore this for efficiency.
	 */
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
	vfp_put_double(dd, vfp_double_pack(&vdd));
	return exceptions;
}
|  | 527 |  | 
|  | 528 | static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) | 
|  | 529 | { | 
|  | 530 | struct vfp_single vs; | 
|  | 531 |  | 
|  | 532 | vs.sign = 0; | 
|  | 533 | vs.exponent = 127 + 31 - 1; | 
|  | 534 | vs.significand = (u32)m; | 
|  | 535 |  | 
|  | 536 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); | 
|  | 537 | } | 
|  | 538 |  | 
|  | 539 | static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) | 
|  | 540 | { | 
|  | 541 | struct vfp_single vs; | 
|  | 542 |  | 
|  | 543 | vs.sign = (m & 0x80000000) >> 16; | 
|  | 544 | vs.exponent = 127 + 31 - 1; | 
|  | 545 | vs.significand = vs.sign ? -m : m; | 
|  | 546 |  | 
|  | 547 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); | 
|  | 548 | } | 
|  | 549 |  | 
/*
 * ftoui: convert single-precision m to an unsigned 32-bit integer in
 * sd, rounding per the mode in fpscr; returns exception flags.
 * Out-of-range positives saturate to 0xffffffff with IOC; negatives
 * that round to a non-zero magnitude produce 0 with IOC.
 *
 * NOTE(review): a NaN input has its sign cleared first, so it falls
 * into the large-exponent path and saturates to 0xffffffff -- confirm
 * this matches the architecture's defined NaN-to-integer result.
 */
static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vsm.sign = 0;

	if (vsm.exponent >= 127 + 32) {
		/* magnitude >= 2^32: saturate */
		d = vsm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vsm.significand << 1) >> shift;
		/* bits shifted out of the integer part */
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			/* round-half-to-even */
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vsm.sign) {
			/* negative value with non-zero magnitude: invalid */
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		/* magnitude < 1: result is 0 or 1 depending on rounding */
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, d);

	return exceptions;
}
|  | 624 |  | 
|  | 625 | static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) | 
|  | 626 | { | 
|  | 627 | return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); | 
|  | 628 | } | 
|  | 629 |  | 
/*
 * ftosi: convert single-precision m to a signed 32-bit integer in sd,
 * rounding per the mode in fpscr; returns exception flags.
 * Out-of-range values saturate to 0x7fffffff / 0x80000000 with IOC
 * (note the saturation test allows magnitude 0x80000000 for negative
 * inputs via the '+ (vsm.sign != 0)' term).
 */
static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	if (vfp_single_type(&vsm) & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (vsm.exponent >= 127 + 32) {
		/*
		 * m >= 2^31-2^7: invalid
		 */
		d = 0x7fffffff;
		if (vsm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/* 2^0 <= m <= 2^31-2^7 */
		d = (vsm.significand << 1) >> shift;
		/* bits shifted out of the integer part */
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			/* round-half-to-even */
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vsm.sign != 0)) {
			d = 0x7fffffff + (vsm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vsm.sign)
			d = -d;
	} else {
		/* magnitude < 1: result is 0, 1 or -1 depending on mode */
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, (s32)d);

	return exceptions;
}
|  | 698 |  | 
|  | 699 | static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) | 
|  | 700 | { | 
|  | 701 | return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); | 
|  | 702 | } | 
|  | 703 |  | 
/*
 * Dispatch table for the "extended" (single-operand) VFP instructions,
 * indexed by FEXT_TO_IDX() of the instruction's extension opcode.
 * Unlisted slots are NULL (undefined operations).
 */
static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= vfp_single_fcpy,
	[FEXT_TO_IDX(FEXT_FABS)]	= vfp_single_fabs,
	[FEXT_TO_IDX(FEXT_FNEG)]	= vfp_single_fneg,
	[FEXT_TO_IDX(FEXT_FSQRT)]	= vfp_single_fsqrt,
	[FEXT_TO_IDX(FEXT_FCMP)]	= vfp_single_fcmp,
	[FEXT_TO_IDX(FEXT_FCMPE)]	= vfp_single_fcmpe,
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= vfp_single_fcmpz,
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= vfp_single_fcmpez,
	[FEXT_TO_IDX(FEXT_FCVT)]	= vfp_single_fcvtd,
	[FEXT_TO_IDX(FEXT_FUITO)]	= vfp_single_fuito,
	[FEXT_TO_IDX(FEXT_FSITO)]	= vfp_single_fsito,
	[FEXT_TO_IDX(FEXT_FTOUI)]	= vfp_single_ftoui,
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= vfp_single_ftouiz,
	[FEXT_TO_IDX(FEXT_FTOSI)]	= vfp_single_ftosi,
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= vfp_single_ftosiz,
};
|  | 721 |  | 
|  | 722 |  | 
|  | 723 |  | 
|  | 724 |  | 
|  | 725 |  | 
/*
 * Handle addition when 'n' is an infinity or NaN ('m' may be a number,
 * infinity or NaN).  inf + (-inf) is invalid and yields the default
 * qNaN with IOC; inf + number is inf; any NaN is propagated via
 * vfp_propagate_nan().  Result is written to *vsd.
 */
static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
			  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *vsp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_single_type(vsn);
	tm = vfp_single_type(vsm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vsn->sign ^ vsm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vsp = &vfp_single_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vsp = vsn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vsp = vsn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
	}
	*vsd = *vsp;
	return exceptions;
}
|  | 767 |  | 
/*
 * Add two unpacked values: *vsd = *vsn + *vsm, returning exception
 * flags.  Inputs are expected with a clear top significand bit (bit 31
 * free as headroom for the addition); anything else gets a warning.
 * Handles alignment, effective subtraction, and the sign of an exact
 * zero result (which depends on the rounding mode).
 */
static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
	       struct vfp_single *vsm, u32 fpscr)
{
	u32 exp_diff, m_sig;

	if (vsn->significand & 0x80000000 ||
	    vsm->significand & 0x80000000) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_single_dump("VSN", vsn);
		vfp_single_dump("VSM", vsm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vsn->exponent == 255)
		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

	/*
	 * We have two proper numbers, where 'vsn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vsd = *vsn;

	/*
	 * Align both numbers, jamming shifted-out bits of the smaller
	 * operand into its sticky bit.
	 */
	exp_diff = vsn->exponent - vsm->exponent;
	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vsn->sign ^ vsm->sign) {
		m_sig = vsn->significand - m_sig;
		if ((s32)m_sig < 0) {
			vsd->sign = vfp_sign_negate(vsd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			/* exact zero: sign is -0 only when rounding
			 * towards minus infinity (0x8000 = sign bit of
			 * the unpacked u16 sign field) */
			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig = vsn->significand + m_sig;
	}
	vsd->significand = m_sig;

	return 0;
}
|  | 831 |  | 
/*
 * Multiply two unpacked values: *vsd = *vsn * *vsm, returning exception
 * flags.  NaNs are propagated; inf * 0 is invalid (default qNaN + IOC);
 * inf * number is inf; anything * 0 is 0.  The significand product is
 * folded from 64 to 32 bits with the low half jammed into the sticky
 * bit (vfp_hi64to32jamming).
 */
static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
	vfp_single_dump("VSN", vsn);
	vfp_single_dump("VSM", vsm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vsd->sign = vsn->sign ^ vsm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vsn->exponent == 255) {
		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
		if ((vsm->exponent | vsm->significand) == 0) {
			/* infinity * zero -> invalid operation */
			*vsd = vfp_single_default_qnan;
			return FPSCR_IOC;
		}
		vsd->exponent = vsn->exponent;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vsm->exponent | vsm->significand) == 0) {
		vsd->exponent = 0;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason as
	 * the addition case - though this time we have +1 from each
	 * input operand.
	 */
	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);

	vfp_single_dump("VSD", vsd);
	return 0;
}
|  | 888 |  | 
|  | 889 | #define NEG_MULTIPLY	(1 << 0) | 
|  | 890 | #define NEG_SUBTRACT	(1 << 1) | 
|  | 891 |  | 
|  | 892 | static u32 | 
|  | 893 | vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) | 
|  | 894 | { | 
|  | 895 | struct vfp_single vsd, vsp, vsn, vsm; | 
|  | 896 | u32 exceptions; | 
|  | 897 | s32 v; | 
|  | 898 |  | 
|  | 899 | v = vfp_get_float(sn); | 
|  | 900 | pr_debug("VFP: s%u = %08x\n", sn, v); | 
|  | 901 | vfp_single_unpack(&vsn, v); | 
|  | 902 | if (vsn.exponent == 0 && vsn.significand) | 
|  | 903 | vfp_single_normalise_denormal(&vsn); | 
|  | 904 |  | 
|  | 905 | vfp_single_unpack(&vsm, m); | 
|  | 906 | if (vsm.exponent == 0 && vsm.significand) | 
|  | 907 | vfp_single_normalise_denormal(&vsm); | 
|  | 908 |  | 
|  | 909 | exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); | 
|  | 910 | if (negate & NEG_MULTIPLY) | 
|  | 911 | vsp.sign = vfp_sign_negate(vsp.sign); | 
|  | 912 |  | 
|  | 913 | v = vfp_get_float(sd); | 
|  | 914 | pr_debug("VFP: s%u = %08x\n", sd, v); | 
|  | 915 | vfp_single_unpack(&vsn, v); | 
|  | 916 | if (negate & NEG_SUBTRACT) | 
|  | 917 | vsn.sign = vfp_sign_negate(vsn.sign); | 
|  | 918 |  | 
|  | 919 | exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); | 
|  | 920 |  | 
|  | 921 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func); | 
|  | 922 | } | 
|  | 923 |  | 
|  | 924 | /* | 
|  | 925 | * Standard operations | 
|  | 926 | */ | 
|  | 927 |  | 
|  | 928 | /* | 
|  | 929 | * sd = sd + (sn * sm) | 
|  | 930 | */ | 
|  | 931 | static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) | 
|  | 932 | { | 
|  | 933 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); | 
|  | 934 | } | 
|  | 935 |  | 
|  | 936 | /* | 
|  | 937 | * sd = sd - (sn * sm) | 
|  | 938 | */ | 
|  | 939 | static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) | 
|  | 940 | { | 
|  | 941 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); | 
|  | 942 | } | 
|  | 943 |  | 
|  | 944 | /* | 
|  | 945 | * sd = -sd + (sn * sm) | 
|  | 946 | */ | 
|  | 947 | static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) | 
|  | 948 | { | 
|  | 949 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); | 
|  | 950 | } | 
|  | 951 |  | 
|  | 952 | /* | 
|  | 953 | * sd = -sd - (sn * sm) | 
|  | 954 | */ | 
|  | 955 | static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) | 
|  | 956 | { | 
|  | 957 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | 
|  | 958 | } | 
|  | 959 |  | 
|  | 960 | /* | 
|  | 961 | * sd = sn * sm | 
|  | 962 | */ | 
|  | 963 | static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) | 
|  | 964 | { | 
|  | 965 | struct vfp_single vsd, vsn, vsm; | 
|  | 966 | u32 exceptions; | 
|  | 967 | s32 n = vfp_get_float(sn); | 
|  | 968 |  | 
|  | 969 | pr_debug("VFP: s%u = %08x\n", sn, n); | 
|  | 970 |  | 
|  | 971 | vfp_single_unpack(&vsn, n); | 
|  | 972 | if (vsn.exponent == 0 && vsn.significand) | 
|  | 973 | vfp_single_normalise_denormal(&vsn); | 
|  | 974 |  | 
|  | 975 | vfp_single_unpack(&vsm, m); | 
|  | 976 | if (vsm.exponent == 0 && vsm.significand) | 
|  | 977 | vfp_single_normalise_denormal(&vsm); | 
|  | 978 |  | 
|  | 979 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); | 
|  | 980 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); | 
|  | 981 | } | 
|  | 982 |  | 
|  | 983 | /* | 
|  | 984 | * sd = -(sn * sm) | 
|  | 985 | */ | 
|  | 986 | static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) | 
|  | 987 | { | 
|  | 988 | struct vfp_single vsd, vsn, vsm; | 
|  | 989 | u32 exceptions; | 
|  | 990 | s32 n = vfp_get_float(sn); | 
|  | 991 |  | 
|  | 992 | pr_debug("VFP: s%u = %08x\n", sn, n); | 
|  | 993 |  | 
|  | 994 | vfp_single_unpack(&vsn, n); | 
|  | 995 | if (vsn.exponent == 0 && vsn.significand) | 
|  | 996 | vfp_single_normalise_denormal(&vsn); | 
|  | 997 |  | 
|  | 998 | vfp_single_unpack(&vsm, m); | 
|  | 999 | if (vsm.exponent == 0 && vsm.significand) | 
|  | 1000 | vfp_single_normalise_denormal(&vsm); | 
|  | 1001 |  | 
|  | 1002 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); | 
|  | 1003 | vsd.sign = vfp_sign_negate(vsd.sign); | 
|  | 1004 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); | 
|  | 1005 | } | 
|  | 1006 |  | 
|  | 1007 | /* | 
|  | 1008 | * sd = sn + sm | 
|  | 1009 | */ | 
|  | 1010 | static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) | 
|  | 1011 | { | 
|  | 1012 | struct vfp_single vsd, vsn, vsm; | 
|  | 1013 | u32 exceptions; | 
|  | 1014 | s32 n = vfp_get_float(sn); | 
|  | 1015 |  | 
|  | 1016 | pr_debug("VFP: s%u = %08x\n", sn, n); | 
|  | 1017 |  | 
|  | 1018 | /* | 
|  | 1019 | * Unpack and normalise denormals. | 
|  | 1020 | */ | 
|  | 1021 | vfp_single_unpack(&vsn, n); | 
|  | 1022 | if (vsn.exponent == 0 && vsn.significand) | 
|  | 1023 | vfp_single_normalise_denormal(&vsn); | 
|  | 1024 |  | 
|  | 1025 | vfp_single_unpack(&vsm, m); | 
|  | 1026 | if (vsm.exponent == 0 && vsm.significand) | 
|  | 1027 | vfp_single_normalise_denormal(&vsm); | 
|  | 1028 |  | 
|  | 1029 | exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); | 
|  | 1030 |  | 
|  | 1031 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); | 
|  | 1032 | } | 
|  | 1033 |  | 
|  | 1034 | /* | 
|  | 1035 | * sd = sn - sm | 
|  | 1036 | */ | 
|  | 1037 | static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) | 
|  | 1038 | { | 
|  | 1039 | /* | 
|  | 1040 | * Subtraction is addition with one sign inverted. | 
|  | 1041 | */ | 
|  | 1042 | return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); | 
|  | 1043 | } | 
|  | 1044 |  | 
|  | 1045 | /* | 
|  | 1046 | * sd = sn / sm | 
|  | 1047 | */ | 
|  | 1048 | static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) | 
|  | 1049 | { | 
|  | 1050 | struct vfp_single vsd, vsn, vsm; | 
|  | 1051 | u32 exceptions = 0; | 
|  | 1052 | s32 n = vfp_get_float(sn); | 
|  | 1053 | int tm, tn; | 
|  | 1054 |  | 
|  | 1055 | pr_debug("VFP: s%u = %08x\n", sn, n); | 
|  | 1056 |  | 
|  | 1057 | vfp_single_unpack(&vsn, n); | 
|  | 1058 | vfp_single_unpack(&vsm, m); | 
|  | 1059 |  | 
|  | 1060 | vsd.sign = vsn.sign ^ vsm.sign; | 
|  | 1061 |  | 
|  | 1062 | tn = vfp_single_type(&vsn); | 
|  | 1063 | tm = vfp_single_type(&vsm); | 
|  | 1064 |  | 
|  | 1065 | /* | 
|  | 1066 | * Is n a NAN? | 
|  | 1067 | */ | 
|  | 1068 | if (tn & VFP_NAN) | 
|  | 1069 | goto vsn_nan; | 
|  | 1070 |  | 
|  | 1071 | /* | 
|  | 1072 | * Is m a NAN? | 
|  | 1073 | */ | 
|  | 1074 | if (tm & VFP_NAN) | 
|  | 1075 | goto vsm_nan; | 
|  | 1076 |  | 
|  | 1077 | /* | 
|  | 1078 | * If n and m are infinity, the result is invalid | 
|  | 1079 | * If n and m are zero, the result is invalid | 
|  | 1080 | */ | 
|  | 1081 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | 
|  | 1082 | goto invalid; | 
|  | 1083 |  | 
|  | 1084 | /* | 
|  | 1085 | * If n is infinity, the result is infinity | 
|  | 1086 | */ | 
|  | 1087 | if (tn & VFP_INFINITY) | 
|  | 1088 | goto infinity; | 
|  | 1089 |  | 
|  | 1090 | /* | 
|  | 1091 | * If m is zero, raise div0 exception | 
|  | 1092 | */ | 
|  | 1093 | if (tm & VFP_ZERO) | 
|  | 1094 | goto divzero; | 
|  | 1095 |  | 
|  | 1096 | /* | 
|  | 1097 | * If m is infinity, or n is zero, the result is zero | 
|  | 1098 | */ | 
|  | 1099 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | 
|  | 1100 | goto zero; | 
|  | 1101 |  | 
|  | 1102 | if (tn & VFP_DENORMAL) | 
|  | 1103 | vfp_single_normalise_denormal(&vsn); | 
|  | 1104 | if (tm & VFP_DENORMAL) | 
|  | 1105 | vfp_single_normalise_denormal(&vsm); | 
|  | 1106 |  | 
|  | 1107 | /* | 
|  | 1108 | * Ok, we have two numbers, we can perform division. | 
|  | 1109 | */ | 
|  | 1110 | vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; | 
|  | 1111 | vsm.significand <<= 1; | 
|  | 1112 | if (vsm.significand <= (2 * vsn.significand)) { | 
|  | 1113 | vsn.significand >>= 1; | 
|  | 1114 | vsd.exponent++; | 
|  | 1115 | } | 
| Russell King | 438a761 | 2005-06-29 23:01:02 +0100 | [diff] [blame] | 1116 | { | 
|  | 1117 | u64 significand = (u64)vsn.significand << 32; | 
|  | 1118 | do_div(significand, vsm.significand); | 
|  | 1119 | vsd.significand = significand; | 
|  | 1120 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1121 | if ((vsd.significand & 0x3f) == 0) | 
|  | 1122 | vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); | 
|  | 1123 |  | 
|  | 1124 | return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); | 
|  | 1125 |  | 
|  | 1126 | vsn_nan: | 
|  | 1127 | exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); | 
|  | 1128 | pack: | 
|  | 1129 | vfp_put_float(sd, vfp_single_pack(&vsd)); | 
|  | 1130 | return exceptions; | 
|  | 1131 |  | 
|  | 1132 | vsm_nan: | 
|  | 1133 | exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); | 
|  | 1134 | goto pack; | 
|  | 1135 |  | 
|  | 1136 | zero: | 
|  | 1137 | vsd.exponent = 0; | 
|  | 1138 | vsd.significand = 0; | 
|  | 1139 | goto pack; | 
|  | 1140 |  | 
|  | 1141 | divzero: | 
|  | 1142 | exceptions = FPSCR_DZC; | 
|  | 1143 | infinity: | 
|  | 1144 | vsd.exponent = 255; | 
|  | 1145 | vsd.significand = 0; | 
|  | 1146 | goto pack; | 
|  | 1147 |  | 
|  | 1148 | invalid: | 
|  | 1149 | vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan)); | 
|  | 1150 | return FPSCR_IOC; | 
|  | 1151 | } | 
|  | 1152 |  | 
/*
 * Dispatch table for the single-precision data-processing
 * operations, indexed by FOP_TO_IDX(opcode).  Unlisted slots are
 * NULL and are rejected as invalid by vfp_single_cpdo().
 */
static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
	[FOP_TO_IDX(FOP_FMAC)]	= vfp_single_fmac,
	[FOP_TO_IDX(FOP_FNMAC)]	= vfp_single_fnmac,
	[FOP_TO_IDX(FOP_FMSC)]	= vfp_single_fmsc,
	[FOP_TO_IDX(FOP_FNMSC)]	= vfp_single_fnmsc,
	[FOP_TO_IDX(FOP_FMUL)]	= vfp_single_fmul,
	[FOP_TO_IDX(FOP_FNMUL)]	= vfp_single_fnmul,
	[FOP_TO_IDX(FOP_FADD)]	= vfp_single_fadd,
	[FOP_TO_IDX(FOP_FSUB)]	= vfp_single_fsub,
	[FOP_TO_IDX(FOP_FDIV)]	= vfp_single_fdiv,
};
|  | 1164 |  | 
#define FREG_BANK(x)	((x) & 0x18)	/* register bank (upper bits of the reg number) */
#define FREG_IDX(x)	((x) & 7)	/* index within the 8-register bank */

/*
 * Decode and execute one single-precision data-processing (CPDO)
 * instruction, honouring the FPSCR vector length/stride fields.
 * Returns the accumulated FPSCR exception flags, or (u32)-1 when
 * the opcode has no handler.
 */
u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int sd = vfp_get_sd(inst);
	unsigned int sn = vfp_get_sn(inst);
	unsigned int sm = vfp_get_sm(inst);
	unsigned int vecitr, veclen, vecstride;
	u32 (*fop)(int, int, s32, u32);

	veclen = fpscr & FPSCR_LENGTH_MASK;
	/* Stride is 2 only when the STRIDE field is all ones, else 1. */
	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if (FREG_BANK(sd) == 0)
		veclen = 0;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	/* Extension ops dispatch on sn; all others on the opcode index. */
	fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)];
	if (!fop)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		s32 m = vfp_get_float(sm);
		u32 except;

		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m);
		else
			pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn,
				 FOP_TO_IDX(op), sm, m);

		except = fop(sd, sn, m, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * This ensures that comparisons only operate on scalars;
		 * comparisons always return with one FPSCR status bit set.
		 */
		if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
			break;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */

		/* Advance each register within its bank, wrapping modulo 8. */
		sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
		/* An sm operand in bank 0 is a scalar and stays fixed. */
		if (FREG_BANK(sm) != 0)
			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
	}
	return exceptions;

invalid:
	return (u32)-1;
}