blob: f9f049132a17bffb920acb8df278f5e91b514f7a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2===============================================================================
3
4This C source file is part of the SoftFloat IEC/IEEE Floating-point
5Arithmetic Package, Release 2.
6
7Written by John R. Hauser. This work was made possible in part by the
8International Computer Science Institute, located at Suite 600, 1947 Center
9Street, Berkeley, California 94704. Funding was partially provided by the
10National Science Foundation under grant MIP-9311980. The original version
11of this code was written as part of a project to build a fixed-point vector
12processor in collaboration with the University of California at Berkeley,
13overseen by Profs. Nelson Morgan and John Wawrzynek. More information
14is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
15arithmetic/softfloat.html'.
16
17THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
18has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
19TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
20PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
21AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
22
23Derivative works are acceptable, even for commercial purposes, so long as
24(1) they include prominent notice that the work is derivative, and (2) they
25include prominent notice akin to these three paragraphs for those parts of
26this code that are retained.
27
28===============================================================================
29*/
30
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +010031#include <asm/div64.h>
32
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include "fpa11.h"
34//#include "milieu.h"
35//#include "softfloat.h"
36
37/*
38-------------------------------------------------------------------------------
Linus Torvalds1da177e2005-04-16 15:20:36 -070039Primitive arithmetic functions, including multi-word arithmetic, and
40division and square root approximations. (Can be specialized to target if
41desired.)
42-------------------------------------------------------------------------------
43*/
44#include "softfloat-macros"
45
46/*
47-------------------------------------------------------------------------------
48Functions and definitions to determine: (1) whether tininess for underflow
49is detected before or after rounding by default, (2) what (if anything)
50happens when exceptions are raised, (3) how signaling NaNs are distinguished
51from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52are propagated from function inputs to output. These details are target-
53specific.
54-------------------------------------------------------------------------------
55*/
56#include "softfloat-specialize"
57
58/*
59-------------------------------------------------------------------------------
60Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
61and 7, and returns the properly rounded 32-bit integer corresponding to the
62input. If `zSign' is nonzero, the input is negated before being converted
63to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
64input is simply rounded to an integer, with the inexact exception raised if
65the input cannot be represented exactly as an integer. If the fixed-point
66input is too large, however, the invalid exception is raised and the largest
67positive or negative integer is returned.
68-------------------------------------------------------------------------------
69*/
Richard Purdief148af22005-08-03 19:49:17 +010070static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
Linus Torvalds1da177e2005-04-16 15:20:36 -070071{
72 int8 roundingMode;
73 flag roundNearestEven;
74 int8 roundIncrement, roundBits;
75 int32 z;
76
Richard Purdief148af22005-08-03 19:49:17 +010077 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078 roundNearestEven = ( roundingMode == float_round_nearest_even );
79 roundIncrement = 0x40;
80 if ( ! roundNearestEven ) {
81 if ( roundingMode == float_round_to_zero ) {
82 roundIncrement = 0;
83 }
84 else {
85 roundIncrement = 0x7F;
86 if ( zSign ) {
87 if ( roundingMode == float_round_up ) roundIncrement = 0;
88 }
89 else {
90 if ( roundingMode == float_round_down ) roundIncrement = 0;
91 }
92 }
93 }
94 roundBits = absZ & 0x7F;
95 absZ = ( absZ + roundIncrement )>>7;
96 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
97 z = absZ;
98 if ( zSign ) z = - z;
99 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
Richard Purdief148af22005-08-03 19:49:17 +0100100 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 return zSign ? 0x80000000 : 0x7FFFFFFF;
102 }
Richard Purdief148af22005-08-03 19:49:17 +0100103 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 return z;
105
106}
107
108/*
109-------------------------------------------------------------------------------
110Returns the fraction bits of the single-precision floating-point value `a'.
111-------------------------------------------------------------------------------
112*/
113INLINE bits32 extractFloat32Frac( float32 a )
114{
115
116 return a & 0x007FFFFF;
117
118}
119
120/*
121-------------------------------------------------------------------------------
122Returns the exponent bits of the single-precision floating-point value `a'.
123-------------------------------------------------------------------------------
124*/
125INLINE int16 extractFloat32Exp( float32 a )
126{
127
128 return ( a>>23 ) & 0xFF;
129
130}
131
132/*
133-------------------------------------------------------------------------------
134Returns the sign bit of the single-precision floating-point value `a'.
135-------------------------------------------------------------------------------
136*/
137#if 0 /* in softfloat.h */
138INLINE flag extractFloat32Sign( float32 a )
139{
140
141 return a>>31;
142
143}
144#endif
145
146/*
147-------------------------------------------------------------------------------
148Normalizes the subnormal single-precision floating-point value represented
149by the denormalized significand `aSig'. The normalized exponent and
150significand are stored at the locations pointed to by `zExpPtr' and
151`zSigPtr', respectively.
152-------------------------------------------------------------------------------
153*/
154static void
155 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
156{
157 int8 shiftCount;
158
159 shiftCount = countLeadingZeros32( aSig ) - 8;
160 *zSigPtr = aSig<<shiftCount;
161 *zExpPtr = 1 - shiftCount;
162
163}
164
165/*
166-------------------------------------------------------------------------------
167Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
168single-precision floating-point value, returning the result. After being
169shifted into the proper positions, the three fields are simply added
170together to form the result. This means that any integer portion of `zSig'
171will be added into the exponent. Since a properly normalized significand
172will have an integer portion equal to 1, the `zExp' input should be 1 less
173than the desired result exponent whenever `zSig' is a complete, normalized
174significand.
175-------------------------------------------------------------------------------
176*/
177INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
178{
179#if 0
180 float32 f;
181 __asm__("@ packFloat32 \n\
182 mov %0, %1, asl #31 \n\
183 orr %0, %2, asl #23 \n\
184 orr %0, %3"
185 : /* no outputs */
186 : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
187 : "cc");
188 return f;
189#else
190 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
191#endif
192}
193
194/*
195-------------------------------------------------------------------------------
196Takes an abstract floating-point value having sign `zSign', exponent `zExp',
197and significand `zSig', and returns the proper single-precision floating-
198point value corresponding to the abstract input. Ordinarily, the abstract
199value is simply rounded and packed into the single-precision format, with
200the inexact exception raised if the abstract input cannot be represented
201exactly. If the abstract value is too large, however, the overflow and
202inexact exceptions are raised and an infinity or maximal finite value is
203returned. If the abstract value is too small, the input value is rounded to
204a subnormal number, and the underflow and inexact exceptions are raised if
205the abstract input cannot be represented exactly as a subnormal single-
206precision floating-point number.
207 The input significand `zSig' has its binary point between bits 30
208and 29, which is 7 bits to the left of the usual location. This shifted
209significand must be normalized or smaller. If `zSig' is not normalized,
210`zExp' must be 0; in that case, the result returned is a subnormal number,
211and it must not require rounding. In the usual case that `zSig' is
212normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
213The handling of underflow and overflow follows the IEC/IEEE Standard for
214Binary Floating-point Arithmetic.
215-------------------------------------------------------------------------------
216*/
Richard Purdief148af22005-08-03 19:49:17 +0100217static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218{
219 int8 roundingMode;
220 flag roundNearestEven;
221 int8 roundIncrement, roundBits;
222 flag isTiny;
223
Richard Purdief148af22005-08-03 19:49:17 +0100224 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 roundNearestEven = ( roundingMode == float_round_nearest_even );
226 roundIncrement = 0x40;
227 if ( ! roundNearestEven ) {
228 if ( roundingMode == float_round_to_zero ) {
229 roundIncrement = 0;
230 }
231 else {
232 roundIncrement = 0x7F;
233 if ( zSign ) {
234 if ( roundingMode == float_round_up ) roundIncrement = 0;
235 }
236 else {
237 if ( roundingMode == float_round_down ) roundIncrement = 0;
238 }
239 }
240 }
241 roundBits = zSig & 0x7F;
242 if ( 0xFD <= (bits16) zExp ) {
243 if ( ( 0xFD < zExp )
244 || ( ( zExp == 0xFD )
245 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
246 ) {
Richard Purdief148af22005-08-03 19:49:17 +0100247 roundData->exception |= float_flag_overflow | float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
249 }
250 if ( zExp < 0 ) {
251 isTiny =
252 ( float_detect_tininess == float_tininess_before_rounding )
253 || ( zExp < -1 )
254 || ( zSig + roundIncrement < 0x80000000 );
255 shift32RightJamming( zSig, - zExp, &zSig );
256 zExp = 0;
257 roundBits = zSig & 0x7F;
Richard Purdief148af22005-08-03 19:49:17 +0100258 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 }
260 }
Richard Purdief148af22005-08-03 19:49:17 +0100261 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 zSig = ( zSig + roundIncrement )>>7;
263 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
264 if ( zSig == 0 ) zExp = 0;
265 return packFloat32( zSign, zExp, zSig );
266
267}
268
269/*
270-------------------------------------------------------------------------------
271Takes an abstract floating-point value having sign `zSign', exponent `zExp',
272and significand `zSig', and returns the proper single-precision floating-
273point value corresponding to the abstract input. This routine is just like
274`roundAndPackFloat32' except that `zSig' does not have to be normalized in
275any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
276point exponent.
277-------------------------------------------------------------------------------
278*/
279static float32
Richard Purdief148af22005-08-03 19:49:17 +0100280 normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281{
282 int8 shiftCount;
283
284 shiftCount = countLeadingZeros32( zSig ) - 1;
Richard Purdief148af22005-08-03 19:49:17 +0100285 return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286
287}
288
289/*
290-------------------------------------------------------------------------------
291Returns the fraction bits of the double-precision floating-point value `a'.
292-------------------------------------------------------------------------------
293*/
294INLINE bits64 extractFloat64Frac( float64 a )
295{
296
297 return a & LIT64( 0x000FFFFFFFFFFFFF );
298
299}
300
301/*
302-------------------------------------------------------------------------------
303Returns the exponent bits of the double-precision floating-point value `a'.
304-------------------------------------------------------------------------------
305*/
306INLINE int16 extractFloat64Exp( float64 a )
307{
308
309 return ( a>>52 ) & 0x7FF;
310
311}
312
313/*
314-------------------------------------------------------------------------------
315Returns the sign bit of the double-precision floating-point value `a'.
316-------------------------------------------------------------------------------
317*/
318#if 0 /* in softfloat.h */
319INLINE flag extractFloat64Sign( float64 a )
320{
321
322 return a>>63;
323
324}
325#endif
326
327/*
328-------------------------------------------------------------------------------
329Normalizes the subnormal double-precision floating-point value represented
330by the denormalized significand `aSig'. The normalized exponent and
331significand are stored at the locations pointed to by `zExpPtr' and
332`zSigPtr', respectively.
333-------------------------------------------------------------------------------
334*/
335static void
336 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
337{
338 int8 shiftCount;
339
340 shiftCount = countLeadingZeros64( aSig ) - 11;
341 *zSigPtr = aSig<<shiftCount;
342 *zExpPtr = 1 - shiftCount;
343
344}
345
346/*
347-------------------------------------------------------------------------------
348Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
349double-precision floating-point value, returning the result. After being
350shifted into the proper positions, the three fields are simply added
351together to form the result. This means that any integer portion of `zSig'
352will be added into the exponent. Since a properly normalized significand
353will have an integer portion equal to 1, the `zExp' input should be 1 less
354than the desired result exponent whenever `zSig' is a complete, normalized
355significand.
356-------------------------------------------------------------------------------
357*/
358INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
359{
360
361 return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
362
363}
364
365/*
366-------------------------------------------------------------------------------
367Takes an abstract floating-point value having sign `zSign', exponent `zExp',
368and significand `zSig', and returns the proper double-precision floating-
369point value corresponding to the abstract input. Ordinarily, the abstract
370value is simply rounded and packed into the double-precision format, with
371the inexact exception raised if the abstract input cannot be represented
372exactly. If the abstract value is too large, however, the overflow and
373inexact exceptions are raised and an infinity or maximal finite value is
374returned. If the abstract value is too small, the input value is rounded to
375a subnormal number, and the underflow and inexact exceptions are raised if
376the abstract input cannot be represented exactly as a subnormal double-
377precision floating-point number.
378 The input significand `zSig' has its binary point between bits 62
379and 61, which is 10 bits to the left of the usual location. This shifted
380significand must be normalized or smaller. If `zSig' is not normalized,
381`zExp' must be 0; in that case, the result returned is a subnormal number,
382and it must not require rounding. In the usual case that `zSig' is
383normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
384The handling of underflow and overflow follows the IEC/IEEE Standard for
385Binary Floating-point Arithmetic.
386-------------------------------------------------------------------------------
387*/
Richard Purdief148af22005-08-03 19:49:17 +0100388static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389{
390 int8 roundingMode;
391 flag roundNearestEven;
392 int16 roundIncrement, roundBits;
393 flag isTiny;
394
Richard Purdief148af22005-08-03 19:49:17 +0100395 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 roundNearestEven = ( roundingMode == float_round_nearest_even );
397 roundIncrement = 0x200;
398 if ( ! roundNearestEven ) {
399 if ( roundingMode == float_round_to_zero ) {
400 roundIncrement = 0;
401 }
402 else {
403 roundIncrement = 0x3FF;
404 if ( zSign ) {
405 if ( roundingMode == float_round_up ) roundIncrement = 0;
406 }
407 else {
408 if ( roundingMode == float_round_down ) roundIncrement = 0;
409 }
410 }
411 }
412 roundBits = zSig & 0x3FF;
413 if ( 0x7FD <= (bits16) zExp ) {
414 if ( ( 0x7FD < zExp )
415 || ( ( zExp == 0x7FD )
416 && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
417 ) {
418 //register int lr = __builtin_return_address(0);
419 //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
Richard Purdief148af22005-08-03 19:49:17 +0100420 roundData->exception |= float_flag_overflow | float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
422 }
423 if ( zExp < 0 ) {
424 isTiny =
425 ( float_detect_tininess == float_tininess_before_rounding )
426 || ( zExp < -1 )
427 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
428 shift64RightJamming( zSig, - zExp, &zSig );
429 zExp = 0;
430 roundBits = zSig & 0x3FF;
Richard Purdief148af22005-08-03 19:49:17 +0100431 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 }
433 }
Richard Purdief148af22005-08-03 19:49:17 +0100434 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 zSig = ( zSig + roundIncrement )>>10;
436 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
437 if ( zSig == 0 ) zExp = 0;
438 return packFloat64( zSign, zExp, zSig );
439
440}
441
442/*
443-------------------------------------------------------------------------------
444Takes an abstract floating-point value having sign `zSign', exponent `zExp',
445and significand `zSig', and returns the proper double-precision floating-
446point value corresponding to the abstract input. This routine is just like
447`roundAndPackFloat64' except that `zSig' does not have to be normalized in
448any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
449point exponent.
450-------------------------------------------------------------------------------
451*/
452static float64
Richard Purdief148af22005-08-03 19:49:17 +0100453 normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454{
455 int8 shiftCount;
456
457 shiftCount = countLeadingZeros64( zSig ) - 1;
Richard Purdief148af22005-08-03 19:49:17 +0100458 return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459
460}
461
462#ifdef FLOATX80
463
464/*
465-------------------------------------------------------------------------------
466Returns the fraction bits of the extended double-precision floating-point
467value `a'.
468-------------------------------------------------------------------------------
469*/
470INLINE bits64 extractFloatx80Frac( floatx80 a )
471{
472
473 return a.low;
474
475}
476
477/*
478-------------------------------------------------------------------------------
479Returns the exponent bits of the extended double-precision floating-point
480value `a'.
481-------------------------------------------------------------------------------
482*/
483INLINE int32 extractFloatx80Exp( floatx80 a )
484{
485
486 return a.high & 0x7FFF;
487
488}
489
490/*
491-------------------------------------------------------------------------------
492Returns the sign bit of the extended double-precision floating-point value
493`a'.
494-------------------------------------------------------------------------------
495*/
496INLINE flag extractFloatx80Sign( floatx80 a )
497{
498
499 return a.high>>15;
500
501}
502
503/*
504-------------------------------------------------------------------------------
505Normalizes the subnormal extended double-precision floating-point value
506represented by the denormalized significand `aSig'. The normalized exponent
507and significand are stored at the locations pointed to by `zExpPtr' and
508`zSigPtr', respectively.
509-------------------------------------------------------------------------------
510*/
511static void
512 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
513{
514 int8 shiftCount;
515
516 shiftCount = countLeadingZeros64( aSig );
517 *zSigPtr = aSig<<shiftCount;
518 *zExpPtr = 1 - shiftCount;
519
520}
521
522/*
523-------------------------------------------------------------------------------
524Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
525extended double-precision floating-point value, returning the result.
526-------------------------------------------------------------------------------
527*/
528INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
529{
530 floatx80 z;
531
532 z.low = zSig;
533 z.high = ( ( (bits16) zSign )<<15 ) + zExp;
534 return z;
535
536}
537
538/*
539-------------------------------------------------------------------------------
540Takes an abstract floating-point value having sign `zSign', exponent `zExp',
541and extended significand formed by the concatenation of `zSig0' and `zSig1',
542and returns the proper extended double-precision floating-point value
543corresponding to the abstract input. Ordinarily, the abstract value is
544rounded and packed into the extended double-precision format, with the
545inexact exception raised if the abstract input cannot be represented
546exactly. If the abstract value is too large, however, the overflow and
547inexact exceptions are raised and an infinity or maximal finite value is
548returned. If the abstract value is too small, the input value is rounded to
549a subnormal number, and the underflow and inexact exceptions are raised if
550the abstract input cannot be represented exactly as a subnormal extended
551double-precision floating-point number.
552 If `roundingPrecision' is 32 or 64, the result is rounded to the same
553number of bits as single or double precision, respectively. Otherwise, the
554result is rounded to the full precision of the extended double-precision
555format.
556 The input significand must be normalized or smaller. If the input
557significand is not normalized, `zExp' must be 0; in that case, the result
558returned is a subnormal number, and it must not require rounding. The
559handling of underflow and overflow follows the IEC/IEEE Standard for Binary
560Floating-point Arithmetic.
561-------------------------------------------------------------------------------
562*/
563static floatx80
564 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +0100565 struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 )
567{
Richard Purdief148af22005-08-03 19:49:17 +0100568 int8 roundingMode, roundingPrecision;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 flag roundNearestEven, increment, isTiny;
570 int64 roundIncrement, roundMask, roundBits;
571
Richard Purdief148af22005-08-03 19:49:17 +0100572 roundingMode = roundData->mode;
573 roundingPrecision = roundData->precision;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 roundNearestEven = ( roundingMode == float_round_nearest_even );
575 if ( roundingPrecision == 80 ) goto precision80;
576 if ( roundingPrecision == 64 ) {
577 roundIncrement = LIT64( 0x0000000000000400 );
578 roundMask = LIT64( 0x00000000000007FF );
579 }
580 else if ( roundingPrecision == 32 ) {
581 roundIncrement = LIT64( 0x0000008000000000 );
582 roundMask = LIT64( 0x000000FFFFFFFFFF );
583 }
584 else {
585 goto precision80;
586 }
587 zSig0 |= ( zSig1 != 0 );
588 if ( ! roundNearestEven ) {
589 if ( roundingMode == float_round_to_zero ) {
590 roundIncrement = 0;
591 }
592 else {
593 roundIncrement = roundMask;
594 if ( zSign ) {
595 if ( roundingMode == float_round_up ) roundIncrement = 0;
596 }
597 else {
598 if ( roundingMode == float_round_down ) roundIncrement = 0;
599 }
600 }
601 }
602 roundBits = zSig0 & roundMask;
603 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
604 if ( ( 0x7FFE < zExp )
605 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
606 ) {
607 goto overflow;
608 }
609 if ( zExp <= 0 ) {
610 isTiny =
611 ( float_detect_tininess == float_tininess_before_rounding )
612 || ( zExp < 0 )
613 || ( zSig0 <= zSig0 + roundIncrement );
614 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
615 zExp = 0;
616 roundBits = zSig0 & roundMask;
Richard Purdief148af22005-08-03 19:49:17 +0100617 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
618 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 zSig0 += roundIncrement;
620 if ( (sbits64) zSig0 < 0 ) zExp = 1;
621 roundIncrement = roundMask + 1;
622 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
623 roundMask |= roundIncrement;
624 }
625 zSig0 &= ~ roundMask;
626 return packFloatx80( zSign, zExp, zSig0 );
627 }
628 }
Richard Purdief148af22005-08-03 19:49:17 +0100629 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 zSig0 += roundIncrement;
631 if ( zSig0 < roundIncrement ) {
632 ++zExp;
633 zSig0 = LIT64( 0x8000000000000000 );
634 }
635 roundIncrement = roundMask + 1;
636 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
637 roundMask |= roundIncrement;
638 }
639 zSig0 &= ~ roundMask;
640 if ( zSig0 == 0 ) zExp = 0;
641 return packFloatx80( zSign, zExp, zSig0 );
642 precision80:
643 increment = ( (sbits64) zSig1 < 0 );
644 if ( ! roundNearestEven ) {
645 if ( roundingMode == float_round_to_zero ) {
646 increment = 0;
647 }
648 else {
649 if ( zSign ) {
650 increment = ( roundingMode == float_round_down ) && zSig1;
651 }
652 else {
653 increment = ( roundingMode == float_round_up ) && zSig1;
654 }
655 }
656 }
657 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
658 if ( ( 0x7FFE < zExp )
659 || ( ( zExp == 0x7FFE )
660 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
661 && increment
662 )
663 ) {
664 roundMask = 0;
665 overflow:
Richard Purdief148af22005-08-03 19:49:17 +0100666 roundData->exception |= float_flag_overflow | float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 if ( ( roundingMode == float_round_to_zero )
668 || ( zSign && ( roundingMode == float_round_up ) )
669 || ( ! zSign && ( roundingMode == float_round_down ) )
670 ) {
671 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
672 }
673 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
674 }
675 if ( zExp <= 0 ) {
676 isTiny =
677 ( float_detect_tininess == float_tininess_before_rounding )
678 || ( zExp < 0 )
679 || ! increment
680 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
681 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
682 zExp = 0;
Richard Purdief148af22005-08-03 19:49:17 +0100683 if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
684 if ( zSig1 ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 if ( roundNearestEven ) {
686 increment = ( (sbits64) zSig1 < 0 );
687 }
688 else {
689 if ( zSign ) {
690 increment = ( roundingMode == float_round_down ) && zSig1;
691 }
692 else {
693 increment = ( roundingMode == float_round_up ) && zSig1;
694 }
695 }
696 if ( increment ) {
697 ++zSig0;
698 zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
699 if ( (sbits64) zSig0 < 0 ) zExp = 1;
700 }
701 return packFloatx80( zSign, zExp, zSig0 );
702 }
703 }
Richard Purdief148af22005-08-03 19:49:17 +0100704 if ( zSig1 ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 if ( increment ) {
706 ++zSig0;
707 if ( zSig0 == 0 ) {
708 ++zExp;
709 zSig0 = LIT64( 0x8000000000000000 );
710 }
711 else {
712 zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
713 }
714 }
715 else {
716 if ( zSig0 == 0 ) zExp = 0;
717 }
718
719 return packFloatx80( zSign, zExp, zSig0 );
720}
721
722/*
723-------------------------------------------------------------------------------
724Takes an abstract floating-point value having sign `zSign', exponent
725`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
726and returns the proper extended double-precision floating-point value
727corresponding to the abstract input. This routine is just like
728`roundAndPackFloatx80' except that the input significand does not have to be
729normalized.
730-------------------------------------------------------------------------------
731*/
732static floatx80
733 normalizeRoundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +0100734 struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 )
736{
737 int8 shiftCount;
738
739 if ( zSig0 == 0 ) {
740 zSig0 = zSig1;
741 zSig1 = 0;
742 zExp -= 64;
743 }
744 shiftCount = countLeadingZeros64( zSig0 );
745 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
746 zExp -= shiftCount;
747 return
Richard Purdief148af22005-08-03 19:49:17 +0100748 roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749
750}
751
752#endif
753
754/*
755-------------------------------------------------------------------------------
756Returns the result of converting the 32-bit two's complement integer `a' to
757the single-precision floating-point format. The conversion is performed
758according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
759-------------------------------------------------------------------------------
760*/
Richard Purdief148af22005-08-03 19:49:17 +0100761float32 int32_to_float32(struct roundingData *roundData, int32 a)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762{
763 flag zSign;
764
765 if ( a == 0 ) return 0;
766 if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
767 zSign = ( a < 0 );
Richard Purdief148af22005-08-03 19:49:17 +0100768 return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769
770}
771
772/*
773-------------------------------------------------------------------------------
774Returns the result of converting the 32-bit two's complement integer `a' to
775the double-precision floating-point format. The conversion is performed
776according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
777-------------------------------------------------------------------------------
778*/
779float64 int32_to_float64( int32 a )
780{
781 flag aSign;
782 uint32 absA;
783 int8 shiftCount;
784 bits64 zSig;
785
786 if ( a == 0 ) return 0;
787 aSign = ( a < 0 );
788 absA = aSign ? - a : a;
789 shiftCount = countLeadingZeros32( absA ) + 21;
790 zSig = absA;
791 return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
792
793}
794
795#ifdef FLOATX80
796
797/*
798-------------------------------------------------------------------------------
799Returns the result of converting the 32-bit two's complement integer `a'
800to the extended double-precision floating-point format. The conversion
801is performed according to the IEC/IEEE Standard for Binary Floating-point
802Arithmetic.
803-------------------------------------------------------------------------------
804*/
805floatx80 int32_to_floatx80( int32 a )
806{
807 flag zSign;
808 uint32 absA;
809 int8 shiftCount;
810 bits64 zSig;
811
812 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
813 zSign = ( a < 0 );
814 absA = zSign ? - a : a;
815 shiftCount = countLeadingZeros32( absA ) + 32;
816 zSig = absA;
817 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
818
819}
820
821#endif
822
823/*
824-------------------------------------------------------------------------------
825Returns the result of converting the single-precision floating-point value
826`a' to the 32-bit two's complement integer format. The conversion is
827performed according to the IEC/IEEE Standard for Binary Floating-point
828Arithmetic---which means in particular that the conversion is rounded
829according to the current rounding mode. If `a' is a NaN, the largest
830positive integer is returned. Otherwise, if the conversion overflows, the
831largest integer with the same sign as `a' is returned.
832-------------------------------------------------------------------------------
833*/
Richard Purdief148af22005-08-03 19:49:17 +0100834int32 float32_to_int32( struct roundingData *roundData, float32 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835{
836 flag aSign;
837 int16 aExp, shiftCount;
838 bits32 aSig;
839 bits64 zSig;
840
841 aSig = extractFloat32Frac( a );
842 aExp = extractFloat32Exp( a );
843 aSign = extractFloat32Sign( a );
844 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
845 if ( aExp ) aSig |= 0x00800000;
846 shiftCount = 0xAF - aExp;
847 zSig = aSig;
848 zSig <<= 32;
849 if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
Richard Purdief148af22005-08-03 19:49:17 +0100850 return roundAndPackInt32( roundData, aSign, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851
852}
853
854/*
855-------------------------------------------------------------------------------
856Returns the result of converting the single-precision floating-point value
857`a' to the 32-bit two's complement integer format. The conversion is
858performed according to the IEC/IEEE Standard for Binary Floating-point
859Arithmetic, except that the conversion is always rounded toward zero. If
860`a' is a NaN, the largest positive integer is returned. Otherwise, if the
861conversion overflows, the largest integer with the same sign as `a' is
862returned.
863-------------------------------------------------------------------------------
864*/
865int32 float32_to_int32_round_to_zero( float32 a )
866{
867 flag aSign;
868 int16 aExp, shiftCount;
869 bits32 aSig;
870 int32 z;
871
872 aSig = extractFloat32Frac( a );
873 aExp = extractFloat32Exp( a );
874 aSign = extractFloat32Sign( a );
875 shiftCount = aExp - 0x9E;
876 if ( 0 <= shiftCount ) {
877 if ( a == 0xCF000000 ) return 0x80000000;
878 float_raise( float_flag_invalid );
879 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
880 return 0x80000000;
881 }
882 else if ( aExp <= 0x7E ) {
Richard Purdief148af22005-08-03 19:49:17 +0100883 if ( aExp | aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 return 0;
885 }
886 aSig = ( aSig | 0x00800000 )<<8;
887 z = aSig>>( - shiftCount );
888 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
Richard Purdief148af22005-08-03 19:49:17 +0100889 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 }
891 return aSign ? - z : z;
892
893}
894
895/*
896-------------------------------------------------------------------------------
897Returns the result of converting the single-precision floating-point value
898`a' to the double-precision floating-point format. The conversion is
899performed according to the IEC/IEEE Standard for Binary Floating-point
900Arithmetic.
901-------------------------------------------------------------------------------
902*/
903float64 float32_to_float64( float32 a )
904{
905 flag aSign;
906 int16 aExp;
907 bits32 aSig;
908
909 aSig = extractFloat32Frac( a );
910 aExp = extractFloat32Exp( a );
911 aSign = extractFloat32Sign( a );
912 if ( aExp == 0xFF ) {
913 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
914 return packFloat64( aSign, 0x7FF, 0 );
915 }
916 if ( aExp == 0 ) {
917 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
918 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
919 --aExp;
920 }
921 return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
922
923}
924
925#ifdef FLOATX80
926
927/*
928-------------------------------------------------------------------------------
929Returns the result of converting the single-precision floating-point value
930`a' to the extended double-precision floating-point format. The conversion
931is performed according to the IEC/IEEE Standard for Binary Floating-point
932Arithmetic.
933-------------------------------------------------------------------------------
934*/
935floatx80 float32_to_floatx80( float32 a )
936{
937 flag aSign;
938 int16 aExp;
939 bits32 aSig;
940
941 aSig = extractFloat32Frac( a );
942 aExp = extractFloat32Exp( a );
943 aSign = extractFloat32Sign( a );
944 if ( aExp == 0xFF ) {
945 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
946 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
947 }
948 if ( aExp == 0 ) {
949 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
950 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
951 }
952 aSig |= 0x00800000;
953 return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
954
955}
956
957#endif
958
959/*
960-------------------------------------------------------------------------------
961Rounds the single-precision floating-point value `a' to an integer, and
962returns the result as a single-precision floating-point value. The
963operation is performed according to the IEC/IEEE Standard for Binary
964Floating-point Arithmetic.
965-------------------------------------------------------------------------------
966*/
Richard Purdief148af22005-08-03 19:49:17 +0100967float32 float32_round_to_int( struct roundingData *roundData, float32 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968{
969 flag aSign;
970 int16 aExp;
971 bits32 lastBitMask, roundBitsMask;
972 int8 roundingMode;
973 float32 z;
974
975 aExp = extractFloat32Exp( a );
976 if ( 0x96 <= aExp ) {
977 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
978 return propagateFloat32NaN( a, a );
979 }
980 return a;
981 }
Richard Purdief148af22005-08-03 19:49:17 +0100982 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if ( aExp <= 0x7E ) {
984 if ( (bits32) ( a<<1 ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +0100985 roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 aSign = extractFloat32Sign( a );
Richard Purdief148af22005-08-03 19:49:17 +0100987 switch ( roundingMode ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 case float_round_nearest_even:
989 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
990 return packFloat32( aSign, 0x7F, 0 );
991 }
992 break;
993 case float_round_down:
994 return aSign ? 0xBF800000 : 0;
995 case float_round_up:
996 return aSign ? 0x80000000 : 0x3F800000;
997 }
998 return packFloat32( aSign, 0, 0 );
999 }
1000 lastBitMask = 1;
1001 lastBitMask <<= 0x96 - aExp;
1002 roundBitsMask = lastBitMask - 1;
1003 z = a;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 if ( roundingMode == float_round_nearest_even ) {
1005 z += lastBitMask>>1;
1006 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1007 }
1008 else if ( roundingMode != float_round_to_zero ) {
1009 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1010 z += roundBitsMask;
1011 }
1012 }
1013 z &= ~ roundBitsMask;
Richard Purdief148af22005-08-03 19:49:17 +01001014 if ( z != a ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 return z;
1016
1017}
1018
1019/*
1020-------------------------------------------------------------------------------
1021Returns the result of adding the absolute values of the single-precision
1022floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1023before being returned. `zSign' is ignored if the result is a NaN. The
1024addition is performed according to the IEC/IEEE Standard for Binary
1025Floating-point Arithmetic.
1026-------------------------------------------------------------------------------
1027*/
Richard Purdief148af22005-08-03 19:49:17 +01001028static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029{
1030 int16 aExp, bExp, zExp;
1031 bits32 aSig, bSig, zSig;
1032 int16 expDiff;
1033
1034 aSig = extractFloat32Frac( a );
1035 aExp = extractFloat32Exp( a );
1036 bSig = extractFloat32Frac( b );
1037 bExp = extractFloat32Exp( b );
1038 expDiff = aExp - bExp;
1039 aSig <<= 6;
1040 bSig <<= 6;
1041 if ( 0 < expDiff ) {
1042 if ( aExp == 0xFF ) {
1043 if ( aSig ) return propagateFloat32NaN( a, b );
1044 return a;
1045 }
1046 if ( bExp == 0 ) {
1047 --expDiff;
1048 }
1049 else {
1050 bSig |= 0x20000000;
1051 }
1052 shift32RightJamming( bSig, expDiff, &bSig );
1053 zExp = aExp;
1054 }
1055 else if ( expDiff < 0 ) {
1056 if ( bExp == 0xFF ) {
1057 if ( bSig ) return propagateFloat32NaN( a, b );
1058 return packFloat32( zSign, 0xFF, 0 );
1059 }
1060 if ( aExp == 0 ) {
1061 ++expDiff;
1062 }
1063 else {
1064 aSig |= 0x20000000;
1065 }
1066 shift32RightJamming( aSig, - expDiff, &aSig );
1067 zExp = bExp;
1068 }
1069 else {
1070 if ( aExp == 0xFF ) {
1071 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1072 return a;
1073 }
1074 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1075 zSig = 0x40000000 + aSig + bSig;
1076 zExp = aExp;
1077 goto roundAndPack;
1078 }
1079 aSig |= 0x20000000;
1080 zSig = ( aSig + bSig )<<1;
1081 --zExp;
1082 if ( (sbits32) zSig < 0 ) {
1083 zSig = aSig + bSig;
1084 ++zExp;
1085 }
1086 roundAndPack:
Richard Purdief148af22005-08-03 19:49:17 +01001087 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088
1089}
1090
1091/*
1092-------------------------------------------------------------------------------
1093Returns the result of subtracting the absolute values of the single-
1094precision floating-point values `a' and `b'. If `zSign' is true, the
1095difference is negated before being returned. `zSign' is ignored if the
1096result is a NaN. The subtraction is performed according to the IEC/IEEE
1097Standard for Binary Floating-point Arithmetic.
1098-------------------------------------------------------------------------------
1099*/
Richard Purdief148af22005-08-03 19:49:17 +01001100static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101{
1102 int16 aExp, bExp, zExp;
1103 bits32 aSig, bSig, zSig;
1104 int16 expDiff;
1105
1106 aSig = extractFloat32Frac( a );
1107 aExp = extractFloat32Exp( a );
1108 bSig = extractFloat32Frac( b );
1109 bExp = extractFloat32Exp( b );
1110 expDiff = aExp - bExp;
1111 aSig <<= 7;
1112 bSig <<= 7;
1113 if ( 0 < expDiff ) goto aExpBigger;
1114 if ( expDiff < 0 ) goto bExpBigger;
1115 if ( aExp == 0xFF ) {
1116 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01001117 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 return float32_default_nan;
1119 }
1120 if ( aExp == 0 ) {
1121 aExp = 1;
1122 bExp = 1;
1123 }
1124 if ( bSig < aSig ) goto aBigger;
1125 if ( aSig < bSig ) goto bBigger;
Richard Purdief148af22005-08-03 19:49:17 +01001126 return packFloat32( roundData->mode == float_round_down, 0, 0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 bExpBigger:
1128 if ( bExp == 0xFF ) {
1129 if ( bSig ) return propagateFloat32NaN( a, b );
1130 return packFloat32( zSign ^ 1, 0xFF, 0 );
1131 }
1132 if ( aExp == 0 ) {
1133 ++expDiff;
1134 }
1135 else {
1136 aSig |= 0x40000000;
1137 }
1138 shift32RightJamming( aSig, - expDiff, &aSig );
1139 bSig |= 0x40000000;
1140 bBigger:
1141 zSig = bSig - aSig;
1142 zExp = bExp;
1143 zSign ^= 1;
1144 goto normalizeRoundAndPack;
1145 aExpBigger:
1146 if ( aExp == 0xFF ) {
1147 if ( aSig ) return propagateFloat32NaN( a, b );
1148 return a;
1149 }
1150 if ( bExp == 0 ) {
1151 --expDiff;
1152 }
1153 else {
1154 bSig |= 0x40000000;
1155 }
1156 shift32RightJamming( bSig, expDiff, &bSig );
1157 aSig |= 0x40000000;
1158 aBigger:
1159 zSig = aSig - bSig;
1160 zExp = aExp;
1161 normalizeRoundAndPack:
1162 --zExp;
Richard Purdief148af22005-08-03 19:49:17 +01001163 return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
1165}
1166
1167/*
1168-------------------------------------------------------------------------------
1169Returns the result of adding the single-precision floating-point values `a'
1170and `b'. The operation is performed according to the IEC/IEEE Standard for
1171Binary Floating-point Arithmetic.
1172-------------------------------------------------------------------------------
1173*/
Richard Purdief148af22005-08-03 19:49:17 +01001174float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175{
1176 flag aSign, bSign;
1177
1178 aSign = extractFloat32Sign( a );
1179 bSign = extractFloat32Sign( b );
1180 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01001181 return addFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 }
1183 else {
Richard Purdief148af22005-08-03 19:49:17 +01001184 return subFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 }
1186
1187}
1188
1189/*
1190-------------------------------------------------------------------------------
1191Returns the result of subtracting the single-precision floating-point values
1192`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1193for Binary Floating-point Arithmetic.
1194-------------------------------------------------------------------------------
1195*/
Richard Purdief148af22005-08-03 19:49:17 +01001196float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197{
1198 flag aSign, bSign;
1199
1200 aSign = extractFloat32Sign( a );
1201 bSign = extractFloat32Sign( b );
1202 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01001203 return subFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 }
1205 else {
Richard Purdief148af22005-08-03 19:49:17 +01001206 return addFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 }
1208
1209}
1210
1211/*
1212-------------------------------------------------------------------------------
1213Returns the result of multiplying the single-precision floating-point values
1214`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1215for Binary Floating-point Arithmetic.
1216-------------------------------------------------------------------------------
1217*/
Richard Purdief148af22005-08-03 19:49:17 +01001218float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219{
1220 flag aSign, bSign, zSign;
1221 int16 aExp, bExp, zExp;
1222 bits32 aSig, bSig;
1223 bits64 zSig64;
1224 bits32 zSig;
1225
1226 aSig = extractFloat32Frac( a );
1227 aExp = extractFloat32Exp( a );
1228 aSign = extractFloat32Sign( a );
1229 bSig = extractFloat32Frac( b );
1230 bExp = extractFloat32Exp( b );
1231 bSign = extractFloat32Sign( b );
1232 zSign = aSign ^ bSign;
1233 if ( aExp == 0xFF ) {
1234 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1235 return propagateFloat32NaN( a, b );
1236 }
1237 if ( ( bExp | bSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001238 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 return float32_default_nan;
1240 }
1241 return packFloat32( zSign, 0xFF, 0 );
1242 }
1243 if ( bExp == 0xFF ) {
1244 if ( bSig ) return propagateFloat32NaN( a, b );
1245 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001246 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return float32_default_nan;
1248 }
1249 return packFloat32( zSign, 0xFF, 0 );
1250 }
1251 if ( aExp == 0 ) {
1252 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1253 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1254 }
1255 if ( bExp == 0 ) {
1256 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1257 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1258 }
1259 zExp = aExp + bExp - 0x7F;
1260 aSig = ( aSig | 0x00800000 )<<7;
1261 bSig = ( bSig | 0x00800000 )<<8;
1262 shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1263 zSig = zSig64;
1264 if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1265 zSig <<= 1;
1266 --zExp;
1267 }
Richard Purdief148af22005-08-03 19:49:17 +01001268 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269
1270}
1271
1272/*
1273-------------------------------------------------------------------------------
1274Returns the result of dividing the single-precision floating-point value `a'
1275by the corresponding value `b'. The operation is performed according to the
1276IEC/IEEE Standard for Binary Floating-point Arithmetic.
1277-------------------------------------------------------------------------------
1278*/
Richard Purdief148af22005-08-03 19:49:17 +01001279float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280{
1281 flag aSign, bSign, zSign;
1282 int16 aExp, bExp, zExp;
1283 bits32 aSig, bSig, zSig;
1284
1285 aSig = extractFloat32Frac( a );
1286 aExp = extractFloat32Exp( a );
1287 aSign = extractFloat32Sign( a );
1288 bSig = extractFloat32Frac( b );
1289 bExp = extractFloat32Exp( b );
1290 bSign = extractFloat32Sign( b );
1291 zSign = aSign ^ bSign;
1292 if ( aExp == 0xFF ) {
1293 if ( aSig ) return propagateFloat32NaN( a, b );
1294 if ( bExp == 0xFF ) {
1295 if ( bSig ) return propagateFloat32NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01001296 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 return float32_default_nan;
1298 }
1299 return packFloat32( zSign, 0xFF, 0 );
1300 }
1301 if ( bExp == 0xFF ) {
1302 if ( bSig ) return propagateFloat32NaN( a, b );
1303 return packFloat32( zSign, 0, 0 );
1304 }
1305 if ( bExp == 0 ) {
1306 if ( bSig == 0 ) {
1307 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001308 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 return float32_default_nan;
1310 }
Richard Purdief148af22005-08-03 19:49:17 +01001311 roundData->exception |= float_flag_divbyzero;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 return packFloat32( zSign, 0xFF, 0 );
1313 }
1314 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1315 }
1316 if ( aExp == 0 ) {
1317 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1318 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1319 }
1320 zExp = aExp - bExp + 0x7D;
1321 aSig = ( aSig | 0x00800000 )<<7;
1322 bSig = ( bSig | 0x00800000 )<<8;
1323 if ( bSig <= ( aSig + aSig ) ) {
1324 aSig >>= 1;
1325 ++zExp;
1326 }
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +01001327 {
1328 bits64 tmp = ( (bits64) aSig )<<32;
1329 do_div( tmp, bSig );
1330 zSig = tmp;
1331 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 if ( ( zSig & 0x3F ) == 0 ) {
1333 zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
1334 }
Richard Purdief148af22005-08-03 19:49:17 +01001335 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
1337}
1338
1339/*
1340-------------------------------------------------------------------------------
1341Returns the remainder of the single-precision floating-point value `a'
1342with respect to the corresponding value `b'. The operation is performed
1343according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1344-------------------------------------------------------------------------------
1345*/
Richard Purdief148af22005-08-03 19:49:17 +01001346float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347{
1348 flag aSign, bSign, zSign;
1349 int16 aExp, bExp, expDiff;
1350 bits32 aSig, bSig;
1351 bits32 q;
1352 bits64 aSig64, bSig64, q64;
1353 bits32 alternateASig;
1354 sbits32 sigMean;
1355
1356 aSig = extractFloat32Frac( a );
1357 aExp = extractFloat32Exp( a );
1358 aSign = extractFloat32Sign( a );
1359 bSig = extractFloat32Frac( b );
1360 bExp = extractFloat32Exp( b );
1361 bSign = extractFloat32Sign( b );
1362 if ( aExp == 0xFF ) {
1363 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1364 return propagateFloat32NaN( a, b );
1365 }
Richard Purdief148af22005-08-03 19:49:17 +01001366 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 return float32_default_nan;
1368 }
1369 if ( bExp == 0xFF ) {
1370 if ( bSig ) return propagateFloat32NaN( a, b );
1371 return a;
1372 }
1373 if ( bExp == 0 ) {
1374 if ( bSig == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001375 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 return float32_default_nan;
1377 }
1378 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1379 }
1380 if ( aExp == 0 ) {
1381 if ( aSig == 0 ) return a;
1382 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1383 }
1384 expDiff = aExp - bExp;
1385 aSig |= 0x00800000;
1386 bSig |= 0x00800000;
1387 if ( expDiff < 32 ) {
1388 aSig <<= 8;
1389 bSig <<= 8;
1390 if ( expDiff < 0 ) {
1391 if ( expDiff < -1 ) return a;
1392 aSig >>= 1;
1393 }
1394 q = ( bSig <= aSig );
1395 if ( q ) aSig -= bSig;
1396 if ( 0 < expDiff ) {
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +01001397 bits64 tmp = ( (bits64) aSig )<<32;
1398 do_div( tmp, bSig );
1399 q = tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 q >>= 32 - expDiff;
1401 bSig >>= 2;
1402 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1403 }
1404 else {
1405 aSig >>= 2;
1406 bSig >>= 2;
1407 }
1408 }
1409 else {
1410 if ( bSig <= aSig ) aSig -= bSig;
1411 aSig64 = ( (bits64) aSig )<<40;
1412 bSig64 = ( (bits64) bSig )<<40;
1413 expDiff -= 64;
1414 while ( 0 < expDiff ) {
1415 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1416 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1417 aSig64 = - ( ( bSig * q64 )<<38 );
1418 expDiff -= 62;
1419 }
1420 expDiff += 64;
1421 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1422 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1423 q = q64>>( 64 - expDiff );
1424 bSig <<= 6;
1425 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1426 }
1427 do {
1428 alternateASig = aSig;
1429 ++q;
1430 aSig -= bSig;
1431 } while ( 0 <= (sbits32) aSig );
1432 sigMean = aSig + alternateASig;
1433 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1434 aSig = alternateASig;
1435 }
1436 zSign = ( (sbits32) aSig < 0 );
1437 if ( zSign ) aSig = - aSig;
Richard Purdief148af22005-08-03 19:49:17 +01001438 return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
1440}
1441
1442/*
1443-------------------------------------------------------------------------------
1444Returns the square root of the single-precision floating-point value `a'.
1445The operation is performed according to the IEC/IEEE Standard for Binary
1446Floating-point Arithmetic.
1447-------------------------------------------------------------------------------
1448*/
Richard Purdief148af22005-08-03 19:49:17 +01001449float32 float32_sqrt( struct roundingData *roundData, float32 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450{
1451 flag aSign;
1452 int16 aExp, zExp;
1453 bits32 aSig, zSig;
1454 bits64 rem, term;
1455
1456 aSig = extractFloat32Frac( a );
1457 aExp = extractFloat32Exp( a );
1458 aSign = extractFloat32Sign( a );
1459 if ( aExp == 0xFF ) {
1460 if ( aSig ) return propagateFloat32NaN( a, 0 );
1461 if ( ! aSign ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01001462 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 return float32_default_nan;
1464 }
1465 if ( aSign ) {
1466 if ( ( aExp | aSig ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01001467 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468 return float32_default_nan;
1469 }
1470 if ( aExp == 0 ) {
1471 if ( aSig == 0 ) return 0;
1472 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1473 }
1474 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1475 aSig = ( aSig | 0x00800000 )<<8;
1476 zSig = estimateSqrt32( aExp, aSig ) + 2;
1477 if ( ( zSig & 0x7F ) <= 5 ) {
1478 if ( zSig < 2 ) {
1479 zSig = 0xFFFFFFFF;
1480 }
1481 else {
1482 aSig >>= aExp & 1;
1483 term = ( (bits64) zSig ) * zSig;
1484 rem = ( ( (bits64) aSig )<<32 ) - term;
1485 while ( (sbits64) rem < 0 ) {
1486 --zSig;
1487 rem += ( ( (bits64) zSig )<<1 ) | 1;
1488 }
1489 zSig |= ( rem != 0 );
1490 }
1491 }
1492 shift32RightJamming( zSig, 1, &zSig );
Richard Purdief148af22005-08-03 19:49:17 +01001493 return roundAndPackFloat32( roundData, 0, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494
1495}
1496
1497/*
1498-------------------------------------------------------------------------------
1499Returns 1 if the single-precision floating-point value `a' is equal to the
1500corresponding value `b', and 0 otherwise. The comparison is performed
1501according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1502-------------------------------------------------------------------------------
1503*/
1504flag float32_eq( float32 a, float32 b )
1505{
1506
1507 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1508 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1509 ) {
1510 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1511 float_raise( float_flag_invalid );
1512 }
1513 return 0;
1514 }
1515 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1516
1517}
1518
1519/*
1520-------------------------------------------------------------------------------
1521Returns 1 if the single-precision floating-point value `a' is less than or
1522equal to the corresponding value `b', and 0 otherwise. The comparison is
1523performed according to the IEC/IEEE Standard for Binary Floating-point
1524Arithmetic.
1525-------------------------------------------------------------------------------
1526*/
1527flag float32_le( float32 a, float32 b )
1528{
1529 flag aSign, bSign;
1530
1531 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1532 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1533 ) {
1534 float_raise( float_flag_invalid );
1535 return 0;
1536 }
1537 aSign = extractFloat32Sign( a );
1538 bSign = extractFloat32Sign( b );
1539 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1540 return ( a == b ) || ( aSign ^ ( a < b ) );
1541
1542}
1543
1544/*
1545-------------------------------------------------------------------------------
1546Returns 1 if the single-precision floating-point value `a' is less than
1547the corresponding value `b', and 0 otherwise. The comparison is performed
1548according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1549-------------------------------------------------------------------------------
1550*/
1551flag float32_lt( float32 a, float32 b )
1552{
1553 flag aSign, bSign;
1554
1555 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1556 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1557 ) {
1558 float_raise( float_flag_invalid );
1559 return 0;
1560 }
1561 aSign = extractFloat32Sign( a );
1562 bSign = extractFloat32Sign( b );
1563 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1564 return ( a != b ) && ( aSign ^ ( a < b ) );
1565
1566}
1567
1568/*
1569-------------------------------------------------------------------------------
1570Returns 1 if the single-precision floating-point value `a' is equal to the
1571corresponding value `b', and 0 otherwise. The invalid exception is raised
1572if either operand is a NaN. Otherwise, the comparison is performed
1573according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1574-------------------------------------------------------------------------------
1575*/
1576flag float32_eq_signaling( float32 a, float32 b )
1577{
1578
1579 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1580 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1581 ) {
1582 float_raise( float_flag_invalid );
1583 return 0;
1584 }
1585 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1586
1587}
1588
1589/*
1590-------------------------------------------------------------------------------
1591Returns 1 if the single-precision floating-point value `a' is less than or
1592equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
1593cause an exception. Otherwise, the comparison is performed according to the
1594IEC/IEEE Standard for Binary Floating-point Arithmetic.
1595-------------------------------------------------------------------------------
1596*/
1597flag float32_le_quiet( float32 a, float32 b )
1598{
1599 flag aSign, bSign;
1600 //int16 aExp, bExp;
1601
1602 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1603 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1604 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01001605 /* Do nothing, even if NaN as we're quiet */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 return 0;
1607 }
1608 aSign = extractFloat32Sign( a );
1609 bSign = extractFloat32Sign( b );
1610 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1611 return ( a == b ) || ( aSign ^ ( a < b ) );
1612
1613}
1614
1615/*
1616-------------------------------------------------------------------------------
1617Returns 1 if the single-precision floating-point value `a' is less than
1618the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1619exception. Otherwise, the comparison is performed according to the IEC/IEEE
1620Standard for Binary Floating-point Arithmetic.
1621-------------------------------------------------------------------------------
1622*/
1623flag float32_lt_quiet( float32 a, float32 b )
1624{
1625 flag aSign, bSign;
1626
1627 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1628 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1629 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01001630 /* Do nothing, even if NaN as we're quiet */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 return 0;
1632 }
1633 aSign = extractFloat32Sign( a );
1634 bSign = extractFloat32Sign( b );
1635 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1636 return ( a != b ) && ( aSign ^ ( a < b ) );
1637
1638}
1639
1640/*
1641-------------------------------------------------------------------------------
1642Returns the result of converting the double-precision floating-point value
1643`a' to the 32-bit two's complement integer format. The conversion is
1644performed according to the IEC/IEEE Standard for Binary Floating-point
1645Arithmetic---which means in particular that the conversion is rounded
1646according to the current rounding mode. If `a' is a NaN, the largest
1647positive integer is returned. Otherwise, if the conversion overflows, the
1648largest integer with the same sign as `a' is returned.
1649-------------------------------------------------------------------------------
1650*/
Richard Purdief148af22005-08-03 19:49:17 +01001651int32 float64_to_int32( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652{
1653 flag aSign;
1654 int16 aExp, shiftCount;
1655 bits64 aSig;
1656
1657 aSig = extractFloat64Frac( a );
1658 aExp = extractFloat64Exp( a );
1659 aSign = extractFloat64Sign( a );
1660 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1661 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1662 shiftCount = 0x42C - aExp;
1663 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
Richard Purdief148af22005-08-03 19:49:17 +01001664 return roundAndPackInt32( roundData, aSign, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665
1666}
1667
1668/*
1669-------------------------------------------------------------------------------
1670Returns the result of converting the double-precision floating-point value
1671`a' to the 32-bit two's complement integer format. The conversion is
1672performed according to the IEC/IEEE Standard for Binary Floating-point
1673Arithmetic, except that the conversion is always rounded toward zero. If
1674`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1675conversion overflows, the largest integer with the same sign as `a' is
1676returned.
1677-------------------------------------------------------------------------------
1678*/
1679int32 float64_to_int32_round_to_zero( float64 a )
1680{
1681 flag aSign;
1682 int16 aExp, shiftCount;
1683 bits64 aSig, savedASig;
1684 int32 z;
1685
1686 aSig = extractFloat64Frac( a );
1687 aExp = extractFloat64Exp( a );
1688 aSign = extractFloat64Sign( a );
1689 shiftCount = 0x433 - aExp;
1690 if ( shiftCount < 21 ) {
1691 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1692 goto invalid;
1693 }
1694 else if ( 52 < shiftCount ) {
Richard Purdief148af22005-08-03 19:49:17 +01001695 if ( aExp || aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 return 0;
1697 }
1698 aSig |= LIT64( 0x0010000000000000 );
1699 savedASig = aSig;
1700 aSig >>= shiftCount;
1701 z = aSig;
1702 if ( aSign ) z = - z;
1703 if ( ( z < 0 ) ^ aSign ) {
1704 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01001705 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 return aSign ? 0x80000000 : 0x7FFFFFFF;
1707 }
1708 if ( ( aSig<<shiftCount ) != savedASig ) {
Richard Purdief148af22005-08-03 19:49:17 +01001709 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 }
1711 return z;
1712
1713}
1714
1715/*
1716-------------------------------------------------------------------------------
1717Returns the result of converting the double-precision floating-point value
1718`a' to the 32-bit two's complement unsigned integer format. The conversion
1719is performed according to the IEC/IEEE Standard for Binary Floating-point
1720Arithmetic---which means in particular that the conversion is rounded
1721according to the current rounding mode. If `a' is a NaN, the largest
1722positive integer is returned. Otherwise, if the conversion overflows, the
1723largest positive integer is returned.
1724-------------------------------------------------------------------------------
1725*/
Richard Purdief148af22005-08-03 19:49:17 +01001726int32 float64_to_uint32( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727{
1728 flag aSign;
1729 int16 aExp, shiftCount;
1730 bits64 aSig;
1731
1732 aSig = extractFloat64Frac( a );
1733 aExp = extractFloat64Exp( a );
1734 aSign = 0; //extractFloat64Sign( a );
1735 //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1736 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1737 shiftCount = 0x42C - aExp;
1738 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
Richard Purdief148af22005-08-03 19:49:17 +01001739 return roundAndPackInt32( roundData, aSign, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740}
1741
1742/*
1743-------------------------------------------------------------------------------
1744Returns the result of converting the double-precision floating-point value
1745`a' to the 32-bit two's complement integer format. The conversion is
1746performed according to the IEC/IEEE Standard for Binary Floating-point
1747Arithmetic, except that the conversion is always rounded toward zero. If
1748`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1749conversion overflows, the largest positive integer is returned.
1750-------------------------------------------------------------------------------
1751*/
1752int32 float64_to_uint32_round_to_zero( float64 a )
1753{
1754 flag aSign;
1755 int16 aExp, shiftCount;
1756 bits64 aSig, savedASig;
1757 int32 z;
1758
1759 aSig = extractFloat64Frac( a );
1760 aExp = extractFloat64Exp( a );
1761 aSign = extractFloat64Sign( a );
1762 shiftCount = 0x433 - aExp;
1763 if ( shiftCount < 21 ) {
1764 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1765 goto invalid;
1766 }
1767 else if ( 52 < shiftCount ) {
Richard Purdief148af22005-08-03 19:49:17 +01001768 if ( aExp || aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769 return 0;
1770 }
1771 aSig |= LIT64( 0x0010000000000000 );
1772 savedASig = aSig;
1773 aSig >>= shiftCount;
1774 z = aSig;
1775 if ( aSign ) z = - z;
1776 if ( ( z < 0 ) ^ aSign ) {
1777 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01001778 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 return aSign ? 0x80000000 : 0x7FFFFFFF;
1780 }
1781 if ( ( aSig<<shiftCount ) != savedASig ) {
Richard Purdief148af22005-08-03 19:49:17 +01001782 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 }
1784 return z;
1785}
1786
1787/*
1788-------------------------------------------------------------------------------
1789Returns the result of converting the double-precision floating-point value
1790`a' to the single-precision floating-point format. The conversion is
1791performed according to the IEC/IEEE Standard for Binary Floating-point
1792Arithmetic.
1793-------------------------------------------------------------------------------
1794*/
Richard Purdief148af22005-08-03 19:49:17 +01001795float32 float64_to_float32( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796{
1797 flag aSign;
1798 int16 aExp;
1799 bits64 aSig;
1800 bits32 zSig;
1801
1802 aSig = extractFloat64Frac( a );
1803 aExp = extractFloat64Exp( a );
1804 aSign = extractFloat64Sign( a );
1805 if ( aExp == 0x7FF ) {
1806 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
1807 return packFloat32( aSign, 0xFF, 0 );
1808 }
1809 shift64RightJamming( aSig, 22, &aSig );
1810 zSig = aSig;
1811 if ( aExp || zSig ) {
1812 zSig |= 0x40000000;
1813 aExp -= 0x381;
1814 }
Richard Purdief148af22005-08-03 19:49:17 +01001815 return roundAndPackFloat32( roundData, aSign, aExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816
1817}
1818
1819#ifdef FLOATX80
1820
1821/*
1822-------------------------------------------------------------------------------
1823Returns the result of converting the double-precision floating-point value
1824`a' to the extended double-precision floating-point format. The conversion
1825is performed according to the IEC/IEEE Standard for Binary Floating-point
1826Arithmetic.
1827-------------------------------------------------------------------------------
1828*/
1829floatx80 float64_to_floatx80( float64 a )
1830{
1831 flag aSign;
1832 int16 aExp;
1833 bits64 aSig;
1834
1835 aSig = extractFloat64Frac( a );
1836 aExp = extractFloat64Exp( a );
1837 aSign = extractFloat64Sign( a );
1838 if ( aExp == 0x7FF ) {
1839 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
1840 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1841 }
1842 if ( aExp == 0 ) {
1843 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1844 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1845 }
1846 return
1847 packFloatx80(
1848 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1849
1850}
1851
1852#endif
1853
1854/*
1855-------------------------------------------------------------------------------
1856Rounds the double-precision floating-point value `a' to an integer, and
1857returns the result as a double-precision floating-point value. The
1858operation is performed according to the IEC/IEEE Standard for Binary
1859Floating-point Arithmetic.
1860-------------------------------------------------------------------------------
1861*/
Richard Purdief148af22005-08-03 19:49:17 +01001862float64 float64_round_to_int( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863{
1864 flag aSign;
1865 int16 aExp;
1866 bits64 lastBitMask, roundBitsMask;
1867 int8 roundingMode;
1868 float64 z;
1869
1870 aExp = extractFloat64Exp( a );
1871 if ( 0x433 <= aExp ) {
1872 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
1873 return propagateFloat64NaN( a, a );
1874 }
1875 return a;
1876 }
1877 if ( aExp <= 0x3FE ) {
1878 if ( (bits64) ( a<<1 ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01001879 roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880 aSign = extractFloat64Sign( a );
Richard Purdief148af22005-08-03 19:49:17 +01001881 switch ( roundData->mode ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882 case float_round_nearest_even:
1883 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
1884 return packFloat64( aSign, 0x3FF, 0 );
1885 }
1886 break;
1887 case float_round_down:
1888 return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
1889 case float_round_up:
1890 return
1891 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
1892 }
1893 return packFloat64( aSign, 0, 0 );
1894 }
1895 lastBitMask = 1;
1896 lastBitMask <<= 0x433 - aExp;
1897 roundBitsMask = lastBitMask - 1;
1898 z = a;
Richard Purdief148af22005-08-03 19:49:17 +01001899 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 if ( roundingMode == float_round_nearest_even ) {
1901 z += lastBitMask>>1;
1902 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1903 }
1904 else if ( roundingMode != float_round_to_zero ) {
1905 if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1906 z += roundBitsMask;
1907 }
1908 }
1909 z &= ~ roundBitsMask;
Richard Purdief148af22005-08-03 19:49:17 +01001910 if ( z != a ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 return z;
1912
1913}
1914
1915/*
1916-------------------------------------------------------------------------------
1917Returns the result of adding the absolute values of the double-precision
1918floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1919before being returned. `zSign' is ignored if the result is a NaN. The
1920addition is performed according to the IEC/IEEE Standard for Binary
1921Floating-point Arithmetic.
1922-------------------------------------------------------------------------------
1923*/
Richard Purdief148af22005-08-03 19:49:17 +01001924static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925{
1926 int16 aExp, bExp, zExp;
1927 bits64 aSig, bSig, zSig;
1928 int16 expDiff;
1929
1930 aSig = extractFloat64Frac( a );
1931 aExp = extractFloat64Exp( a );
1932 bSig = extractFloat64Frac( b );
1933 bExp = extractFloat64Exp( b );
1934 expDiff = aExp - bExp;
1935 aSig <<= 9;
1936 bSig <<= 9;
1937 if ( 0 < expDiff ) {
1938 if ( aExp == 0x7FF ) {
1939 if ( aSig ) return propagateFloat64NaN( a, b );
1940 return a;
1941 }
1942 if ( bExp == 0 ) {
1943 --expDiff;
1944 }
1945 else {
1946 bSig |= LIT64( 0x2000000000000000 );
1947 }
1948 shift64RightJamming( bSig, expDiff, &bSig );
1949 zExp = aExp;
1950 }
1951 else if ( expDiff < 0 ) {
1952 if ( bExp == 0x7FF ) {
1953 if ( bSig ) return propagateFloat64NaN( a, b );
1954 return packFloat64( zSign, 0x7FF, 0 );
1955 }
1956 if ( aExp == 0 ) {
1957 ++expDiff;
1958 }
1959 else {
1960 aSig |= LIT64( 0x2000000000000000 );
1961 }
1962 shift64RightJamming( aSig, - expDiff, &aSig );
1963 zExp = bExp;
1964 }
1965 else {
1966 if ( aExp == 0x7FF ) {
1967 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
1968 return a;
1969 }
1970 if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
1971 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
1972 zExp = aExp;
1973 goto roundAndPack;
1974 }
1975 aSig |= LIT64( 0x2000000000000000 );
1976 zSig = ( aSig + bSig )<<1;
1977 --zExp;
1978 if ( (sbits64) zSig < 0 ) {
1979 zSig = aSig + bSig;
1980 ++zExp;
1981 }
1982 roundAndPack:
Richard Purdief148af22005-08-03 19:49:17 +01001983 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984
1985}
1986
1987/*
1988-------------------------------------------------------------------------------
1989Returns the result of subtracting the absolute values of the double-
1990precision floating-point values `a' and `b'. If `zSign' is true, the
1991difference is negated before being returned. `zSign' is ignored if the
1992result is a NaN. The subtraction is performed according to the IEC/IEEE
1993Standard for Binary Floating-point Arithmetic.
1994-------------------------------------------------------------------------------
1995*/
Richard Purdief148af22005-08-03 19:49:17 +01001996static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997{
1998 int16 aExp, bExp, zExp;
1999 bits64 aSig, bSig, zSig;
2000 int16 expDiff;
2001
2002 aSig = extractFloat64Frac( a );
2003 aExp = extractFloat64Exp( a );
2004 bSig = extractFloat64Frac( b );
2005 bExp = extractFloat64Exp( b );
2006 expDiff = aExp - bExp;
2007 aSig <<= 10;
2008 bSig <<= 10;
2009 if ( 0 < expDiff ) goto aExpBigger;
2010 if ( expDiff < 0 ) goto bExpBigger;
2011 if ( aExp == 0x7FF ) {
2012 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01002013 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 return float64_default_nan;
2015 }
2016 if ( aExp == 0 ) {
2017 aExp = 1;
2018 bExp = 1;
2019 }
2020 if ( bSig < aSig ) goto aBigger;
2021 if ( aSig < bSig ) goto bBigger;
Richard Purdief148af22005-08-03 19:49:17 +01002022 return packFloat64( roundData->mode == float_round_down, 0, 0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 bExpBigger:
2024 if ( bExp == 0x7FF ) {
2025 if ( bSig ) return propagateFloat64NaN( a, b );
2026 return packFloat64( zSign ^ 1, 0x7FF, 0 );
2027 }
2028 if ( aExp == 0 ) {
2029 ++expDiff;
2030 }
2031 else {
2032 aSig |= LIT64( 0x4000000000000000 );
2033 }
2034 shift64RightJamming( aSig, - expDiff, &aSig );
2035 bSig |= LIT64( 0x4000000000000000 );
2036 bBigger:
2037 zSig = bSig - aSig;
2038 zExp = bExp;
2039 zSign ^= 1;
2040 goto normalizeRoundAndPack;
2041 aExpBigger:
2042 if ( aExp == 0x7FF ) {
2043 if ( aSig ) return propagateFloat64NaN( a, b );
2044 return a;
2045 }
2046 if ( bExp == 0 ) {
2047 --expDiff;
2048 }
2049 else {
2050 bSig |= LIT64( 0x4000000000000000 );
2051 }
2052 shift64RightJamming( bSig, expDiff, &bSig );
2053 aSig |= LIT64( 0x4000000000000000 );
2054 aBigger:
2055 zSig = aSig - bSig;
2056 zExp = aExp;
2057 normalizeRoundAndPack:
2058 --zExp;
Richard Purdief148af22005-08-03 19:49:17 +01002059 return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060
2061}
2062
2063/*
2064-------------------------------------------------------------------------------
2065Returns the result of adding the double-precision floating-point values `a'
2066and `b'. The operation is performed according to the IEC/IEEE Standard for
2067Binary Floating-point Arithmetic.
2068-------------------------------------------------------------------------------
2069*/
Richard Purdief148af22005-08-03 19:49:17 +01002070float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071{
2072 flag aSign, bSign;
2073
2074 aSign = extractFloat64Sign( a );
2075 bSign = extractFloat64Sign( b );
2076 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002077 return addFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078 }
2079 else {
Richard Purdief148af22005-08-03 19:49:17 +01002080 return subFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 }
2082
2083}
2084
2085/*
2086-------------------------------------------------------------------------------
2087Returns the result of subtracting the double-precision floating-point values
2088`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2089for Binary Floating-point Arithmetic.
2090-------------------------------------------------------------------------------
2091*/
Richard Purdief148af22005-08-03 19:49:17 +01002092float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093{
2094 flag aSign, bSign;
2095
2096 aSign = extractFloat64Sign( a );
2097 bSign = extractFloat64Sign( b );
2098 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002099 return subFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 }
2101 else {
Richard Purdief148af22005-08-03 19:49:17 +01002102 return addFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103 }
2104
2105}
2106
2107/*
2108-------------------------------------------------------------------------------
2109Returns the result of multiplying the double-precision floating-point values
2110`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2111for Binary Floating-point Arithmetic.
2112-------------------------------------------------------------------------------
2113*/
Richard Purdief148af22005-08-03 19:49:17 +01002114float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115{
2116 flag aSign, bSign, zSign;
2117 int16 aExp, bExp, zExp;
2118 bits64 aSig, bSig, zSig0, zSig1;
2119
2120 aSig = extractFloat64Frac( a );
2121 aExp = extractFloat64Exp( a );
2122 aSign = extractFloat64Sign( a );
2123 bSig = extractFloat64Frac( b );
2124 bExp = extractFloat64Exp( b );
2125 bSign = extractFloat64Sign( b );
2126 zSign = aSign ^ bSign;
2127 if ( aExp == 0x7FF ) {
2128 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2129 return propagateFloat64NaN( a, b );
2130 }
2131 if ( ( bExp | bSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002132 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 return float64_default_nan;
2134 }
2135 return packFloat64( zSign, 0x7FF, 0 );
2136 }
2137 if ( bExp == 0x7FF ) {
2138 if ( bSig ) return propagateFloat64NaN( a, b );
2139 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002140 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141 return float64_default_nan;
2142 }
2143 return packFloat64( zSign, 0x7FF, 0 );
2144 }
2145 if ( aExp == 0 ) {
2146 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2147 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2148 }
2149 if ( bExp == 0 ) {
2150 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2151 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2152 }
2153 zExp = aExp + bExp - 0x3FF;
2154 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2155 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2156 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2157 zSig0 |= ( zSig1 != 0 );
2158 if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2159 zSig0 <<= 1;
2160 --zExp;
2161 }
Richard Purdief148af22005-08-03 19:49:17 +01002162 return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163
2164}
2165
2166/*
2167-------------------------------------------------------------------------------
2168Returns the result of dividing the double-precision floating-point value `a'
2169by the corresponding value `b'. The operation is performed according to
2170the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2171-------------------------------------------------------------------------------
2172*/
Richard Purdief148af22005-08-03 19:49:17 +01002173float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174{
2175 flag aSign, bSign, zSign;
2176 int16 aExp, bExp, zExp;
2177 bits64 aSig, bSig, zSig;
2178 bits64 rem0, rem1;
2179 bits64 term0, term1;
2180
2181 aSig = extractFloat64Frac( a );
2182 aExp = extractFloat64Exp( a );
2183 aSign = extractFloat64Sign( a );
2184 bSig = extractFloat64Frac( b );
2185 bExp = extractFloat64Exp( b );
2186 bSign = extractFloat64Sign( b );
2187 zSign = aSign ^ bSign;
2188 if ( aExp == 0x7FF ) {
2189 if ( aSig ) return propagateFloat64NaN( a, b );
2190 if ( bExp == 0x7FF ) {
2191 if ( bSig ) return propagateFloat64NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01002192 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 return float64_default_nan;
2194 }
2195 return packFloat64( zSign, 0x7FF, 0 );
2196 }
2197 if ( bExp == 0x7FF ) {
2198 if ( bSig ) return propagateFloat64NaN( a, b );
2199 return packFloat64( zSign, 0, 0 );
2200 }
2201 if ( bExp == 0 ) {
2202 if ( bSig == 0 ) {
2203 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002204 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205 return float64_default_nan;
2206 }
Richard Purdief148af22005-08-03 19:49:17 +01002207 roundData->exception |= float_flag_divbyzero;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 return packFloat64( zSign, 0x7FF, 0 );
2209 }
2210 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2211 }
2212 if ( aExp == 0 ) {
2213 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2214 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2215 }
2216 zExp = aExp - bExp + 0x3FD;
2217 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2218 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2219 if ( bSig <= ( aSig + aSig ) ) {
2220 aSig >>= 1;
2221 ++zExp;
2222 }
2223 zSig = estimateDiv128To64( aSig, 0, bSig );
2224 if ( ( zSig & 0x1FF ) <= 2 ) {
2225 mul64To128( bSig, zSig, &term0, &term1 );
2226 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2227 while ( (sbits64) rem0 < 0 ) {
2228 --zSig;
2229 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2230 }
2231 zSig |= ( rem1 != 0 );
2232 }
Richard Purdief148af22005-08-03 19:49:17 +01002233 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234
2235}
2236
2237/*
2238-------------------------------------------------------------------------------
2239Returns the remainder of the double-precision floating-point value `a'
2240with respect to the corresponding value `b'. The operation is performed
2241according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2242-------------------------------------------------------------------------------
2243*/
Richard Purdief148af22005-08-03 19:49:17 +01002244float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245{
2246 flag aSign, bSign, zSign;
2247 int16 aExp, bExp, expDiff;
2248 bits64 aSig, bSig;
2249 bits64 q, alternateASig;
2250 sbits64 sigMean;
2251
2252 aSig = extractFloat64Frac( a );
2253 aExp = extractFloat64Exp( a );
2254 aSign = extractFloat64Sign( a );
2255 bSig = extractFloat64Frac( b );
2256 bExp = extractFloat64Exp( b );
2257 bSign = extractFloat64Sign( b );
2258 if ( aExp == 0x7FF ) {
2259 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2260 return propagateFloat64NaN( a, b );
2261 }
Richard Purdief148af22005-08-03 19:49:17 +01002262 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 return float64_default_nan;
2264 }
2265 if ( bExp == 0x7FF ) {
2266 if ( bSig ) return propagateFloat64NaN( a, b );
2267 return a;
2268 }
2269 if ( bExp == 0 ) {
2270 if ( bSig == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002271 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 return float64_default_nan;
2273 }
2274 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2275 }
2276 if ( aExp == 0 ) {
2277 if ( aSig == 0 ) return a;
2278 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2279 }
2280 expDiff = aExp - bExp;
2281 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2282 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2283 if ( expDiff < 0 ) {
2284 if ( expDiff < -1 ) return a;
2285 aSig >>= 1;
2286 }
2287 q = ( bSig <= aSig );
2288 if ( q ) aSig -= bSig;
2289 expDiff -= 64;
2290 while ( 0 < expDiff ) {
2291 q = estimateDiv128To64( aSig, 0, bSig );
2292 q = ( 2 < q ) ? q - 2 : 0;
2293 aSig = - ( ( bSig>>2 ) * q );
2294 expDiff -= 62;
2295 }
2296 expDiff += 64;
2297 if ( 0 < expDiff ) {
2298 q = estimateDiv128To64( aSig, 0, bSig );
2299 q = ( 2 < q ) ? q - 2 : 0;
2300 q >>= 64 - expDiff;
2301 bSig >>= 2;
2302 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2303 }
2304 else {
2305 aSig >>= 2;
2306 bSig >>= 2;
2307 }
2308 do {
2309 alternateASig = aSig;
2310 ++q;
2311 aSig -= bSig;
2312 } while ( 0 <= (sbits64) aSig );
2313 sigMean = aSig + alternateASig;
2314 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2315 aSig = alternateASig;
2316 }
2317 zSign = ( (sbits64) aSig < 0 );
2318 if ( zSign ) aSig = - aSig;
Richard Purdief148af22005-08-03 19:49:17 +01002319 return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320
2321}
2322
2323/*
2324-------------------------------------------------------------------------------
2325Returns the square root of the double-precision floating-point value `a'.
2326The operation is performed according to the IEC/IEEE Standard for Binary
2327Floating-point Arithmetic.
2328-------------------------------------------------------------------------------
2329*/
Richard Purdief148af22005-08-03 19:49:17 +01002330float64 float64_sqrt( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331{
2332 flag aSign;
2333 int16 aExp, zExp;
2334 bits64 aSig, zSig;
2335 bits64 rem0, rem1, term0, term1; //, shiftedRem;
2336 //float64 z;
2337
2338 aSig = extractFloat64Frac( a );
2339 aExp = extractFloat64Exp( a );
2340 aSign = extractFloat64Sign( a );
2341 if ( aExp == 0x7FF ) {
2342 if ( aSig ) return propagateFloat64NaN( a, a );
2343 if ( ! aSign ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01002344 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345 return float64_default_nan;
2346 }
2347 if ( aSign ) {
2348 if ( ( aExp | aSig ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01002349 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350 return float64_default_nan;
2351 }
2352 if ( aExp == 0 ) {
2353 if ( aSig == 0 ) return 0;
2354 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2355 }
2356 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2357 aSig |= LIT64( 0x0010000000000000 );
2358 zSig = estimateSqrt32( aExp, aSig>>21 );
2359 zSig <<= 31;
2360 aSig <<= 9 - ( aExp & 1 );
2361 zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
2362 if ( ( zSig & 0x3FF ) <= 5 ) {
2363 if ( zSig < 2 ) {
2364 zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
2365 }
2366 else {
2367 aSig <<= 2;
2368 mul64To128( zSig, zSig, &term0, &term1 );
2369 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2370 while ( (sbits64) rem0 < 0 ) {
2371 --zSig;
2372 shortShift128Left( 0, zSig, 1, &term0, &term1 );
2373 term1 |= 1;
2374 add128( rem0, rem1, term0, term1, &rem0, &rem1 );
2375 }
2376 zSig |= ( ( rem0 | rem1 ) != 0 );
2377 }
2378 }
2379 shift64RightJamming( zSig, 1, &zSig );
Richard Purdief148af22005-08-03 19:49:17 +01002380 return roundAndPackFloat64( roundData, 0, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381
2382}
2383
2384/*
2385-------------------------------------------------------------------------------
2386Returns 1 if the double-precision floating-point value `a' is equal to the
2387corresponding value `b', and 0 otherwise. The comparison is performed
2388according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2389-------------------------------------------------------------------------------
2390*/
2391flag float64_eq( float64 a, float64 b )
2392{
2393
2394 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2395 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2396 ) {
2397 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2398 float_raise( float_flag_invalid );
2399 }
2400 return 0;
2401 }
2402 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2403
2404}
2405
2406/*
2407-------------------------------------------------------------------------------
2408Returns 1 if the double-precision floating-point value `a' is less than or
2409equal to the corresponding value `b', and 0 otherwise. The comparison is
2410performed according to the IEC/IEEE Standard for Binary Floating-point
2411Arithmetic.
2412-------------------------------------------------------------------------------
2413*/
2414flag float64_le( float64 a, float64 b )
2415{
2416 flag aSign, bSign;
2417
2418 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2419 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2420 ) {
2421 float_raise( float_flag_invalid );
2422 return 0;
2423 }
2424 aSign = extractFloat64Sign( a );
2425 bSign = extractFloat64Sign( b );
2426 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2427 return ( a == b ) || ( aSign ^ ( a < b ) );
2428
2429}
2430
2431/*
2432-------------------------------------------------------------------------------
2433Returns 1 if the double-precision floating-point value `a' is less than
2434the corresponding value `b', and 0 otherwise. The comparison is performed
2435according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2436-------------------------------------------------------------------------------
2437*/
2438flag float64_lt( float64 a, float64 b )
2439{
2440 flag aSign, bSign;
2441
2442 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2443 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2444 ) {
2445 float_raise( float_flag_invalid );
2446 return 0;
2447 }
2448 aSign = extractFloat64Sign( a );
2449 bSign = extractFloat64Sign( b );
2450 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2451 return ( a != b ) && ( aSign ^ ( a < b ) );
2452
2453}
2454
2455/*
2456-------------------------------------------------------------------------------
2457Returns 1 if the double-precision floating-point value `a' is equal to the
2458corresponding value `b', and 0 otherwise. The invalid exception is raised
2459if either operand is a NaN. Otherwise, the comparison is performed
2460according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2461-------------------------------------------------------------------------------
2462*/
2463flag float64_eq_signaling( float64 a, float64 b )
2464{
2465
2466 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2467 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2468 ) {
2469 float_raise( float_flag_invalid );
2470 return 0;
2471 }
2472 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2473
2474}
2475
2476/*
2477-------------------------------------------------------------------------------
2478Returns 1 if the double-precision floating-point value `a' is less than or
2479equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2480cause an exception. Otherwise, the comparison is performed according to the
2481IEC/IEEE Standard for Binary Floating-point Arithmetic.
2482-------------------------------------------------------------------------------
2483*/
2484flag float64_le_quiet( float64 a, float64 b )
2485{
2486 flag aSign, bSign;
2487 //int16 aExp, bExp;
2488
2489 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2490 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2491 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01002492 /* Do nothing, even if NaN as we're quiet */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 return 0;
2494 }
2495 aSign = extractFloat64Sign( a );
2496 bSign = extractFloat64Sign( b );
2497 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2498 return ( a == b ) || ( aSign ^ ( a < b ) );
2499
2500}
2501
2502/*
2503-------------------------------------------------------------------------------
2504Returns 1 if the double-precision floating-point value `a' is less than
2505the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2506exception. Otherwise, the comparison is performed according to the IEC/IEEE
2507Standard for Binary Floating-point Arithmetic.
2508-------------------------------------------------------------------------------
2509*/
2510flag float64_lt_quiet( float64 a, float64 b )
2511{
2512 flag aSign, bSign;
2513
2514 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2515 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2516 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01002517 /* Do nothing, even if NaN as we're quiet */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518 return 0;
2519 }
2520 aSign = extractFloat64Sign( a );
2521 bSign = extractFloat64Sign( b );
2522 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2523 return ( a != b ) && ( aSign ^ ( a < b ) );
2524
2525}
2526
2527#ifdef FLOATX80
2528
2529/*
2530-------------------------------------------------------------------------------
2531Returns the result of converting the extended double-precision floating-
2532point value `a' to the 32-bit two's complement integer format. The
2533conversion is performed according to the IEC/IEEE Standard for Binary
2534Floating-point Arithmetic---which means in particular that the conversion
2535is rounded according to the current rounding mode. If `a' is a NaN, the
2536largest positive integer is returned. Otherwise, if the conversion
2537overflows, the largest integer with the same sign as `a' is returned.
2538-------------------------------------------------------------------------------
2539*/
Richard Purdief148af22005-08-03 19:49:17 +01002540int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541{
2542 flag aSign;
2543 int32 aExp, shiftCount;
2544 bits64 aSig;
2545
2546 aSig = extractFloatx80Frac( a );
2547 aExp = extractFloatx80Exp( a );
2548 aSign = extractFloatx80Sign( a );
2549 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2550 shiftCount = 0x4037 - aExp;
2551 if ( shiftCount <= 0 ) shiftCount = 1;
2552 shift64RightJamming( aSig, shiftCount, &aSig );
Richard Purdief148af22005-08-03 19:49:17 +01002553 return roundAndPackInt32( roundData, aSign, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554
2555}
2556
2557/*
2558-------------------------------------------------------------------------------
2559Returns the result of converting the extended double-precision floating-
2560point value `a' to the 32-bit two's complement integer format. The
2561conversion is performed according to the IEC/IEEE Standard for Binary
2562Floating-point Arithmetic, except that the conversion is always rounded
2563toward zero. If `a' is a NaN, the largest positive integer is returned.
2564Otherwise, if the conversion overflows, the largest integer with the same
2565sign as `a' is returned.
2566-------------------------------------------------------------------------------
2567*/
2568int32 floatx80_to_int32_round_to_zero( floatx80 a )
2569{
2570 flag aSign;
2571 int32 aExp, shiftCount;
2572 bits64 aSig, savedASig;
2573 int32 z;
2574
2575 aSig = extractFloatx80Frac( a );
2576 aExp = extractFloatx80Exp( a );
2577 aSign = extractFloatx80Sign( a );
2578 shiftCount = 0x403E - aExp;
2579 if ( shiftCount < 32 ) {
2580 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2581 goto invalid;
2582 }
2583 else if ( 63 < shiftCount ) {
Richard Purdief148af22005-08-03 19:49:17 +01002584 if ( aExp || aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 return 0;
2586 }
2587 savedASig = aSig;
2588 aSig >>= shiftCount;
2589 z = aSig;
2590 if ( aSign ) z = - z;
2591 if ( ( z < 0 ) ^ aSign ) {
2592 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01002593 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 return aSign ? 0x80000000 : 0x7FFFFFFF;
2595 }
2596 if ( ( aSig<<shiftCount ) != savedASig ) {
Richard Purdief148af22005-08-03 19:49:17 +01002597 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 }
2599 return z;
2600
2601}
2602
2603/*
2604-------------------------------------------------------------------------------
2605Returns the result of converting the extended double-precision floating-
2606point value `a' to the single-precision floating-point format. The
2607conversion is performed according to the IEC/IEEE Standard for Binary
2608Floating-point Arithmetic.
2609-------------------------------------------------------------------------------
2610*/
Richard Purdief148af22005-08-03 19:49:17 +01002611float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612{
2613 flag aSign;
2614 int32 aExp;
2615 bits64 aSig;
2616
2617 aSig = extractFloatx80Frac( a );
2618 aExp = extractFloatx80Exp( a );
2619 aSign = extractFloatx80Sign( a );
2620 if ( aExp == 0x7FFF ) {
2621 if ( (bits64) ( aSig<<1 ) ) {
2622 return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
2623 }
2624 return packFloat32( aSign, 0xFF, 0 );
2625 }
2626 shift64RightJamming( aSig, 33, &aSig );
2627 if ( aExp || aSig ) aExp -= 0x3F81;
Richard Purdief148af22005-08-03 19:49:17 +01002628 return roundAndPackFloat32( roundData, aSign, aExp, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629
2630}
2631
2632/*
2633-------------------------------------------------------------------------------
2634Returns the result of converting the extended double-precision floating-
2635point value `a' to the double-precision floating-point format. The
2636conversion is performed according to the IEC/IEEE Standard for Binary
2637Floating-point Arithmetic.
2638-------------------------------------------------------------------------------
2639*/
Richard Purdief148af22005-08-03 19:49:17 +01002640float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641{
2642 flag aSign;
2643 int32 aExp;
2644 bits64 aSig, zSig;
2645
2646 aSig = extractFloatx80Frac( a );
2647 aExp = extractFloatx80Exp( a );
2648 aSign = extractFloatx80Sign( a );
2649 if ( aExp == 0x7FFF ) {
2650 if ( (bits64) ( aSig<<1 ) ) {
2651 return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
2652 }
2653 return packFloat64( aSign, 0x7FF, 0 );
2654 }
2655 shift64RightJamming( aSig, 1, &zSig );
2656 if ( aExp || aSig ) aExp -= 0x3C01;
Richard Purdief148af22005-08-03 19:49:17 +01002657 return roundAndPackFloat64( roundData, aSign, aExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658
2659}
2660
2661/*
2662-------------------------------------------------------------------------------
2663Rounds the extended double-precision floating-point value `a' to an integer,
2664and returns the result as an extended quadruple-precision floating-point
2665value. The operation is performed according to the IEC/IEEE Standard for
2666Binary Floating-point Arithmetic.
2667-------------------------------------------------------------------------------
2668*/
Richard Purdief148af22005-08-03 19:49:17 +01002669floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670{
2671 flag aSign;
2672 int32 aExp;
2673 bits64 lastBitMask, roundBitsMask;
2674 int8 roundingMode;
2675 floatx80 z;
2676
2677 aExp = extractFloatx80Exp( a );
2678 if ( 0x403E <= aExp ) {
2679 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
2680 return propagateFloatx80NaN( a, a );
2681 }
2682 return a;
2683 }
2684 if ( aExp <= 0x3FFE ) {
2685 if ( ( aExp == 0 )
2686 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2687 return a;
2688 }
Richard Purdief148af22005-08-03 19:49:17 +01002689 roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002690 aSign = extractFloatx80Sign( a );
Richard Purdief148af22005-08-03 19:49:17 +01002691 switch ( roundData->mode ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002692 case float_round_nearest_even:
2693 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
2694 ) {
2695 return
2696 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
2697 }
2698 break;
2699 case float_round_down:
2700 return
2701 aSign ?
2702 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
2703 : packFloatx80( 0, 0, 0 );
2704 case float_round_up:
2705 return
2706 aSign ? packFloatx80( 1, 0, 0 )
2707 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
2708 }
2709 return packFloatx80( aSign, 0, 0 );
2710 }
2711 lastBitMask = 1;
2712 lastBitMask <<= 0x403E - aExp;
2713 roundBitsMask = lastBitMask - 1;
2714 z = a;
Richard Purdief148af22005-08-03 19:49:17 +01002715 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716 if ( roundingMode == float_round_nearest_even ) {
2717 z.low += lastBitMask>>1;
2718 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
2719 }
2720 else if ( roundingMode != float_round_to_zero ) {
2721 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2722 z.low += roundBitsMask;
2723 }
2724 }
2725 z.low &= ~ roundBitsMask;
2726 if ( z.low == 0 ) {
2727 ++z.high;
2728 z.low = LIT64( 0x8000000000000000 );
2729 }
Richard Purdief148af22005-08-03 19:49:17 +01002730 if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002731 return z;
2732
2733}
2734
2735/*
2736-------------------------------------------------------------------------------
2737Returns the result of adding the absolute values of the extended double-
2738precision floating-point values `a' and `b'. If `zSign' is true, the sum is
2739negated before being returned. `zSign' is ignored if the result is a NaN.
2740The addition is performed according to the IEC/IEEE Standard for Binary
2741Floating-point Arithmetic.
2742-------------------------------------------------------------------------------
2743*/
Richard Purdief148af22005-08-03 19:49:17 +01002744static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002745{
2746 int32 aExp, bExp, zExp;
2747 bits64 aSig, bSig, zSig0, zSig1;
2748 int32 expDiff;
2749
2750 aSig = extractFloatx80Frac( a );
2751 aExp = extractFloatx80Exp( a );
2752 bSig = extractFloatx80Frac( b );
2753 bExp = extractFloatx80Exp( b );
2754 expDiff = aExp - bExp;
2755 if ( 0 < expDiff ) {
2756 if ( aExp == 0x7FFF ) {
2757 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2758 return a;
2759 }
2760 if ( bExp == 0 ) --expDiff;
2761 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2762 zExp = aExp;
2763 }
2764 else if ( expDiff < 0 ) {
2765 if ( bExp == 0x7FFF ) {
2766 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2767 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2768 }
2769 if ( aExp == 0 ) ++expDiff;
2770 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2771 zExp = bExp;
2772 }
2773 else {
2774 if ( aExp == 0x7FFF ) {
2775 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2776 return propagateFloatx80NaN( a, b );
2777 }
2778 return a;
2779 }
2780 zSig1 = 0;
2781 zSig0 = aSig + bSig;
2782 if ( aExp == 0 ) {
2783 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2784 goto roundAndPack;
2785 }
2786 zExp = aExp;
2787 goto shiftRight1;
2788 }
2789
2790 zSig0 = aSig + bSig;
2791
2792 if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
2793 shiftRight1:
2794 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2795 zSig0 |= LIT64( 0x8000000000000000 );
2796 ++zExp;
2797 roundAndPack:
2798 return
2799 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01002800 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002801
2802}
2803
2804/*
2805-------------------------------------------------------------------------------
2806Returns the result of subtracting the absolute values of the extended
2807double-precision floating-point values `a' and `b'. If `zSign' is true,
2808the difference is negated before being returned. `zSign' is ignored if the
2809result is a NaN. The subtraction is performed according to the IEC/IEEE
2810Standard for Binary Floating-point Arithmetic.
2811-------------------------------------------------------------------------------
2812*/
Richard Purdief148af22005-08-03 19:49:17 +01002813static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002814{
2815 int32 aExp, bExp, zExp;
2816 bits64 aSig, bSig, zSig0, zSig1;
2817 int32 expDiff;
2818 floatx80 z;
2819
2820 aSig = extractFloatx80Frac( a );
2821 aExp = extractFloatx80Exp( a );
2822 bSig = extractFloatx80Frac( b );
2823 bExp = extractFloatx80Exp( b );
2824 expDiff = aExp - bExp;
2825 if ( 0 < expDiff ) goto aExpBigger;
2826 if ( expDiff < 0 ) goto bExpBigger;
2827 if ( aExp == 0x7FFF ) {
2828 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2829 return propagateFloatx80NaN( a, b );
2830 }
Richard Purdief148af22005-08-03 19:49:17 +01002831 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002832 z.low = floatx80_default_nan_low;
2833 z.high = floatx80_default_nan_high;
2834 return z;
2835 }
2836 if ( aExp == 0 ) {
2837 aExp = 1;
2838 bExp = 1;
2839 }
2840 zSig1 = 0;
2841 if ( bSig < aSig ) goto aBigger;
2842 if ( aSig < bSig ) goto bBigger;
Richard Purdief148af22005-08-03 19:49:17 +01002843 return packFloatx80( roundData->mode == float_round_down, 0, 0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002844 bExpBigger:
2845 if ( bExp == 0x7FFF ) {
2846 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2847 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
2848 }
2849 if ( aExp == 0 ) ++expDiff;
2850 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2851 bBigger:
2852 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2853 zExp = bExp;
2854 zSign ^= 1;
2855 goto normalizeRoundAndPack;
2856 aExpBigger:
2857 if ( aExp == 0x7FFF ) {
2858 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2859 return a;
2860 }
2861 if ( bExp == 0 ) --expDiff;
2862 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2863 aBigger:
2864 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2865 zExp = aExp;
2866 normalizeRoundAndPack:
2867 return
2868 normalizeRoundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01002869 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002870
2871}
2872
2873/*
2874-------------------------------------------------------------------------------
2875Returns the result of adding the extended double-precision floating-point
2876values `a' and `b'. The operation is performed according to the IEC/IEEE
2877Standard for Binary Floating-point Arithmetic.
2878-------------------------------------------------------------------------------
2879*/
Richard Purdief148af22005-08-03 19:49:17 +01002880floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002881{
2882 flag aSign, bSign;
2883
2884 aSign = extractFloatx80Sign( a );
2885 bSign = extractFloatx80Sign( b );
2886 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002887 return addFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002888 }
2889 else {
Richard Purdief148af22005-08-03 19:49:17 +01002890 return subFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002891 }
2892
2893}
2894
2895/*
2896-------------------------------------------------------------------------------
2897Returns the result of subtracting the extended double-precision floating-
2898point values `a' and `b'. The operation is performed according to the
2899IEC/IEEE Standard for Binary Floating-point Arithmetic.
2900-------------------------------------------------------------------------------
2901*/
Richard Purdief148af22005-08-03 19:49:17 +01002902floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002903{
2904 flag aSign, bSign;
2905
2906 aSign = extractFloatx80Sign( a );
2907 bSign = extractFloatx80Sign( b );
2908 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002909 return subFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002910 }
2911 else {
Richard Purdief148af22005-08-03 19:49:17 +01002912 return addFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002913 }
2914
2915}
2916
2917/*
2918-------------------------------------------------------------------------------
2919Returns the result of multiplying the extended double-precision floating-
2920point values `a' and `b'. The operation is performed according to the
2921IEC/IEEE Standard for Binary Floating-point Arithmetic.
2922-------------------------------------------------------------------------------
2923*/
Richard Purdief148af22005-08-03 19:49:17 +01002924floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002925{
2926 flag aSign, bSign, zSign;
2927 int32 aExp, bExp, zExp;
2928 bits64 aSig, bSig, zSig0, zSig1;
2929 floatx80 z;
2930
2931 aSig = extractFloatx80Frac( a );
2932 aExp = extractFloatx80Exp( a );
2933 aSign = extractFloatx80Sign( a );
2934 bSig = extractFloatx80Frac( b );
2935 bExp = extractFloatx80Exp( b );
2936 bSign = extractFloatx80Sign( b );
2937 zSign = aSign ^ bSign;
2938 if ( aExp == 0x7FFF ) {
2939 if ( (bits64) ( aSig<<1 )
2940 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
2941 return propagateFloatx80NaN( a, b );
2942 }
2943 if ( ( bExp | bSig ) == 0 ) goto invalid;
2944 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2945 }
2946 if ( bExp == 0x7FFF ) {
2947 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2948 if ( ( aExp | aSig ) == 0 ) {
2949 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01002950 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002951 z.low = floatx80_default_nan_low;
2952 z.high = floatx80_default_nan_high;
2953 return z;
2954 }
2955 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2956 }
2957 if ( aExp == 0 ) {
2958 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2959 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2960 }
2961 if ( bExp == 0 ) {
2962 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2963 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2964 }
2965 zExp = aExp + bExp - 0x3FFE;
2966 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2967 if ( 0 < (sbits64) zSig0 ) {
2968 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2969 --zExp;
2970 }
2971 return
2972 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01002973 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002974
2975}
2976
2977/*
2978-------------------------------------------------------------------------------
2979Returns the result of dividing the extended double-precision floating-point
2980value `a' by the corresponding value `b'. The operation is performed
2981according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2982-------------------------------------------------------------------------------
2983*/
Richard Purdief148af22005-08-03 19:49:17 +01002984floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002985{
2986 flag aSign, bSign, zSign;
2987 int32 aExp, bExp, zExp;
2988 bits64 aSig, bSig, zSig0, zSig1;
2989 bits64 rem0, rem1, rem2, term0, term1, term2;
2990 floatx80 z;
2991
2992 aSig = extractFloatx80Frac( a );
2993 aExp = extractFloatx80Exp( a );
2994 aSign = extractFloatx80Sign( a );
2995 bSig = extractFloatx80Frac( b );
2996 bExp = extractFloatx80Exp( b );
2997 bSign = extractFloatx80Sign( b );
2998 zSign = aSign ^ bSign;
2999 if ( aExp == 0x7FFF ) {
3000 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3001 if ( bExp == 0x7FFF ) {
3002 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3003 goto invalid;
3004 }
3005 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3006 }
3007 if ( bExp == 0x7FFF ) {
3008 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3009 return packFloatx80( zSign, 0, 0 );
3010 }
3011 if ( bExp == 0 ) {
3012 if ( bSig == 0 ) {
3013 if ( ( aExp | aSig ) == 0 ) {
3014 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01003015 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003016 z.low = floatx80_default_nan_low;
3017 z.high = floatx80_default_nan_high;
3018 return z;
3019 }
Richard Purdief148af22005-08-03 19:49:17 +01003020 roundData->exception |= float_flag_divbyzero;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003021 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3022 }
3023 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3024 }
3025 if ( aExp == 0 ) {
3026 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3027 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3028 }
3029 zExp = aExp - bExp + 0x3FFE;
3030 rem1 = 0;
3031 if ( bSig <= aSig ) {
3032 shift128Right( aSig, 0, 1, &aSig, &rem1 );
3033 ++zExp;
3034 }
3035 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3036 mul64To128( bSig, zSig0, &term0, &term1 );
3037 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3038 while ( (sbits64) rem0 < 0 ) {
3039 --zSig0;
3040 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3041 }
3042 zSig1 = estimateDiv128To64( rem1, 0, bSig );
3043 if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3044 mul64To128( bSig, zSig1, &term1, &term2 );
3045 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3046 while ( (sbits64) rem1 < 0 ) {
3047 --zSig1;
3048 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3049 }
3050 zSig1 |= ( ( rem1 | rem2 ) != 0 );
3051 }
3052 return
3053 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01003054 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003055
3056}
3057
3058/*
3059-------------------------------------------------------------------------------
3060Returns the remainder of the extended double-precision floating-point value
3061`a' with respect to the corresponding value `b'. The operation is performed
3062according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3063-------------------------------------------------------------------------------
3064*/
Richard Purdief148af22005-08-03 19:49:17 +01003065floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07003066{
3067 flag aSign, bSign, zSign;
3068 int32 aExp, bExp, expDiff;
3069 bits64 aSig0, aSig1, bSig;
3070 bits64 q, term0, term1, alternateASig0, alternateASig1;
3071 floatx80 z;
3072
3073 aSig0 = extractFloatx80Frac( a );
3074 aExp = extractFloatx80Exp( a );
3075 aSign = extractFloatx80Sign( a );
3076 bSig = extractFloatx80Frac( b );
3077 bExp = extractFloatx80Exp( b );
3078 bSign = extractFloatx80Sign( b );
3079 if ( aExp == 0x7FFF ) {
3080 if ( (bits64) ( aSig0<<1 )
3081 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3082 return propagateFloatx80NaN( a, b );
3083 }
3084 goto invalid;
3085 }
3086 if ( bExp == 0x7FFF ) {
3087 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3088 return a;
3089 }
3090 if ( bExp == 0 ) {
3091 if ( bSig == 0 ) {
3092 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01003093 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003094 z.low = floatx80_default_nan_low;
3095 z.high = floatx80_default_nan_high;
3096 return z;
3097 }
3098 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3099 }
3100 if ( aExp == 0 ) {
3101 if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3102 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3103 }
3104 bSig |= LIT64( 0x8000000000000000 );
3105 zSign = aSign;
3106 expDiff = aExp - bExp;
3107 aSig1 = 0;
3108 if ( expDiff < 0 ) {
3109 if ( expDiff < -1 ) return a;
3110 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3111 expDiff = 0;
3112 }
3113 q = ( bSig <= aSig0 );
3114 if ( q ) aSig0 -= bSig;
3115 expDiff -= 64;
3116 while ( 0 < expDiff ) {
3117 q = estimateDiv128To64( aSig0, aSig1, bSig );
3118 q = ( 2 < q ) ? q - 2 : 0;
3119 mul64To128( bSig, q, &term0, &term1 );
3120 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3121 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3122 expDiff -= 62;
3123 }
3124 expDiff += 64;
3125 if ( 0 < expDiff ) {
3126 q = estimateDiv128To64( aSig0, aSig1, bSig );
3127 q = ( 2 < q ) ? q - 2 : 0;
3128 q >>= 64 - expDiff;
3129 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3130 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3131 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3132 while ( le128( term0, term1, aSig0, aSig1 ) ) {
3133 ++q;
3134 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3135 }
3136 }
3137 else {
3138 term1 = 0;
3139 term0 = bSig;
3140 }
3141 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3142 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3143 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3144 && ( q & 1 ) )
3145 ) {
3146 aSig0 = alternateASig0;
3147 aSig1 = alternateASig1;
3148 zSign = ! zSign;
3149 }
Richard Purdief148af22005-08-03 19:49:17 +01003150
Linus Torvalds1da177e2005-04-16 15:20:36 -07003151 return
3152 normalizeRoundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01003153 roundData, zSign, bExp + expDiff, aSig0, aSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003154
3155}
3156
3157/*
3158-------------------------------------------------------------------------------
3159Returns the square root of the extended double-precision floating-point
3160value `a'. The operation is performed according to the IEC/IEEE Standard
3161for Binary Floating-point Arithmetic.
3162-------------------------------------------------------------------------------
3163*/
Richard Purdief148af22005-08-03 19:49:17 +01003164floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07003165{
3166 flag aSign;
3167 int32 aExp, zExp;
3168 bits64 aSig0, aSig1, zSig0, zSig1;
3169 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3170 bits64 shiftedRem0, shiftedRem1;
3171 floatx80 z;
3172
3173 aSig0 = extractFloatx80Frac( a );
3174 aExp = extractFloatx80Exp( a );
3175 aSign = extractFloatx80Sign( a );
3176 if ( aExp == 0x7FFF ) {
3177 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
3178 if ( ! aSign ) return a;
3179 goto invalid;
3180 }
3181 if ( aSign ) {
3182 if ( ( aExp | aSig0 ) == 0 ) return a;
3183 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01003184 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003185 z.low = floatx80_default_nan_low;
3186 z.high = floatx80_default_nan_high;
3187 return z;
3188 }
3189 if ( aExp == 0 ) {
3190 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3191 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3192 }
3193 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3194 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3195 zSig0 <<= 31;
3196 aSig1 = 0;
3197 shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
3198 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
3199 if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
3200 shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
3201 mul64To128( zSig0, zSig0, &term0, &term1 );
3202 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3203 while ( (sbits64) rem0 < 0 ) {
3204 --zSig0;
3205 shortShift128Left( 0, zSig0, 1, &term0, &term1 );
3206 term1 |= 1;
3207 add128( rem0, rem1, term0, term1, &rem0, &rem1 );
3208 }
3209 shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
3210 zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
3211 if ( (bits64) ( zSig1<<1 ) <= 10 ) {
3212 if ( zSig1 == 0 ) zSig1 = 1;
3213 mul64To128( zSig0, zSig1, &term1, &term2 );
3214 shortShift128Left( term1, term2, 1, &term1, &term2 );
3215 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3216 mul64To128( zSig1, zSig1, &term2, &term3 );
3217 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3218 while ( (sbits64) rem1 < 0 ) {
3219 --zSig1;
3220 shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
3221 term3 |= 1;
3222 add192(
3223 rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
3224 }
3225 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3226 }
3227 return
3228 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01003229 roundData, 0, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003230
3231}
3232
3233/*
3234-------------------------------------------------------------------------------
3235Returns 1 if the extended double-precision floating-point value `a' is
3236equal to the corresponding value `b', and 0 otherwise. The comparison is
3237performed according to the IEC/IEEE Standard for Binary Floating-point
3238Arithmetic.
3239-------------------------------------------------------------------------------
3240*/
3241flag floatx80_eq( floatx80 a, floatx80 b )
3242{
3243
3244 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3245 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3246 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3247 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3248 ) {
3249 if ( floatx80_is_signaling_nan( a )
3250 || floatx80_is_signaling_nan( b ) ) {
Richard Purdie54738e82005-08-15 20:42:32 +01003251 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003252 }
3253 return 0;
3254 }
3255 return
3256 ( a.low == b.low )
3257 && ( ( a.high == b.high )
3258 || ( ( a.low == 0 )
3259 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3260 );
3261
3262}
3263
3264/*
3265-------------------------------------------------------------------------------
3266Returns 1 if the extended double-precision floating-point value `a' is
3267less than or equal to the corresponding value `b', and 0 otherwise. The
3268comparison is performed according to the IEC/IEEE Standard for Binary
3269Floating-point Arithmetic.
3270-------------------------------------------------------------------------------
3271*/
3272flag floatx80_le( floatx80 a, floatx80 b )
3273{
3274 flag aSign, bSign;
3275
3276 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3277 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3278 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3279 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3280 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01003281 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003282 return 0;
3283 }
3284 aSign = extractFloatx80Sign( a );
3285 bSign = extractFloatx80Sign( b );
3286 if ( aSign != bSign ) {
3287 return
3288 aSign
3289 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3290 == 0 );
3291 }
3292 return
3293 aSign ? le128( b.high, b.low, a.high, a.low )
3294 : le128( a.high, a.low, b.high, b.low );
3295
3296}
3297
3298/*
3299-------------------------------------------------------------------------------
3300Returns 1 if the extended double-precision floating-point value `a' is
3301less than the corresponding value `b', and 0 otherwise. The comparison
3302is performed according to the IEC/IEEE Standard for Binary Floating-point
3303Arithmetic.
3304-------------------------------------------------------------------------------
3305*/
3306flag floatx80_lt( floatx80 a, floatx80 b )
3307{
3308 flag aSign, bSign;
3309
3310 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3311 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3312 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3313 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3314 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01003315 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003316 return 0;
3317 }
3318 aSign = extractFloatx80Sign( a );
3319 bSign = extractFloatx80Sign( b );
3320 if ( aSign != bSign ) {
3321 return
3322 aSign
3323 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3324 != 0 );
3325 }
3326 return
3327 aSign ? lt128( b.high, b.low, a.high, a.low )
3328 : lt128( a.high, a.low, b.high, b.low );
3329
3330}
3331
3332/*
3333-------------------------------------------------------------------------------
3334Returns 1 if the extended double-precision floating-point value `a' is equal
3335to the corresponding value `b', and 0 otherwise. The invalid exception is
3336raised if either operand is a NaN. Otherwise, the comparison is performed
3337according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3338-------------------------------------------------------------------------------
3339*/
3340flag floatx80_eq_signaling( floatx80 a, floatx80 b )
3341{
3342
3343 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3344 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3345 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3346 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3347 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01003348 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003349 return 0;
3350 }
3351 return
3352 ( a.low == b.low )
3353 && ( ( a.high == b.high )
3354 || ( ( a.low == 0 )
3355 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3356 );
3357
3358}
3359
3360/*
3361-------------------------------------------------------------------------------
3362Returns 1 if the extended double-precision floating-point value `a' is less
3363than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
3364do not cause an exception. Otherwise, the comparison is performed according
3365to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3366-------------------------------------------------------------------------------
3367*/
3368flag floatx80_le_quiet( floatx80 a, floatx80 b )
3369{
3370 flag aSign, bSign;
3371
3372 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3373 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3374 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3375 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3376 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01003377 /* Do nothing, even if NaN as we're quiet */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003378 return 0;
3379 }
3380 aSign = extractFloatx80Sign( a );
3381 bSign = extractFloatx80Sign( b );
3382 if ( aSign != bSign ) {
3383 return
3384 aSign
3385 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3386 == 0 );
3387 }
3388 return
3389 aSign ? le128( b.high, b.low, a.high, a.low )
3390 : le128( a.high, a.low, b.high, b.low );
3391
3392}
3393
3394/*
3395-------------------------------------------------------------------------------
3396Returns 1 if the extended double-precision floating-point value `a' is less
3397than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
3398an exception. Otherwise, the comparison is performed according to the
3399IEC/IEEE Standard for Binary Floating-point Arithmetic.
3400-------------------------------------------------------------------------------
3401*/
3402flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3403{
3404 flag aSign, bSign;
3405
3406 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3407 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3408 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3409 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3410 ) {
Richard Purdie54738e82005-08-15 20:42:32 +01003411 /* Do nothing, even if NaN as we're quiet */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003412 return 0;
3413 }
3414 aSign = extractFloatx80Sign( a );
3415 bSign = extractFloatx80Sign( b );
3416 if ( aSign != bSign ) {
3417 return
3418 aSign
3419 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3420 != 0 );
3421 }
3422 return
3423 aSign ? lt128( b.high, b.low, a.high, a.low )
3424 : lt128( a.high, a.low, b.high, b.low );
3425
3426}
3427
3428#endif
3429