| Elliott Hughes | a0ee078 | 2013-01-30 19:06:37 -0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * ==================================================== | 
|  | 3 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 
|  | 4 | * | 
|  | 5 | * Developed at SunPro, a Sun Microsystems, Inc. business. | 
|  | 6 | * Permission to use, copy, modify, and distribute this | 
|  | 7 | * software is freely granted, provided that this notice | 
|  | 8 | * is preserved. | 
|  | 9 | * ==================================================== | 
|  | 10 | */ | 
|  | 11 |  | 
|  | 12 | /* | 
|  | 13 | * from: @(#)fdlibm.h 5.1 93/09/24 | 
|  | 14 | * $FreeBSD$ | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | #ifndef _MATH_PRIVATE_H_ | 
|  | 18 | #define	_MATH_PRIVATE_H_ | 
|  | 19 |  | 
|  | 20 | #include <sys/types.h> | 
|  | 21 | #include <machine/endian.h> | 
|  | 22 |  | 
|  | 23 | /* | 
|  | 24 | * The original fdlibm code used statements like: | 
|  | 25 | *	n0 = ((*(int*)&one)>>29)^1;		* index of high word * | 
|  | 26 | *	ix0 = *(n0+(int*)&x);			* high word of x * | 
|  | 27 | *	ix1 = *((1-n0)+(int*)&x);		* low word of x * | 
|  | 28 | * to dig two 32 bit words out of the 64 bit IEEE floating point | 
|  | 29 | * value.  That is non-ANSI, and, moreover, the gcc instruction | 
|  | 30 | * scheduler gets it wrong.  We instead use the following macros. | 
|  | 31 | * Unlike the original code, we determine the endianness at compile | 
|  | 32 | * time, not at run time; I don't see much benefit to selecting | 
|  | 33 | * endianness at run time. | 
|  | 34 | */ | 
|  | 35 |  | 
|  | 36 | /* | 
|  | 37 | * A union which permits us to convert between a double and two 32 bit | 
|  | 38 | * ints. | 
|  | 39 | */ | 
|  | 40 |  | 
|  | 41 | #ifdef __arm__ | 
|  | 42 | #if defined(__VFP_FP__) | 
|  | 43 | #define	IEEE_WORD_ORDER	BYTE_ORDER | 
|  | 44 | #else | 
|  | 45 | #define	IEEE_WORD_ORDER	BIG_ENDIAN | 
|  | 46 | #endif | 
|  | 47 | #else /* __arm__ */ | 
|  | 48 | #define	IEEE_WORD_ORDER	BYTE_ORDER | 
|  | 49 | #endif | 
|  | 50 |  | 
|  | 51 | #if IEEE_WORD_ORDER == BIG_ENDIAN | 
|  | 52 |  | 
|  | 53 | typedef union | 
|  | 54 | { | 
|  | 55 | double value; | 
|  | 56 | struct | 
|  | 57 | { | 
|  | 58 | u_int32_t msw; | 
|  | 59 | u_int32_t lsw; | 
|  | 60 | } parts; | 
|  | 61 | struct | 
|  | 62 | { | 
|  | 63 | u_int64_t w; | 
|  | 64 | } xparts; | 
|  | 65 | } ieee_double_shape_type; | 
|  | 66 |  | 
|  | 67 | #endif | 
|  | 68 |  | 
|  | 69 | #if IEEE_WORD_ORDER == LITTLE_ENDIAN | 
|  | 70 |  | 
|  | 71 | typedef union | 
|  | 72 | { | 
|  | 73 | double value; | 
|  | 74 | struct | 
|  | 75 | { | 
|  | 76 | u_int32_t lsw; | 
|  | 77 | u_int32_t msw; | 
|  | 78 | } parts; | 
|  | 79 | struct | 
|  | 80 | { | 
|  | 81 | u_int64_t w; | 
|  | 82 | } xparts; | 
|  | 83 | } ieee_double_shape_type; | 
|  | 84 |  | 
|  | 85 | #endif | 
|  | 86 |  | 
|  | 87 | /* Get two 32 bit ints from a double.  */ | 
|  | 88 |  | 
|  | 89 | #define EXTRACT_WORDS(ix0,ix1,d)				\ | 
|  | 90 | do {								\ | 
|  | 91 | ieee_double_shape_type ew_u;					\ | 
|  | 92 | ew_u.value = (d);						\ | 
|  | 93 | (ix0) = ew_u.parts.msw;					\ | 
|  | 94 | (ix1) = ew_u.parts.lsw;					\ | 
|  | 95 | } while (0) | 
|  | 96 |  | 
|  | 97 | /* Get a 64-bit int from a double. */ | 
|  | 98 | #define EXTRACT_WORD64(ix,d)					\ | 
|  | 99 | do {								\ | 
|  | 100 | ieee_double_shape_type ew_u;					\ | 
|  | 101 | ew_u.value = (d);						\ | 
|  | 102 | (ix) = ew_u.xparts.w;						\ | 
|  | 103 | } while (0) | 
|  | 104 |  | 
|  | 105 | /* Get the more significant 32 bit int from a double.  */ | 
|  | 106 |  | 
|  | 107 | #define GET_HIGH_WORD(i,d)					\ | 
|  | 108 | do {								\ | 
|  | 109 | ieee_double_shape_type gh_u;					\ | 
|  | 110 | gh_u.value = (d);						\ | 
|  | 111 | (i) = gh_u.parts.msw;						\ | 
|  | 112 | } while (0) | 
|  | 113 |  | 
|  | 114 | /* Get the less significant 32 bit int from a double.  */ | 
|  | 115 |  | 
|  | 116 | #define GET_LOW_WORD(i,d)					\ | 
|  | 117 | do {								\ | 
|  | 118 | ieee_double_shape_type gl_u;					\ | 
|  | 119 | gl_u.value = (d);						\ | 
|  | 120 | (i) = gl_u.parts.lsw;						\ | 
|  | 121 | } while (0) | 
|  | 122 |  | 
|  | 123 | /* Set a double from two 32 bit ints.  */ | 
|  | 124 |  | 
|  | 125 | #define INSERT_WORDS(d,ix0,ix1)					\ | 
|  | 126 | do {								\ | 
|  | 127 | ieee_double_shape_type iw_u;					\ | 
|  | 128 | iw_u.parts.msw = (ix0);					\ | 
|  | 129 | iw_u.parts.lsw = (ix1);					\ | 
|  | 130 | (d) = iw_u.value;						\ | 
|  | 131 | } while (0) | 
|  | 132 |  | 
|  | 133 | /* Set a double from a 64-bit int. */ | 
|  | 134 | #define INSERT_WORD64(d,ix)					\ | 
|  | 135 | do {								\ | 
|  | 136 | ieee_double_shape_type iw_u;					\ | 
|  | 137 | iw_u.xparts.w = (ix);						\ | 
|  | 138 | (d) = iw_u.value;						\ | 
|  | 139 | } while (0) | 
|  | 140 |  | 
|  | 141 | /* Set the more significant 32 bits of a double from an int.  */ | 
|  | 142 |  | 
|  | 143 | #define SET_HIGH_WORD(d,v)					\ | 
|  | 144 | do {								\ | 
|  | 145 | ieee_double_shape_type sh_u;					\ | 
|  | 146 | sh_u.value = (d);						\ | 
|  | 147 | sh_u.parts.msw = (v);						\ | 
|  | 148 | (d) = sh_u.value;						\ | 
|  | 149 | } while (0) | 
|  | 150 |  | 
|  | 151 | /* Set the less significant 32 bits of a double from an int.  */ | 
|  | 152 |  | 
|  | 153 | #define SET_LOW_WORD(d,v)					\ | 
|  | 154 | do {								\ | 
|  | 155 | ieee_double_shape_type sl_u;					\ | 
|  | 156 | sl_u.value = (d);						\ | 
|  | 157 | sl_u.parts.lsw = (v);						\ | 
|  | 158 | (d) = sl_u.value;						\ | 
|  | 159 | } while (0) | 
|  | 160 |  | 
|  | 161 | /* | 
|  | 162 | * A union which permits us to convert between a float and a 32 bit | 
|  | 163 | * int. | 
|  | 164 | */ | 
|  | 165 |  | 
|  | 166 | typedef union | 
|  | 167 | { | 
|  | 168 | float value; | 
|  | 169 | /* FIXME: Assumes 32 bit int.  */ | 
|  | 170 | unsigned int word; | 
|  | 171 | } ieee_float_shape_type; | 
|  | 172 |  | 
|  | 173 | /* Get a 32 bit int from a float.  */ | 
|  | 174 |  | 
|  | 175 | #define GET_FLOAT_WORD(i,d)					\ | 
|  | 176 | do {								\ | 
|  | 177 | ieee_float_shape_type gf_u;					\ | 
|  | 178 | gf_u.value = (d);						\ | 
|  | 179 | (i) = gf_u.word;						\ | 
|  | 180 | } while (0) | 
|  | 181 |  | 
|  | 182 | /* Set a float from a 32 bit int.  */ | 
|  | 183 |  | 
|  | 184 | #define SET_FLOAT_WORD(d,i)					\ | 
|  | 185 | do {								\ | 
|  | 186 | ieee_float_shape_type sf_u;					\ | 
|  | 187 | sf_u.word = (i);						\ | 
|  | 188 | (d) = sf_u.value;						\ | 
|  | 189 | } while (0) | 
|  | 190 |  | 
| Elliott Hughes | 7841946 | 2013-06-12 16:37:58 -0700 | [diff] [blame^] | 191 | /* | 
|  | 192 | * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long | 
|  | 193 | * double. | 
|  | 194 | */ | 
|  | 195 |  | 
|  | 196 | #define	EXTRACT_LDBL80_WORDS(ix0,ix1,d)				\ | 
|  | 197 | do {								\ | 
|  | 198 | union IEEEl2bits ew_u;					\ | 
|  | 199 | ew_u.e = (d);							\ | 
|  | 200 | (ix0) = ew_u.xbits.expsign;					\ | 
|  | 201 | (ix1) = ew_u.xbits.man;					\ | 
|  | 202 | } while (0) | 
|  | 203 |  | 
|  | 204 | /* | 
|  | 205 | * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit | 
|  | 206 | * long double. | 
|  | 207 | */ | 
|  | 208 |  | 
|  | 209 | #define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)			\ | 
|  | 210 | do {								\ | 
|  | 211 | union IEEEl2bits ew_u;					\ | 
|  | 212 | ew_u.e = (d);							\ | 
|  | 213 | (ix0) = ew_u.xbits.expsign;					\ | 
|  | 214 | (ix1) = ew_u.xbits.manh;					\ | 
|  | 215 | (ix2) = ew_u.xbits.manl;					\ | 
|  | 216 | } while (0) | 
|  | 217 |  | 
| Elliott Hughes | a0ee078 | 2013-01-30 19:06:37 -0800 | [diff] [blame] | 218 | /* Get expsign as a 16 bit int from a long double.  */ | 
|  | 219 |  | 
|  | 220 | #define	GET_LDBL_EXPSIGN(i,d)					\ | 
|  | 221 | do {								\ | 
|  | 222 | union IEEEl2bits ge_u;					\ | 
|  | 223 | ge_u.e = (d);							\ | 
|  | 224 | (i) = ge_u.xbits.expsign;					\ | 
|  | 225 | } while (0) | 
|  | 226 |  | 
| Elliott Hughes | 7841946 | 2013-06-12 16:37:58 -0700 | [diff] [blame^] | 227 | /* | 
|  | 228 | * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int | 
|  | 229 | * mantissa. | 
|  | 230 | */ | 
|  | 231 |  | 
|  | 232 | #define	INSERT_LDBL80_WORDS(d,ix0,ix1)				\ | 
|  | 233 | do {								\ | 
|  | 234 | union IEEEl2bits iw_u;					\ | 
|  | 235 | iw_u.xbits.expsign = (ix0);					\ | 
|  | 236 | iw_u.xbits.man = (ix1);					\ | 
|  | 237 | (d) = iw_u.e;							\ | 
|  | 238 | } while (0) | 
|  | 239 |  | 
|  | 240 | /* | 
|  | 241 | * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints | 
|  | 242 | * comprising the mantissa. | 
|  | 243 | */ | 
|  | 244 |  | 
|  | 245 | #define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)			\ | 
|  | 246 | do {								\ | 
|  | 247 | union IEEEl2bits iw_u;					\ | 
|  | 248 | iw_u.xbits.expsign = (ix0);					\ | 
|  | 249 | iw_u.xbits.manh = (ix1);					\ | 
|  | 250 | iw_u.xbits.manl = (ix2);					\ | 
|  | 251 | (d) = iw_u.e;							\ | 
|  | 252 | } while (0) | 
|  | 253 |  | 
| Elliott Hughes | a0ee078 | 2013-01-30 19:06:37 -0800 | [diff] [blame] | 254 | /* Set expsign of a long double from a 16 bit int.  */ | 
|  | 255 |  | 
|  | 256 | #define	SET_LDBL_EXPSIGN(d,v)					\ | 
|  | 257 | do {								\ | 
|  | 258 | union IEEEl2bits se_u;					\ | 
|  | 259 | se_u.e = (d);							\ | 
|  | 260 | se_u.xbits.expsign = (v);					\ | 
|  | 261 | (d) = se_u.e;							\ | 
|  | 262 | } while (0) | 
|  | 263 |  | 
|  | 264 | #ifdef __i386__ | 
|  | 265 | /* Long double constants are broken on i386. */ | 
|  | 266 | #define	LD80C(m, ex, v) {						\ | 
|  | 267 | .xbits.man = __CONCAT(m, ULL),					\ | 
|  | 268 | .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0),	\ | 
|  | 269 | } | 
|  | 270 | #else | 
|  | 271 | /* The above works on non-i386 too, but we use this to check v. */ | 
|  | 272 | #define	LD80C(m, ex, v)	{ .e = (v), } | 
|  | 273 | #endif | 
|  | 274 |  | 
|  | 275 | #ifdef FLT_EVAL_METHOD | 
|  | 276 | /* | 
|  | 277 | * Attempt to get strict C99 semantics for assignment with non-C99 compilers. | 
|  | 278 | */ | 
|  | 279 | #if FLT_EVAL_METHOD == 0 || __GNUC__ == 0 | 
|  | 280 | #define	STRICT_ASSIGN(type, lval, rval)	((lval) = (rval)) | 
|  | 281 | #else | 
|  | 282 | #define	STRICT_ASSIGN(type, lval, rval) do {	\ | 
|  | 283 | volatile type __lval;			\ | 
|  | 284 | \ | 
|  | 285 | if (sizeof(type) >= sizeof(long double))	\ | 
|  | 286 | (lval) = (rval);		\ | 
|  | 287 | else {					\ | 
|  | 288 | __lval = (rval);		\ | 
|  | 289 | (lval) = __lval;		\ | 
|  | 290 | }					\ | 
|  | 291 | } while (0) | 
|  | 292 | #endif | 
|  | 293 | #endif /* FLT_EVAL_METHOD */ | 
|  | 294 |  | 
|  | 295 | /* Support switching the mode to FP_PE if necessary. */ | 
|  | 296 | #if defined(__i386__) && !defined(NO_FPSETPREC) | 
|  | 297 | #define	ENTERI()				\ | 
|  | 298 | long double __retval;			\ | 
|  | 299 | fp_prec_t __oprec;			\ | 
|  | 300 | \ | 
|  | 301 | if ((__oprec = fpgetprec()) != FP_PE)	\ | 
|  | 302 | fpsetprec(FP_PE) | 
|  | 303 | #define	RETURNI(x) do {				\ | 
|  | 304 | __retval = (x);				\ | 
|  | 305 | if (__oprec != FP_PE)			\ | 
|  | 306 | fpsetprec(__oprec);		\ | 
|  | 307 | RETURNF(__retval);			\ | 
|  | 308 | } while (0) | 
|  | 309 | #else | 
|  | 310 | #define	ENTERI(x) | 
|  | 311 | #define	RETURNI(x)	RETURNF(x) | 
|  | 312 | #endif | 
|  | 313 |  | 
|  | 314 | /* Default return statement if hack*_t() is not used. */ | 
|  | 315 | #define      RETURNF(v)      return (v) | 
|  | 316 |  | 
|  | 317 | /* | 
| Elliott Hughes | 7841946 | 2013-06-12 16:37:58 -0700 | [diff] [blame^] | 318 | * 2sum gives the same result as 2sumF without requiring |a| >= |b| or | 
|  | 319 | * a == 0, but is slower. | 
|  | 320 | */ | 
|  | 321 | #define	_2sum(a, b) do {	\ | 
|  | 322 | __typeof(a) __s, __w;	\ | 
|  | 323 | \ | 
|  | 324 | __w = (a) + (b);	\ | 
|  | 325 | __s = __w - (a);	\ | 
|  | 326 | (b) = ((a) - (__w - __s)) + ((b) - __s); \ | 
|  | 327 | (a) = __w;		\ | 
|  | 328 | } while (0) | 
|  | 329 |  | 
|  | 330 | /* | 
|  | 331 | * 2sumF algorithm. | 
|  | 332 | * | 
|  | 333 | * "Normalize" the terms in the infinite-precision expression a + b for | 
|  | 334 | * the sum of 2 floating point values so that b is as small as possible | 
|  | 335 | * relative to 'a'.  (The resulting 'a' is the value of the expression in | 
|  | 336 | * the same precision as 'a' and the resulting b is the rounding error.) | 
|  | 337 | * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and | 
|  | 338 | * exponent overflow or underflow must not occur.  This uses a Theorem of | 
|  | 339 | * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum" | 
|  | 340 | * is apparently due to Skewchuk (1997). | 
|  | 341 | * | 
|  | 342 | * For this to always work, assignment of a + b to 'a' must not retain any | 
|  | 343 | * extra precision in a + b.  This is required by C standards but broken | 
|  | 344 | * in many compilers.  The brokenness cannot be worked around using | 
|  | 345 | * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this | 
|  | 346 | * algorithm would be destroyed by non-null strict assignments.  (The | 
|  | 347 | * compilers are correct to be broken -- the efficiency of all floating | 
|  | 348 | * point code calculations would be destroyed similarly if they forced the | 
|  | 349 | * conversions.) | 
|  | 350 | * | 
|  | 351 | * Fortunately, a case that works well can usually be arranged by building | 
|  | 352 | * any extra precision into the type of 'a' -- 'a' should have type float_t, | 
|  | 353 | * double_t or long double.  b's type should be no larger than 'a's type. | 
|  | 354 | * Callers should use these types with scopes as large as possible, to | 
|  | 355 | * reduce their own extra-precision and efficiciency problems.  In | 
|  | 356 | * particular, they shouldn't convert back and forth just to call here. | 
|  | 357 | */ | 
|  | 358 | #ifdef DEBUG | 
|  | 359 | #define	_2sumF(a, b) do {				\ | 
|  | 360 | __typeof(a) __w;				\ | 
|  | 361 | volatile __typeof(a) __ia, __ib, __r, __vw;	\ | 
|  | 362 | \ | 
|  | 363 | __ia = (a);					\ | 
|  | 364 | __ib = (b);					\ | 
|  | 365 | assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));	\ | 
|  | 366 | \ | 
|  | 367 | __w = (a) + (b);				\ | 
|  | 368 | (b) = ((a) - __w) + (b);			\ | 
|  | 369 | (a) = __w;					\ | 
|  | 370 | \ | 
|  | 371 | /* The next 2 assertions are weak if (a) is already long double. */ \ | 
|  | 372 | assert((long double)__ia + __ib == (long double)(a) + (b));	\ | 
|  | 373 | __vw = __ia + __ib;				\ | 
|  | 374 | __r = __ia - __vw;				\ | 
|  | 375 | __r += __ib;					\ | 
|  | 376 | assert(__vw == (a) && __r == (b));		\ | 
|  | 377 | } while (0) | 
|  | 378 | #else /* !DEBUG */ | 
|  | 379 | #define	_2sumF(a, b) do {	\ | 
|  | 380 | __typeof(a) __w;	\ | 
|  | 381 | \ | 
|  | 382 | __w = (a) + (b);	\ | 
|  | 383 | (b) = ((a) - __w) + (b); \ | 
|  | 384 | (a) = __w;		\ | 
|  | 385 | } while (0) | 
|  | 386 | #endif /* DEBUG */ | 
|  | 387 |  | 
|  | 388 | /* | 
|  | 389 | * Set x += c, where x is represented in extra precision as a + b. | 
|  | 390 | * x must be sufficiently normalized and sufficiently larger than c, | 
|  | 391 | * and the result is then sufficiently normalized. | 
|  | 392 | * | 
|  | 393 | * The details of ordering are that |a| must be >= |c| (so that (a, c) | 
|  | 394 | * can be normalized without extra work to swap 'a' with c).  The details of | 
|  | 395 | * the normalization are that b must be small relative to the normalized 'a'. | 
|  | 396 | * Normalization of (a, c) makes the normalized c tiny relative to the | 
|  | 397 | * normalized a, so b remains small relative to 'a' in the result.  However, | 
|  | 398 | * b need not ever be tiny relative to 'a'.  For example, b might be about | 
|  | 399 | * 2**20 times smaller than 'a' to give about 20 extra bits of precision. | 
|  | 400 | * That is usually enough, and adding c (which by normalization is about | 
|  | 401 | * 2**53 times smaller than a) cannot change b significantly.  However, | 
|  | 402 | * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' | 
|  | 403 | * significantly relative to b.  The caller must ensure that significant | 
|  | 404 | * cancellation doesn't occur, either by having c of the same sign as 'a', | 
|  | 405 | * or by having |c| a few percent smaller than |a|.  Pre-normalization of | 
|  | 406 | * (a, b) may help. | 
|  | 407 | * | 
|  | 408 | * This is is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 | 
|  | 409 | * exercise 19).  We gain considerable efficiency by requiring the terms to | 
|  | 410 | * be sufficiently normalized and sufficiently increasing. | 
|  | 411 | */ | 
|  | 412 | #define	_3sumF(a, b, c) do {	\ | 
|  | 413 | __typeof(a) __tmp;	\ | 
|  | 414 | \ | 
|  | 415 | __tmp = (c);		\ | 
|  | 416 | _2sumF(__tmp, (a));	\ | 
|  | 417 | (b) += (a);		\ | 
|  | 418 | (a) = __tmp;		\ | 
|  | 419 | } while (0) | 
|  | 420 |  | 
|  | 421 | /* | 
| Elliott Hughes | a0ee078 | 2013-01-30 19:06:37 -0800 | [diff] [blame] | 422 | * Common routine to process the arguments to nan(), nanf(), and nanl(). | 
|  | 423 | */ | 
|  | 424 | void _scan_nan(uint32_t *__words, int __num_words, const char *__s); | 
|  | 425 |  | 
|  | 426 | #ifdef _COMPLEX_H | 
|  | 427 |  | 
|  | 428 | /* | 
|  | 429 | * C99 specifies that complex numbers have the same representation as | 
|  | 430 | * an array of two elements, where the first element is the real part | 
|  | 431 | * and the second element is the imaginary part. | 
|  | 432 | */ | 
|  | 433 | typedef union { | 
|  | 434 | float complex f; | 
|  | 435 | float a[2]; | 
|  | 436 | } float_complex; | 
|  | 437 | typedef union { | 
|  | 438 | double complex f; | 
|  | 439 | double a[2]; | 
|  | 440 | } double_complex; | 
|  | 441 | typedef union { | 
|  | 442 | long double complex f; | 
|  | 443 | long double a[2]; | 
|  | 444 | } long_double_complex; | 
|  | 445 | #define	REALPART(z)	((z).a[0]) | 
|  | 446 | #define	IMAGPART(z)	((z).a[1]) | 
|  | 447 |  | 
|  | 448 | /* | 
|  | 449 | * Inline functions that can be used to construct complex values. | 
|  | 450 | * | 
|  | 451 | * The C99 standard intends x+I*y to be used for this, but x+I*y is | 
|  | 452 | * currently unusable in general since gcc introduces many overflow, | 
|  | 453 | * underflow, sign and efficiency bugs by rewriting I*y as | 
|  | 454 | * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. | 
|  | 455 | * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted | 
|  | 456 | * to -0.0+I*0.0. | 
|  | 457 | */ | 
|  | 458 | static __inline float complex | 
|  | 459 | cpackf(float x, float y) | 
|  | 460 | { | 
|  | 461 | float_complex z; | 
|  | 462 |  | 
|  | 463 | REALPART(z) = x; | 
|  | 464 | IMAGPART(z) = y; | 
|  | 465 | return (z.f); | 
|  | 466 | } | 
|  | 467 |  | 
|  | 468 | static __inline double complex | 
|  | 469 | cpack(double x, double y) | 
|  | 470 | { | 
|  | 471 | double_complex z; | 
|  | 472 |  | 
|  | 473 | REALPART(z) = x; | 
|  | 474 | IMAGPART(z) = y; | 
|  | 475 | return (z.f); | 
|  | 476 | } | 
|  | 477 |  | 
|  | 478 | static __inline long double complex | 
|  | 479 | cpackl(long double x, long double y) | 
|  | 480 | { | 
|  | 481 | long_double_complex z; | 
|  | 482 |  | 
|  | 483 | REALPART(z) = x; | 
|  | 484 | IMAGPART(z) = y; | 
|  | 485 | return (z.f); | 
|  | 486 | } | 
|  | 487 | #endif /* _COMPLEX_H */ | 
|  | 488 |  | 
|  | 489 | #ifdef __GNUCLIKE_ASM | 
|  | 490 |  | 
|  | 491 | /* Asm versions of some functions. */ | 
|  | 492 |  | 
|  | 493 | #ifdef __amd64__ | 
|  | 494 | static __inline int | 
|  | 495 | irint(double x) | 
|  | 496 | { | 
|  | 497 | int n; | 
|  | 498 |  | 
|  | 499 | asm("cvtsd2si %1,%0" : "=r" (n) : "x" (x)); | 
|  | 500 | return (n); | 
|  | 501 | } | 
|  | 502 | #define	HAVE_EFFICIENT_IRINT | 
|  | 503 | #endif | 
|  | 504 |  | 
|  | 505 | #ifdef __i386__ | 
|  | 506 | static __inline int | 
|  | 507 | irint(double x) | 
|  | 508 | { | 
|  | 509 | int n; | 
|  | 510 |  | 
|  | 511 | asm("fistl %0" : "=m" (n) : "t" (x)); | 
|  | 512 | return (n); | 
|  | 513 | } | 
|  | 514 | #define	HAVE_EFFICIENT_IRINT | 
|  | 515 | #endif | 
|  | 516 |  | 
|  | 517 | #if defined(__amd64__) || defined(__i386__) | 
|  | 518 | static __inline int | 
|  | 519 | irintl(long double x) | 
|  | 520 | { | 
|  | 521 | int n; | 
|  | 522 |  | 
|  | 523 | asm("fistl %0" : "=m" (n) : "t" (x)); | 
|  | 524 | return (n); | 
|  | 525 | } | 
|  | 526 | #define	HAVE_EFFICIENT_IRINTL | 
|  | 527 | #endif | 
|  | 528 |  | 
|  | 529 | #endif /* __GNUCLIKE_ASM */ | 
|  | 530 |  | 
| Elliott Hughes | 7841946 | 2013-06-12 16:37:58 -0700 | [diff] [blame^] | 531 | #ifdef DEBUG | 
|  | 532 | #if defined(__amd64__) || defined(__i386__) | 
|  | 533 | #define	breakpoint()	asm("int $3") | 
|  | 534 | #else | 
|  | 535 | #include <signal.h> | 
|  | 536 |  | 
|  | 537 | #define	breakpoint()	raise(SIGTRAP) | 
|  | 538 | #endif | 
|  | 539 | #endif | 
|  | 540 |  | 
|  | 541 | /* Write a pari script to test things externally. */ | 
|  | 542 | #ifdef DOPRINT | 
|  | 543 | #include <stdio.h> | 
|  | 544 |  | 
|  | 545 | #ifndef DOPRINT_SWIZZLE | 
|  | 546 | #define	DOPRINT_SWIZZLE		0 | 
|  | 547 | #endif | 
|  | 548 |  | 
|  | 549 | #ifdef DOPRINT_LD80 | 
|  | 550 |  | 
|  | 551 | #define	DOPRINT_START(xp) do {						\ | 
|  | 552 | uint64_t __lx;							\ | 
|  | 553 | uint16_t __hx;							\ | 
|  | 554 | \ | 
|  | 555 | /* Hack to give more-problematic args. */			\ | 
|  | 556 | EXTRACT_LDBL80_WORDS(__hx, __lx, *xp);				\ | 
|  | 557 | __lx ^= DOPRINT_SWIZZLE;					\ | 
|  | 558 | INSERT_LDBL80_WORDS(*xp, __hx, __lx);				\ | 
|  | 559 | printf("x = %.21Lg; ", (long double)*xp);			\ | 
|  | 560 | } while (0) | 
|  | 561 | #define	DOPRINT_END1(v)							\ | 
|  | 562 | printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) | 
|  | 563 | #define	DOPRINT_END2(hi, lo)						\ | 
|  | 564 | printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\ | 
|  | 565 | (long double)(hi), (long double)(lo)) | 
|  | 566 |  | 
|  | 567 | #elif defined(DOPRINT_D64) | 
|  | 568 |  | 
|  | 569 | #define	DOPRINT_START(xp) do {						\ | 
|  | 570 | uint32_t __hx, __lx;						\ | 
|  | 571 | \ | 
|  | 572 | EXTRACT_WORDS(__hx, __lx, *xp);					\ | 
|  | 573 | __lx ^= DOPRINT_SWIZZLE;					\ | 
|  | 574 | INSERT_WORDS(*xp, __hx, __lx);					\ | 
|  | 575 | printf("x = %.21Lg; ", (long double)*xp);			\ | 
|  | 576 | } while (0) | 
|  | 577 | #define	DOPRINT_END1(v)							\ | 
|  | 578 | printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) | 
|  | 579 | #define	DOPRINT_END2(hi, lo)						\ | 
|  | 580 | printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\ | 
|  | 581 | (long double)(hi), (long double)(lo)) | 
|  | 582 |  | 
|  | 583 | #elif defined(DOPRINT_F32) | 
|  | 584 |  | 
|  | 585 | #define	DOPRINT_START(xp) do {						\ | 
|  | 586 | uint32_t __hx;							\ | 
|  | 587 | \ | 
|  | 588 | GET_FLOAT_WORD(__hx, *xp);					\ | 
|  | 589 | __hx ^= DOPRINT_SWIZZLE;					\ | 
|  | 590 | SET_FLOAT_WORD(*xp, __hx);					\ | 
|  | 591 | printf("x = %.21Lg; ", (long double)*xp);			\ | 
|  | 592 | } while (0) | 
|  | 593 | #define	DOPRINT_END1(v)							\ | 
|  | 594 | printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) | 
|  | 595 | #define	DOPRINT_END2(hi, lo)						\ | 
|  | 596 | printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\ | 
|  | 597 | (long double)(hi), (long double)(lo)) | 
|  | 598 |  | 
|  | 599 | #else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */ | 
|  | 600 |  | 
|  | 601 | #ifndef DOPRINT_SWIZZLE_HIGH | 
|  | 602 | #define	DOPRINT_SWIZZLE_HIGH	0 | 
|  | 603 | #endif | 
|  | 604 |  | 
|  | 605 | #define	DOPRINT_START(xp) do {						\ | 
|  | 606 | uint64_t __lx, __llx;						\ | 
|  | 607 | uint16_t __hx;							\ | 
|  | 608 | \ | 
|  | 609 | EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp);			\ | 
|  | 610 | __llx ^= DOPRINT_SWIZZLE;					\ | 
|  | 611 | __lx ^= DOPRINT_SWIZZLE_HIGH;					\ | 
|  | 612 | INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx);			\ | 
|  | 613 | printf("x = %.36Lg; ", (long double)*xp);					\ | 
|  | 614 | } while (0) | 
|  | 615 | #define	DOPRINT_END1(v)							\ | 
|  | 616 | printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v)) | 
|  | 617 | #define	DOPRINT_END2(hi, lo)						\ | 
|  | 618 | printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n",		\ | 
|  | 619 | (long double)(hi), (long double)(lo)) | 
|  | 620 |  | 
|  | 621 | #endif /* DOPRINT_LD80 */ | 
|  | 622 |  | 
|  | 623 | #else /* !DOPRINT */ | 
|  | 624 | #define	DOPRINT_START(xp) | 
|  | 625 | #define	DOPRINT_END1(v) | 
|  | 626 | #define	DOPRINT_END2(hi, lo) | 
|  | 627 | #endif /* DOPRINT */ | 
|  | 628 |  | 
|  | 629 | #define	RETURNP(x) do {			\ | 
|  | 630 | DOPRINT_END1(x);		\ | 
|  | 631 | RETURNF(x);			\ | 
|  | 632 | } while (0) | 
|  | 633 | #define	RETURNPI(x) do {		\ | 
|  | 634 | DOPRINT_END1(x);		\ | 
|  | 635 | RETURNI(x);			\ | 
|  | 636 | } while (0) | 
|  | 637 | #define	RETURN2P(x, y) do {		\ | 
|  | 638 | DOPRINT_END2((x), (y));		\ | 
|  | 639 | RETURNF((x) + (y));		\ | 
|  | 640 | } while (0) | 
|  | 641 | #define	RETURN2PI(x, y) do {		\ | 
|  | 642 | DOPRINT_END2((x), (y));		\ | 
|  | 643 | RETURNI((x) + (y));		\ | 
|  | 644 | } while (0) | 
|  | 645 | #ifdef STRUCT_RETURN | 
|  | 646 | #define	RETURNSP(rp) do {		\ | 
|  | 647 | if (!(rp)->lo_set)		\ | 
|  | 648 | RETURNP((rp)->hi);	\ | 
|  | 649 | RETURN2P((rp)->hi, (rp)->lo);	\ | 
|  | 650 | } while (0) | 
|  | 651 | #define	RETURNSPI(rp) do {		\ | 
|  | 652 | if (!(rp)->lo_set)		\ | 
|  | 653 | RETURNPI((rp)->hi);	\ | 
|  | 654 | RETURN2PI((rp)->hi, (rp)->lo);	\ | 
|  | 655 | } while (0) | 
|  | 656 | #endif | 
|  | 657 | #define	SUM2P(x, y) ({			\ | 
|  | 658 | const __typeof (x) __x = (x);	\ | 
|  | 659 | const __typeof (y) __y = (y);	\ | 
|  | 660 | \ | 
|  | 661 | DOPRINT_END2(__x, __y);		\ | 
|  | 662 | __x + __y;			\ | 
|  | 663 | }) | 
|  | 664 |  | 
| Elliott Hughes | a0ee078 | 2013-01-30 19:06:37 -0800 | [diff] [blame] | 665 | /* | 
|  | 666 | * ieee style elementary functions | 
|  | 667 | * | 
|  | 668 | * We rename functions here to improve other sources' diffability | 
|  | 669 | * against fdlibm. | 
|  | 670 | */ | 
|  | 671 | #define	__ieee754_sqrt	sqrt | 
|  | 672 | #define	__ieee754_acos	acos | 
|  | 673 | #define	__ieee754_acosh	acosh | 
|  | 674 | #define	__ieee754_log	log | 
|  | 675 | #define	__ieee754_log2	log2 | 
|  | 676 | #define	__ieee754_atanh	atanh | 
|  | 677 | #define	__ieee754_asin	asin | 
|  | 678 | #define	__ieee754_atan2	atan2 | 
|  | 679 | #define	__ieee754_exp	exp | 
|  | 680 | #define	__ieee754_cosh	cosh | 
|  | 681 | #define	__ieee754_fmod	fmod | 
|  | 682 | #define	__ieee754_pow	pow | 
|  | 683 | #define	__ieee754_lgamma lgamma | 
|  | 684 | #define	__ieee754_gamma	gamma | 
|  | 685 | #define	__ieee754_lgamma_r lgamma_r | 
|  | 686 | #define	__ieee754_gamma_r gamma_r | 
|  | 687 | #define	__ieee754_log10	log10 | 
|  | 688 | #define	__ieee754_sinh	sinh | 
|  | 689 | #define	__ieee754_hypot	hypot | 
|  | 690 | #define	__ieee754_j0	j0 | 
|  | 691 | #define	__ieee754_j1	j1 | 
|  | 692 | #define	__ieee754_y0	y0 | 
|  | 693 | #define	__ieee754_y1	y1 | 
|  | 694 | #define	__ieee754_jn	jn | 
|  | 695 | #define	__ieee754_yn	yn | 
|  | 696 | #define	__ieee754_remainder remainder | 
|  | 697 | #define	__ieee754_scalb	scalb | 
|  | 698 | #define	__ieee754_sqrtf	sqrtf | 
|  | 699 | #define	__ieee754_acosf	acosf | 
|  | 700 | #define	__ieee754_acoshf acoshf | 
|  | 701 | #define	__ieee754_logf	logf | 
|  | 702 | #define	__ieee754_atanhf atanhf | 
|  | 703 | #define	__ieee754_asinf	asinf | 
|  | 704 | #define	__ieee754_atan2f atan2f | 
|  | 705 | #define	__ieee754_expf	expf | 
|  | 706 | #define	__ieee754_coshf	coshf | 
|  | 707 | #define	__ieee754_fmodf	fmodf | 
|  | 708 | #define	__ieee754_powf	powf | 
|  | 709 | #define	__ieee754_lgammaf lgammaf | 
|  | 710 | #define	__ieee754_gammaf gammaf | 
|  | 711 | #define	__ieee754_lgammaf_r lgammaf_r | 
|  | 712 | #define	__ieee754_gammaf_r gammaf_r | 
|  | 713 | #define	__ieee754_log10f log10f | 
|  | 714 | #define	__ieee754_log2f log2f | 
|  | 715 | #define	__ieee754_sinhf	sinhf | 
|  | 716 | #define	__ieee754_hypotf hypotf | 
|  | 717 | #define	__ieee754_j0f	j0f | 
|  | 718 | #define	__ieee754_j1f	j1f | 
|  | 719 | #define	__ieee754_y0f	y0f | 
|  | 720 | #define	__ieee754_y1f	y1f | 
|  | 721 | #define	__ieee754_jnf	jnf | 
|  | 722 | #define	__ieee754_ynf	ynf | 
|  | 723 | #define	__ieee754_remainderf remainderf | 
|  | 724 | #define	__ieee754_scalbf scalbf | 
|  | 725 |  | 
|  | 726 | /* fdlibm kernel function */ | 
|  | 727 | int	__kernel_rem_pio2(double*,double*,int,int,int); | 
|  | 728 |  | 
|  | 729 | /* double precision kernel functions */ | 
|  | 730 | #ifndef INLINE_REM_PIO2 | 
|  | 731 | int	__ieee754_rem_pio2(double,double*); | 
|  | 732 | #endif | 
|  | 733 | double	__kernel_sin(double,double,int); | 
|  | 734 | double	__kernel_cos(double,double); | 
|  | 735 | double	__kernel_tan(double,double,int); | 
|  | 736 | double	__ldexp_exp(double,int); | 
|  | 737 | #ifdef _COMPLEX_H | 
|  | 738 | double complex __ldexp_cexp(double complex,int); | 
|  | 739 | #endif | 
|  | 740 |  | 
|  | 741 | /* float precision kernel functions */ | 
|  | 742 | #ifndef INLINE_REM_PIO2F | 
|  | 743 | int	__ieee754_rem_pio2f(float,double*); | 
|  | 744 | #endif | 
|  | 745 | #ifndef INLINE_KERNEL_SINDF | 
|  | 746 | float	__kernel_sindf(double); | 
|  | 747 | #endif | 
|  | 748 | #ifndef INLINE_KERNEL_COSDF | 
|  | 749 | float	__kernel_cosdf(double); | 
|  | 750 | #endif | 
|  | 751 | #ifndef INLINE_KERNEL_TANDF | 
|  | 752 | float	__kernel_tandf(double,int); | 
|  | 753 | #endif | 
|  | 754 | float	__ldexp_expf(float,int); | 
|  | 755 | #ifdef _COMPLEX_H | 
|  | 756 | float complex __ldexp_cexpf(float complex,int); | 
|  | 757 | #endif | 
|  | 758 |  | 
|  | 759 | /* long double precision kernel functions */ | 
|  | 760 | long double __kernel_sinl(long double, long double, int); | 
|  | 761 | long double __kernel_cosl(long double, long double); | 
|  | 762 | long double __kernel_tanl(long double, long double, int); | 
|  | 763 |  | 
|  | 764 | #endif /* !_MATH_PRIVATE_H_ */ |