| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Basic four-word fraction declaration and manipulation. | 
|  | 3 | * | 
|  | 4 | * When adding quadword support for 32 bit machines, we need | 
|  | 5 | * to be a little careful as double multiply uses some of these | 
|  | 6 | * macros: (in op-2.h) | 
|  | 7 | * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4, | 
|  | 8 | * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4 | 
|  | 9 | * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use | 
|  | 10 | * _FP_FRAC_DECL_4: it appears to be broken and is not used | 
|  | 11 | * anywhere anyway. ) | 
|  | 12 | * | 
|  | 13 | * I've now fixed all the macros that were here from the sparc64 code. | 
|  | 14 | * [*none* of the shift macros were correct!] -- PMM 02/1998 | 
|  | 15 | * | 
|  | 16 | * The only quadword stuff that remains to be coded is: | 
|  | 17 | * 1) the conversion to/from ints, which requires | 
|  | 18 | * that we check (in op-common.h) that the following do the right thing | 
|  | 19 | * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt) | 
|  | 20 | * 2) multiply, divide and sqrt, which require: | 
|  | 21 | * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q), | 
|  | 22 | * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to | 
|  | 23 | * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h. | 
|  | 24 | * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for | 
|  | 25 | * these; they are used nowhere else. ] | 
|  | 26 | */ | 
|  | 27 |  | 
/* Declare the storage for a four-word fraction named X.  Word 0 is the
 * least significant word and word 3 the most significant one.
 */
#define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]

/* Copy the four fraction words of S into D, lowest word first.  This is
 * an expression whose value is the copied top word.
 */
#define _FP_FRAC_COPY_4(D,S)				\
  (D##_f[0] = S##_f[0],					\
   D##_f[1] = S##_f[1],					\
   D##_f[2] = S##_f[2],					\
   D##_f[3] = S##_f[3])

/* _FP_FRAC_SET_4(X,I) is meant to be called with I being a macro such as
 * _FP_ZEROFRAC_4 that expands to four comma separated values.  After the
 * rescan we end up with __FP_FRAC_SET_4(X,I3,I2,I1,I0), which stores the
 * four values into X##_f[], most significant word first.
 * That is why the parameter counts do not seem to match at first sight.
 */
#define _FP_FRAC_SET_4(X,I)	__FP_FRAC_SET_4(X, I)

/* Accessors for the top word, the bottom word and an arbitrary word. */
#define _FP_FRAC_HIGH_4(X)	(X##_f[3])
#define _FP_FRAC_LOW_4(X)	(X##_f[0])
#define _FP_FRAC_WORD_4(X,w)	(X##_f[w])
|  | 43 |  | 
/* Shift the four-word fraction X left by N bits, filling with zeros.
 * N is split into a whole-word part (_skip) and a sub-word part (_up).
 * N must be smaller than 4 * _FP_W_TYPE_SIZE.
 *
 * When N is a multiple of the word size, _up is 0 and the complementary
 * count _down equals _FP_W_TYPE_SIZE; shifting a word by its full width
 * is undefined in C, so that case is handled as a plain word move.
 */
#define _FP_FRAC_SLL_4(X,N)						\
  do {									\
    _FP_I_TYPE _up, _down, _skip, _i;					\
    _skip = (N) / _FP_W_TYPE_SIZE;					\
    _up = (N) % _FP_W_TYPE_SIZE;					\
    _down = _FP_W_TYPE_SIZE - _up;					\
    if (!_up)								\
      {									\
	/* whole words only: no bits cross a word boundary */		\
	for (_i = 3; _i >= _skip; --_i)					\
	  X##_f[_i] = X##_f[_i-_skip];					\
      }									\
    else								\
      {									\
	for (_i = 3; _i > _skip; --_i)					\
	  X##_f[_i] = X##_f[_i-_skip] << _up				\
		      | X##_f[_i-_skip-1] >> _down;			\
	/* lowest surviving word has nothing below it to pull in */	\
	X##_f[_i--] = X##_f[0] << _up;					\
      }									\
    /* everything below the shifted data becomes zero */		\
    for (; _i >= 0; --_i)						\
      X##_f[_i] = 0;							\
  } while (0)
|  | 57 |  | 
/* Logical right shift of the four-word fraction X by N bits, filling
 * with zeros from the top.  N must be smaller than 4 * _FP_W_TYPE_SIZE.
 *
 * When N is a multiple of the word size, _down is 0 and the complementary
 * count _up equals _FP_W_TYPE_SIZE; shifting a word by its full width is
 * undefined in C, so that case is handled as a plain word move.
 */
#define _FP_FRAC_SRL_4(X,N)						\
  do {									\
    _FP_I_TYPE _up, _down, _skip, _i;					\
    _skip = (N) / _FP_W_TYPE_SIZE;					\
    _down = (N) % _FP_W_TYPE_SIZE;					\
    _up = _FP_W_TYPE_SIZE - _down;					\
    if (!_down)								\
      {									\
	/* whole words only: no bits cross a word boundary */		\
	for (_i = 0; _i <= 3-_skip; ++_i)				\
	  X##_f[_i] = X##_f[_i+_skip];					\
      }									\
    else								\
      {									\
	for (_i = 0; _i < 3-_skip; ++_i)				\
	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
		      | X##_f[_i+_skip+1] << _up;			\
	/* highest surviving word has nothing above it to pull in */	\
	X##_f[_i++] = X##_f[3] >> _down;				\
      }									\
    /* everything above the shifted data becomes zero */		\
    for (; _i < 4; ++_i)						\
      X##_f[_i] = 0;							\
  } while (0)
|  | 71 |  | 
|  | 72 |  | 
/* Right shift with sticky-lsb.
 * This is a standard logical right shift of X by N bits, except that if
 * any of the bits that fall off the right hand side were one, the least
 * significant bit of the result is forced to one.
 * N must be smaller than 4 * _FP_W_TYPE_SIZE; the size argument is not
 * used by this implementation.
 *
 * When N is a multiple of the word size, _down is 0 and _up equals
 * _FP_W_TYPE_SIZE.  Shifting by the full word width is undefined in C
 * (and would also pollute the sticky word), so that case only moves
 * whole words and takes the sticky information from the skipped words.
 */
#define _FP_FRAC_SRS_4(X,N,size)					\
  do {									\
    _FP_I_TYPE _up, _down, _skip, _i;					\
    _FP_W_TYPE _s;							\
    _skip = (N) / _FP_W_TYPE_SIZE;					\
    _down = (N) % _FP_W_TYPE_SIZE;					\
    _up = _FP_W_TYPE_SIZE - _down;					\
    /* collect the words that are dropped entirely */			\
    for (_s = _i = 0; _i < _skip; ++_i)					\
      _s |= X##_f[_i];							\
    if (!_down)								\
      {									\
	for (_i = 0; _i <= 3-_skip; ++_i)				\
	  X##_f[_i] = X##_f[_i+_skip];					\
      }									\
    else								\
      {									\
	/* plus the low _down bits of the first surviving word */	\
	_s |= X##_f[_i] << _up;						\
	for (_i = 0; _i < 3-_skip; ++_i)				\
	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
		      | X##_f[_i+_skip+1] << _up;			\
	X##_f[_i++] = X##_f[3] >> _down;				\
      }									\
    for (; _i < 4; ++_i)						\
      X##_f[_i] = 0;							\
    /* _s is now != 0 if we want to set the LSbit; do not touch the */	\
    /* LSB until the very end, when f[0] is known to be stable      */	\
    X##_f[0] |= (_s != 0);						\
  } while (0)
|  | 97 |  | 
/* R = X + Y for four-word fractions.  The three operands are spelled out
 * word by word, most significant word first, and handed over to
 * __FP_FRAC_ADD_4 which does the actual arithmetic.
 */
#define _FP_FRAC_ADD_4(R,X,Y)					\
  __FP_FRAC_ADD_4(						\
    R##_f[3], R##_f[2], R##_f[1], R##_f[0],			\
    X##_f[3], X##_f[2], X##_f[1], X##_f[0],			\
    Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
|  | 102 |  | 
/* R = X - Y for four-word fractions.  The three operands are spelled out
 * word by word, most significant word first, and handed over to
 * __FP_FRAC_SUB_4 which does the actual arithmetic.
 */
#define _FP_FRAC_SUB_4(R,X,Y)					\
  __FP_FRAC_SUB_4(						\
    R##_f[3], R##_f[2], R##_f[1], R##_f[0],			\
    X##_f[3], X##_f[2], X##_f[1], X##_f[0],			\
    Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
|  | 107 |  | 
/* X += I, where I is a single-word increment.  The words of X are passed
 * to __FP_FRAC_ADDI_4 most significant word first, followed by I.
 */
#define _FP_FRAC_ADDI_4(X,I)					\
  __FP_FRAC_ADDI_4(						\
    X##_f[3], X##_f[2], X##_f[1], X##_f[0],			\
    I)
|  | 110 |  | 
/* Four comma separated word values, most significant word first: the
 * all-zero fraction and the smallest non-zero fraction.
 */
#define _FP_ZEROFRAC_4	0,0,0,0
#define _FP_MINFRAC_4	0,0,0,1

/* True when every word of the fraction is zero. */
#define _FP_FRAC_ZEROP_4(X)	(!(X##_f[3] | X##_f[2] | X##_f[1] | X##_f[0]))

/* True when the top word, reinterpreted as signed, is negative. */
#define _FP_FRAC_NEGP_4(X)	(0 > (_FP_WS_TYPE)X##_f[3])
/* Non-zero when the overflow bit of format fs is set in X.
 * The overflow bit sits just above the fraction, so it has to be looked
 * up in the most significant word (X##_f[3]); the previous code tested
 * the least significant word instead.
 * NOTE(review): assumes _FP_OVERFLOW_##fs is a mask relative to the top
 * word, as it is for the narrower fraction sizes - confirm against the
 * format header that defines it.
 */
#define _FP_FRAC_OVERP_4(fs,X)  (X##_f[3] & _FP_OVERFLOW_##fs)
|  | 117 |  | 
/* True when X and Y agree in all four fraction words. */
#define _FP_FRAC_EQ_4(X,Y)				\
  (X##_f[3] == Y##_f[3]					\
   && X##_f[2] == Y##_f[2]				\
   && X##_f[1] == Y##_f[1]				\
   && X##_f[0] == Y##_f[0])
|  | 121 |  | 
/* True when X > Y.  The words are compared from the most significant one
 * downwards; the first pair that differs decides, and if only the lowest
 * words are left they must compare strictly greater.
 */
#define _FP_FRAC_GT_4(X,Y)				\
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3]		\
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2]		\
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1]		\
   : X##_f[0] > Y##_f[0])
|  | 130 |  | 
/* True when X >= Y.  The words are compared from the most significant
 * one downwards; the first pair that differs decides, and if only the
 * lowest words are left, equality counts as success.
 */
#define _FP_FRAC_GE_4(X,Y)				\
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3]		\
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2]		\
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1]		\
   : X##_f[0] >= Y##_f[0])
|  | 139 |  | 
|  | 140 |  | 
/* Store in R the number of leading zero bits of the four-word fraction X.
 * The highest non-zero word is located, __FP_CLZ is applied to that word,
 * and _FP_W_TYPE_SIZE is added for every all-zero word above it.
 * If the three upper words are zero, __FP_CLZ is applied to X##_f[0]
 * whatever its value.
 *
 * The X##_f[1] branch used to count the leading zeros of X##_f[2], which
 * is known to be zero at that point; it now inspects X##_f[1].
 */
#define _FP_FRAC_CLZ_4(R,X)		\
  do {					\
    if (X##_f[3])			\
      {					\
	__FP_CLZ(R,X##_f[3]);		\
      }					\
    else if (X##_f[2])			\
      {					\
	__FP_CLZ(R,X##_f[2]);		\
	R += _FP_W_TYPE_SIZE;		\
      }					\
    else if (X##_f[1])			\
      {					\
	__FP_CLZ(R,X##_f[1]);		\
	R += _FP_W_TYPE_SIZE*2;		\
      }					\
    else				\
      {					\
	__FP_CLZ(R,X##_f[0]);		\
	R += _FP_W_TYPE_SIZE*3;		\
      }					\
  } while(0)
|  | 163 |  | 
|  | 164 |  | 
/* Split the floating point value val of format fs into its raw fields.
 * val is copied into the flt member of union _FP_UNION_##fs and the
 * sign, exponent and four fraction fields are read back through the
 * bits member into X##_s, X##_e and X##_f[3..0].
 */
#define _FP_UNPACK_RAW_4(fs, X, val)				\
  do {								\
    union _FP_UNION_##fs _fp_u4;				\
    _fp_u4.flt = (val);						\
    X##_s = _fp_u4.bits.sign;					\
    X##_e = _fp_u4.bits.exp;					\
    X##_f[3] = _fp_u4.bits.frac3;				\
    X##_f[2] = _fp_u4.bits.frac2;				\
    X##_f[1] = _fp_u4.bits.frac1;				\
    X##_f[0] = _fp_u4.bits.frac0;				\
  } while (0)
|  | 175 |  | 
/* Build a floating point value of format fs from raw fields.
 * X##_s, X##_e and X##_f[3..0] are stored into the bits member of a
 * union _FP_UNION_##fs, and the flt member of that union is then
 * assigned to val.
 */
#define _FP_PACK_RAW_4(fs, val, X)				\
  do {								\
    union _FP_UNION_##fs _fp_u4;				\
    _fp_u4.bits.sign  = X##_s;					\
    _fp_u4.bits.exp   = X##_e;					\
    _fp_u4.bits.frac3 = X##_f[3];				\
    _fp_u4.bits.frac2 = X##_f[2];				\
    _fp_u4.bits.frac1 = X##_f[1];				\
    _fp_u4.bits.frac0 = X##_f[0];				\
    (val) = _fp_u4.flt;						\
  } while (0)
|  | 187 |  | 
|  | 188 |  | 
/*
 * Internals
 */

/* Store four word values into X's fraction.  The values are listed most
 * significant word first and are assigned in that order, so the value of
 * the whole expression is the one stored into X##_f[0].
 */
#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)				\
  (X##_f[3] = I3,						\
   X##_f[2] = I2,						\
   X##_f[1] = I1,						\
   X##_f[0] = I0)
|  | 195 |  | 
#ifndef __FP_FRAC_ADD_4
/* (r3,r2,r1,r0) = (x3,x2,x1,x0) + (y3,y2,y1,y0), index 0 being the least
 * significant word.  A carry out of the top word is discarded.
 *
 * All carries are worked out from the input words before any result word
 * is written.  The previous code tested "r0 < x0" after storing r0, which
 * compares a word with itself - and so loses the carry - whenever the
 * result is the same object as the first operand (e.g. X = X + Y).
 *
 * The arguments are evaluated more than once.
 * NOTE(review): the carry tests rely on the words being of an unsigned
 * type at least as wide as int, so that the sums wrap around - confirm
 * that this holds for _FP_W_TYPE on every configuration.
 */
#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
  do {								\
    unsigned int _c1, _c2, _c3;					\
    /* carry out of word 0 */					\
    _c1 = (x0) + (y0) < (x0);					\
    /* carry out of word n: either x+y wraps, or adding the */	\
    /* incoming carry to the wrapped sum wraps              */	\
    _c2 = ((x1) + (y1) < (x1)) | ((x1) + (y1) + _c1 < _c1);	\
    _c3 = ((x2) + (y2) < (x2)) | ((x2) + (y2) + _c2 < _c2);	\
    r3 = (x3) + (y3) + _c3;					\
    r2 = (x2) + (y2) + _c2;					\
    r1 = (x1) + (y1) + _c1;					\
    r0 = (x0) + (y0);						\
  } while (0)
#endif
|  | 213 |  | 
#ifndef __FP_FRAC_SUB_4
/* (r3,r2,r1,r0) = (x3,x2,x1,x0) - (y3,y2,y1,y0), index 0 being the least
 * significant word.  A borrow out of the top word is discarded.
 *
 * Word n borrows from word n+1 when xn < yn, or when xn == yn and word n
 * itself has to lend to the word below.  The previous code detected the
 * second case with "r1 > _c1", which is true for nearly every non-zero
 * result word and therefore produced spurious borrows (for instance
 * subtracting zero from a value with a non-zero word 1).
 *
 * All borrows are derived from the input words before any result word is
 * written, so the result may be the same object as either operand.
 * The arguments are evaluated more than once.
 */
#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
  do {								\
    unsigned int _c1, _c2, _c3;					\
    _c1 = (x0) < (y0);						\
    _c2 = ((x1) < (y1)) | (((x1) == (y1)) & _c1);		\
    _c3 = ((x2) < (y2)) | (((x2) == (y2)) & _c2);		\
    r3 = (x3) - (y3) - _c3;					\
    r2 = (x2) - (y2) - _c2;					\
    r1 = (x1) - (y1) - _c1;					\
    r0 = (x0) - (y0);						\
  } while (0)
#endif
|  | 231 |  | 
#ifndef __FP_FRAC_ADDI_4
/* Add the single-word value i to the four-word number (x3,x2,x1,x0) in
 * place, rippling the carry upwards.  Like the original this is an
 * expression whose value is the updated x3.
 *
 * The former one-liner was built from terms such as "(x0 += i) < x0",
 * which modify and read the same object without sequencing - undefined
 * behaviour in C.  Here every step is separated by a comma operator:
 * word 0 overflowed iff the new x0 is below i, and each higher word
 * overflowed iff it received a carry and wrapped to zero.
 *
 * i is evaluated more than once and must not have side effects.
 * NOTE(review): i is assumed to be non-negative and to fit in one word -
 * confirm against the callers.
 */
#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
  ((x0) += (i),								\
   (x1) += ((x0) < (i)),						\
   (x2) += ((x0) < (i) && (x1) == 0),					\
   (x3) += ((x0) < (i) && (x1) == 0 && (x2) == 0))
#endif
|  | 237 |  | 
/* Convert FP values between word sizes.  This turned out to be more
 * involved than expected, so these macros might be wrong...  They are
 * in any case somewhat bogus because they rely on knowing what the
 * various FRAC_n variables look like internally [e.g. that a two-word
 * fraction consists of X_f0 and X_f1].  The same is true of the
 * conversion macros in op-2.h and op-1.h.
 */

/* Four words -> one word.  S is first shifted right in place, with
 * sticky lsb, by the difference between the working fraction widths of
 * the source and destination formats; its lowest word then becomes D.
 */
#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)				\
  do {									\
    _FP_FRAC_SRS_4(S,							\
		   (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),		\
		   _FP_WFRACBITS_##sfs);				\
    D##_f = S##_f[0];							\
  } while (0)
|  | 251 |  | 
/* Four words -> two words.  S is first shifted right in place, with
 * sticky lsb, by the difference between the working fraction widths of
 * the source and destination formats; its two lowest words then become
 * D_f0 and D_f1.
 */
#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)				\
  do {									\
    _FP_FRAC_SRS_4(S,							\
		   (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),		\
		   _FP_WFRACBITS_##sfs);				\
    D##_f0 = S##_f[0];							\
    D##_f1 = S##_f[1];							\
  } while (0)
|  | 259 |  | 
/* Assembly/disassembly for converting to/from integral types.
 * No shifting or overflow handled here.
 */
/* Build the integer r, which is rsize bits wide, from the low words of
 * the fraction X.  The widest case is tried first:
 *  - more than two words: all four words are folded in, top word first
 *    (int == 3 words is implausible, so it shares the 4-word path);
 *  - more than one word: words 1 and 0;
 *  - otherwise: word 0 only.
 */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)				\
  do {									\
    if (rsize > 2*_FP_W_TYPE_SIZE)					\
      {									\
	r = X##_f[3];							\
	r <<= _FP_W_TYPE_SIZE;						\
	r += X##_f[2];							\
	r <<= _FP_W_TYPE_SIZE;						\
	r += X##_f[1];							\
	r <<= _FP_W_TYPE_SIZE;						\
	r += X##_f[0];							\
      }									\
    else if (rsize > _FP_W_TYPE_SIZE)					\
      {									\
	r = X##_f[1];							\
	r <<= _FP_W_TYPE_SIZE;						\
	r += X##_f[0];							\
      }									\
    else								\
      r = X##_f[0];							\
  } while (0)
|  | 287 |  | 
/* "No disassemble Number Five!" */
/* Spread the integer r, which is rsize bits wide, over the fraction
 * words of X.  Word n receives r shifted right by n words when r is wide
 * enough to reach that far, and zero otherwise.  No masking is done: we
 * rely on each _f[] element being _FP_W_TYPE_SIZE bits wide, so the
 * store itself drops the excess high bits.
 */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)				\
  do {									\
    X##_f[0] = r;							\
    X##_f[1] = (rsize > _FP_W_TYPE_SIZE					\
		? r >> _FP_W_TYPE_SIZE : 0);				\
    X##_f[2] = (rsize > 2*_FP_W_TYPE_SIZE				\
		? r >> 2*_FP_W_TYPE_SIZE : 0);				\
    X##_f[3] = (rsize > 3*_FP_W_TYPE_SIZE				\
		? r >> 3*_FP_W_TYPE_SIZE : 0);				\
  } while (0)
|  | 300 |  | 
/* One word -> four words.  The single source word lands in the lowest
 * word of D, the upper three words are cleared, and D is then shifted
 * left by the difference between the working fraction widths of the
 * destination and source formats.
 */
#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)				\
  do {									\
    D##_f[0] = S##_f;							\
    D##_f[3] = 0;							\
    D##_f[2] = 0;							\
    D##_f[1] = 0;							\
    _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
  } while (0)
|  | 307 |  | 
/* Two words -> four words.  S_f0 and S_f1 land in the two lowest words
 * of D, the upper two words are cleared, and D is then shifted left by
 * the difference between the working fraction widths of the destination
 * and source formats.
 */
#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)				\
  do {									\
    D##_f[0] = S##_f0;							\
    D##_f[1] = S##_f1;							\
    D##_f[3] = 0;							\
    D##_f[2] = 0;							\
    _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
  } while (0)
|  | 315 |  | 
/* FIXME! The four-word square root step still has to be written; until
 * then this macro expands to nothing.
 */
#define _FP_SQRT_MEAT_4(R, S, T, X, q)