| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | 	.file	"wm_sqrt.S" | 
 | 2 | /*---------------------------------------------------------------------------+ | 
 | 3 |  |  wm_sqrt.S                                                                | | 
 | 4 |  |                                                                           | | 
 | 5 |  | Fixed point arithmetic square root evaluation.                            | | 
 | 6 |  |                                                                           | | 
 | 7 |  | Copyright (C) 1992,1993,1995,1997                                         | | 
 | 8 |  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      | | 
 | 9 |  |                       Australia.  E-mail billm@suburbia.net               | | 
 | 10 |  |                                                                           | | 
 | 11 |  | Call from C as:                                                           | | 
 | 12 |  |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     | | 
 | 13 |  |                                                                           | | 
 | 14 |  +---------------------------------------------------------------------------*/ | 
 | 15 |  | 
 | 16 | /*---------------------------------------------------------------------------+ | 
 | 17 |  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           | | 
 | 18 |  |    returns the square root of n in n.                                     | | 
 | 19 |  |                                                                           | | 
 | 20 |  |  Use Newton's method to compute the square root of a number, which must   | | 
 | 21 |  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     | | 
 | 22 |  |  Does not check the sign or tag of the argument.                          | | 
 | 23 |  |  Sets the exponent, but not the sign or tag of the result.                | | 
 | 24 |  |                                                                           | | 
 | 25 |  |  The guess is kept in %esi:%edi                                           | | 
 | 26 |  +---------------------------------------------------------------------------*/ | 
 | 27 |  | 
 | 28 | #include "exception.h" | 
 | 29 | #include "fpu_emu.h" | 
 | 30 |  | 
 | 31 |  | 
 | 32 | #ifndef NON_REENTRANT_FPU | 
 | 33 | /*	Local storage on the stack: */ | 
 | 34 | #define FPU_accum_3	-4(%ebp)	/* ms word */ | 
 | 35 | #define FPU_accum_2	-8(%ebp) | 
 | 36 | #define FPU_accum_1	-12(%ebp) | 
 | 37 | #define FPU_accum_0	-16(%ebp) | 
 | 38 |  | 
 | 39 | /* | 
 | 40 |  * The de-normalised argument: | 
 | 41 |  *                  sq_2                  sq_1              sq_0 | 
 | 42 |  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0 | 
 | 43 |  *           ^ binary point here | 
 | 44 |  */ | 
 | 45 | #define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */ | 
 | 46 | #define FPU_fsqrt_arg_1	-24(%ebp) | 
 | 47 | #define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */ | 
 | 48 |  | 
 | 49 | #else | 
 | 50 | /*	Local storage in a static area: */ | 
 | 51 | .data | 
 | 52 | 	.align 4,0 | 
 | 53 | FPU_accum_3: | 
 | 54 | 	.long	0		/* ms word */ | 
 | 55 | FPU_accum_2: | 
 | 56 | 	.long	0 | 
 | 57 | FPU_accum_1: | 
 | 58 | 	.long	0 | 
 | 59 | FPU_accum_0: | 
 | 60 | 	.long	0 | 
 | 61 |  | 
 | 62 | /* The de-normalised argument: | 
 | 63 |                     sq_2                  sq_1              sq_0 | 
 | 64 |           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0 | 
 | 65 |              ^ binary point here | 
 | 66 |  */ | 
 | 67 | FPU_fsqrt_arg_2: | 
 | 68 | 	.long	0		/* ms word */ | 
 | 69 | FPU_fsqrt_arg_1: | 
 | 70 | 	.long	0 | 
 | 71 | FPU_fsqrt_arg_0: | 
 | 72 | 	.long	0		/* ls word, at most the ms bit is set */ | 
 | 73 | #endif /* NON_REENTRANT_FPU */  | 
 | 74 |  | 
 | 75 |  | 
 | 76 | .text | 
 | 77 | ENTRY(wm_sqrt) | 
 | 78 | 	pushl	%ebp | 
 | 79 | 	movl	%esp,%ebp | 
 | 80 | #ifndef NON_REENTRANT_FPU | 
 | 81 | 	subl	$28,%esp | 
 | 82 | #endif /* NON_REENTRANT_FPU */ | 
 | 83 | 	pushl	%esi | 
 | 84 | 	pushl	%edi | 
 | 85 | 	pushl	%ebx | 
 | 86 |  | 
 | 87 | 	movl	PARAM1,%esi | 
 | 88 |  | 
 | 89 | 	movl	SIGH(%esi),%eax | 
 | 90 | 	movl	SIGL(%esi),%ecx | 
 | 91 | 	xorl	%edx,%edx | 
 | 92 |  | 
 | 93 | /* We use a rough linear estimate for the first guess.. */ | 
 | 94 |  | 
 | 95 | 	cmpw	EXP_BIAS,EXP(%esi) | 
 | 96 | 	jnz	sqrt_arg_ge_2 | 
 | 97 |  | 
 | 98 | 	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */ | 
 | 99 | 	rcrl	$1,%ecx | 
 | 100 | 	rcrl	$1,%edx | 
 | 101 |  | 
 | 102 | sqrt_arg_ge_2: | 
 | 103 | /* From here on, n is never accessed directly again until it is | 
 | 104 |    replaced by the answer. */ | 
 | 105 |  | 
 | 106 | 	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */ | 
 | 107 | 	movl	%ecx,FPU_fsqrt_arg_1 | 
 | 108 | 	movl	%edx,FPU_fsqrt_arg_0 | 
 | 109 |  | 
 | 110 | /* Make a linear first estimate */ | 
 | 111 | 	shrl	$1,%eax | 
 | 112 | 	addl	$0x40000000,%eax | 
 | 113 | 	movl	$0xaaaaaaaa,%ecx | 
 | 114 | 	mull	%ecx | 
 | 115 | 	shll	%edx			/* max result was 7fff... */ | 
 | 116 | 	testl	$0x80000000,%edx	/* but min was 3fff... */ | 
 | 117 | 	jnz	sqrt_prelim_no_adjust | 
 | 118 |  | 
 | 119 | 	movl	$0x80000000,%edx	/* round up */ | 
 | 120 |  | 
 | 121 | sqrt_prelim_no_adjust: | 
 | 122 | 	movl	%edx,%esi	/* Our first guess */ | 
 | 123 |  | 
 | 124 | /* We have now computed (approx)   (2 + x) / 3, which forms the basis | 
 | 125 |    for a few iterations of Newton's method */ | 
 | 126 |  | 
 | 127 | 	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */ | 
 | 128 |  | 
 | 129 | /* | 
 | 130 |  * From our initial estimate, three iterations are enough to get us | 
 | 131 |  * to 30 bits or so. This will then allow two iterations at better | 
 | 132 |  * precision to complete the process. | 
 | 133 |  */ | 
 | 134 |  | 
 | 135 | /* Compute  (g + n/g)/2  at each iteration (g is the guess). */ | 
 | 136 | 	shrl	%ecx		/* Doing this first will prevent a divide */ | 
 | 137 | 				/* overflow later. */ | 
 | 138 |  | 
 | 139 | 	movl	%ecx,%edx	/* msw of the arg / 2 */ | 
 | 140 | 	divl	%esi		/* current estimate */ | 
 | 141 | 	shrl	%esi		/* divide by 2 */ | 
 | 142 | 	addl	%eax,%esi	/* the new estimate */ | 
 | 143 |  | 
 | 144 | 	movl	%ecx,%edx | 
 | 145 | 	divl	%esi | 
 | 146 | 	shrl	%esi | 
 | 147 | 	addl	%eax,%esi | 
 | 148 |  | 
 | 149 | 	movl	%ecx,%edx | 
 | 150 | 	divl	%esi | 
 | 151 | 	shrl	%esi | 
 | 152 | 	addl	%eax,%esi | 
 | 153 |  | 
 | 154 | /* | 
 | 155 |  * Now that an estimate accurate to about 30 bits has been obtained (in %esi), | 
 | 156 |  * we improve it to 60 bits or so. | 
 | 157 |  * | 
 | 158 |  * The strategy from now on is to compute new estimates from | 
 | 159 |  *      guess := guess + (n - guess^2) / (2 * guess) | 
 | 160 |  */ | 
 | 161 |  | 
 | 162 | /* First, find the square of the guess */ | 
 | 163 | 	movl	%esi,%eax | 
 | 164 | 	mull	%esi | 
 | 165 | /* guess^2 now in %edx:%eax */ | 
 | 166 |  | 
 | 167 | 	movl	FPU_fsqrt_arg_1,%ecx | 
 | 168 | 	subl	%ecx,%eax | 
 | 169 | 	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */ | 
 | 170 | 	sbbl	%ecx,%edx | 
 | 171 | 	jnc	sqrt_stage_2_positive | 
 | 172 |  | 
 | 173 | /* Subtraction gives a negative result, | 
 | 174 |    negate the result before division. */ | 
 | 175 | 	notl	%edx | 
 | 176 | 	notl	%eax | 
 | 177 | 	addl	$1,%eax | 
 | 178 | 	adcl	$0,%edx | 
 | 179 |  | 
 | 180 | 	divl	%esi | 
 | 181 | 	movl	%eax,%ecx | 
 | 182 |  | 
 | 183 | 	movl	%edx,%eax | 
 | 184 | 	divl	%esi | 
 | 185 | 	jmp	sqrt_stage_2_finish | 
 | 186 |  | 
 | 187 | sqrt_stage_2_positive: | 
 | 188 | 	divl	%esi | 
 | 189 | 	movl	%eax,%ecx | 
 | 190 |  | 
 | 191 | 	movl	%edx,%eax | 
 | 192 | 	divl	%esi | 
 | 193 |  | 
 | 194 | 	notl	%ecx | 
 | 195 | 	notl	%eax | 
 | 196 | 	addl	$1,%eax | 
 | 197 | 	adcl	$0,%ecx | 
 | 198 |  | 
 | 199 | sqrt_stage_2_finish: | 
 | 200 | 	sarl	$1,%ecx		/* divide by 2 */ | 
 | 201 | 	rcrl	$1,%eax | 
 | 202 |  | 
 | 203 | 	/* Form the new estimate in %esi:%edi */ | 
 | 204 | 	movl	%eax,%edi | 
 | 205 | 	addl	%ecx,%esi | 
 | 206 |  | 
 | 207 | 	jnz	sqrt_stage_2_done	/* result should be [1..2) */ | 
 | 208 |  | 
 | 209 | #ifdef PARANOID | 
 | 210 | /* It should be possible to get here only if the arg is ffff....ffff */ | 
 | 211 | 	cmp	$0xffffffff,FPU_fsqrt_arg_1 | 
 | 212 | 	jnz	sqrt_stage_2_error | 
 | 213 | #endif /* PARANOID */ | 
 | 214 |  | 
 | 215 | /* The best rounded result. */ | 
 | 216 | 	xorl	%eax,%eax | 
 | 217 | 	decl	%eax | 
 | 218 | 	movl	%eax,%edi | 
 | 219 | 	movl	%eax,%esi | 
 | 220 | 	movl	$0x7fffffff,%eax | 
 | 221 | 	jmp	sqrt_round_result | 
 | 222 |  | 
 | 223 | #ifdef PARANOID | 
 | 224 | sqrt_stage_2_error: | 
 | 225 | 	pushl	EX_INTERNAL|0x213 | 
 | 226 | 	call	EXCEPTION | 
 | 227 | #endif /* PARANOID */  | 
 | 228 |  | 
 | 229 | sqrt_stage_2_done: | 
 | 230 |  | 
 | 231 | /* Now the square root has been computed to better than 60 bits. */ | 
 | 232 |  | 
 | 233 | /* Find the square of the guess. */ | 
 | 234 | 	movl	%edi,%eax		/* ls word of guess */ | 
 | 235 | 	mull	%edi | 
 | 236 | 	movl	%edx,FPU_accum_1 | 
 | 237 |  | 
 | 238 | 	movl	%esi,%eax | 
 | 239 | 	mull	%esi | 
 | 240 | 	movl	%edx,FPU_accum_3 | 
 | 241 | 	movl	%eax,FPU_accum_2 | 
 | 242 |  | 
 | 243 | 	movl	%edi,%eax | 
 | 244 | 	mull	%esi | 
 | 245 | 	addl	%eax,FPU_accum_1 | 
 | 246 | 	adcl	%edx,FPU_accum_2 | 
 | 247 | 	adcl	$0,FPU_accum_3 | 
 | 248 |  | 
 | 249 | /*	movl	%esi,%eax */ | 
 | 250 | /*	mull	%edi */ | 
 | 251 | 	addl	%eax,FPU_accum_1 | 
 | 252 | 	adcl	%edx,FPU_accum_2 | 
 | 253 | 	adcl	$0,FPU_accum_3 | 
 | 254 |  | 
 | 255 | /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ | 
 | 256 |  | 
 | 257 | 	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */ | 
 | 258 | 	subl	%eax,FPU_accum_1 | 
 | 259 | 	movl	FPU_fsqrt_arg_1,%eax | 
 | 260 | 	sbbl	%eax,FPU_accum_2 | 
 | 261 | 	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */ | 
 | 262 | 	sbbl	%eax,FPU_accum_3 | 
 | 263 | 	jnc	sqrt_stage_3_positive | 
 | 264 |  | 
 | 265 | /* Subtraction gives a negative result, | 
 | 266 |    negate the result before division */ | 
 | 267 | 	notl	FPU_accum_1 | 
 | 268 | 	notl	FPU_accum_2 | 
 | 269 | 	notl	FPU_accum_3 | 
 | 270 | 	addl	$1,FPU_accum_1 | 
 | 271 | 	adcl	$0,FPU_accum_2 | 
 | 272 |  | 
 | 273 | #ifdef PARANOID | 
 | 274 | 	adcl	$0,FPU_accum_3	/* This must be zero */ | 
 | 275 | 	jz	sqrt_stage_3_no_error | 
 | 276 |  | 
 | 277 | sqrt_stage_3_error: | 
 | 278 | 	pushl	EX_INTERNAL|0x207 | 
 | 279 | 	call	EXCEPTION | 
 | 280 |  | 
 | 281 | sqrt_stage_3_no_error: | 
 | 282 | #endif /* PARANOID */ | 
 | 283 |  | 
 | 284 | 	movl	FPU_accum_2,%edx | 
 | 285 | 	movl	FPU_accum_1,%eax | 
 | 286 | 	divl	%esi | 
 | 287 | 	movl	%eax,%ecx | 
 | 288 |  | 
 | 289 | 	movl	%edx,%eax | 
 | 290 | 	divl	%esi | 
 | 291 |  | 
 | 292 | 	sarl	$1,%ecx		/* divide by 2 */ | 
 | 293 | 	rcrl	$1,%eax | 
 | 294 |  | 
 | 295 | 	/* prepare to round the result */ | 
 | 296 |  | 
 | 297 | 	addl	%ecx,%edi | 
 | 298 | 	adcl	$0,%esi | 
 | 299 |  | 
 | 300 | 	jmp	sqrt_stage_3_finished | 
 | 301 |  | 
 | 302 | sqrt_stage_3_positive: | 
 | 303 | 	movl	FPU_accum_2,%edx | 
 | 304 | 	movl	FPU_accum_1,%eax | 
 | 305 | 	divl	%esi | 
 | 306 | 	movl	%eax,%ecx | 
 | 307 |  | 
 | 308 | 	movl	%edx,%eax | 
 | 309 | 	divl	%esi | 
 | 310 |  | 
 | 311 | 	sarl	$1,%ecx		/* divide by 2 */ | 
 | 312 | 	rcrl	$1,%eax | 
 | 313 |  | 
 | 314 | 	/* prepare to round the result */ | 
 | 315 |  | 
 | 316 | 	notl	%eax		/* Negate the correction term */ | 
 | 317 | 	notl	%ecx | 
 | 318 | 	addl	$1,%eax | 
 | 319 | 	adcl	$0,%ecx		/* carry here ==> correction == 0 */ | 
 | 320 | 	adcl	$0xffffffff,%esi | 
 | 321 |  | 
 | 322 | 	addl	%ecx,%edi | 
 | 323 | 	adcl	$0,%esi | 
 | 324 |  | 
 | 325 | sqrt_stage_3_finished: | 
 | 326 |  | 
 | 327 | /* | 
 | 328 |  * The result in %esi:%edi:%esi should be good to about 90 bits here, | 
 | 329 |  * and the rounding information here does not have sufficient accuracy | 
 | 330 |  * in a few rare cases. | 
 | 331 |  */ | 
 | 332 | 	cmpl	$0xffffffe0,%eax | 
 | 333 | 	ja	sqrt_near_exact_x | 
 | 334 |  | 
 | 335 | 	cmpl	$0x00000020,%eax | 
 | 336 | 	jb	sqrt_near_exact | 
 | 337 |  | 
 | 338 | 	cmpl	$0x7fffffe0,%eax | 
 | 339 | 	jb	sqrt_round_result | 
 | 340 |  | 
 | 341 | 	cmpl	$0x80000020,%eax | 
 | 342 | 	jb	sqrt_get_more_precision | 
 | 343 |  | 
 | 344 | sqrt_round_result: | 
 | 345 | /* Set up for rounding operations */ | 
 | 346 | 	movl	%eax,%edx | 
 | 347 | 	movl	%esi,%eax | 
 | 348 | 	movl	%edi,%ebx | 
 | 349 | 	movl	PARAM1,%edi | 
 | 350 | 	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */ | 
 | 351 | 	jmp	fpu_reg_round | 
 | 352 |  | 
 | 353 |  | 
 | 354 | sqrt_near_exact_x: | 
 | 355 | /* First, the estimate must be rounded up. */ | 
 | 356 | 	addl	$1,%edi | 
 | 357 | 	adcl	$0,%esi | 
 | 358 |  | 
 | 359 | sqrt_near_exact: | 
 | 360 | /* | 
 | 361 |  * This is an easy case because x^1/2 is monotonic. | 
 | 362 |  * We need just find the square of our estimate, compare it | 
 | 363 |  * with the argument, and deduce whether our estimate is | 
 | 364 |  * above, below, or exact. We use the fact that the estimate | 
 | 365 |  * is known to be accurate to about 90 bits. | 
 | 366 |  */ | 
 | 367 | 	movl	%edi,%eax		/* ls word of guess */ | 
 | 368 | 	mull	%edi | 
 | 369 | 	movl	%edx,%ebx		/* 2nd ls word of square */ | 
 | 370 | 	movl	%eax,%ecx		/* ls word of square */ | 
 | 371 |  | 
 | 372 | 	movl	%edi,%eax | 
 | 373 | 	mull	%esi | 
 | 374 | 	addl	%eax,%ebx | 
 | 375 | 	addl	%eax,%ebx | 
 | 376 |  | 
 | 377 | #ifdef PARANOID | 
 | 378 | 	cmp	$0xffffffb0,%ebx | 
 | 379 | 	jb	sqrt_near_exact_ok | 
 | 380 |  | 
 | 381 | 	cmp	$0x00000050,%ebx | 
 | 382 | 	ja	sqrt_near_exact_ok | 
 | 383 |  | 
 | 384 | 	pushl	EX_INTERNAL|0x214 | 
 | 385 | 	call	EXCEPTION | 
 | 386 |  | 
 | 387 | sqrt_near_exact_ok: | 
 | 388 | #endif /* PARANOID */  | 
 | 389 |  | 
 | 390 | 	or	%ebx,%ebx | 
 | 391 | 	js	sqrt_near_exact_small | 
 | 392 |  | 
 | 393 | 	jnz	sqrt_near_exact_large | 
 | 394 |  | 
 | 395 | 	or	%ebx,%edx | 
 | 396 | 	jnz	sqrt_near_exact_large | 
 | 397 |  | 
 | 398 | /* Our estimate is exactly the right answer */ | 
 | 399 | 	xorl	%eax,%eax | 
 | 400 | 	jmp	sqrt_round_result | 
 | 401 |  | 
 | 402 | sqrt_near_exact_small: | 
 | 403 | /* Our estimate is too small */ | 
 | 404 | 	movl	$0x000000ff,%eax | 
 | 405 | 	jmp	sqrt_round_result | 
 | 406 | 	 | 
 | 407 | sqrt_near_exact_large: | 
 | 408 | /* Our estimate is too large, we need to decrement it */ | 
 | 409 | 	subl	$1,%edi | 
 | 410 | 	sbbl	$0,%esi | 
 | 411 | 	movl	$0xffffff00,%eax | 
 | 412 | 	jmp	sqrt_round_result | 
 | 413 |  | 
 | 414 |  | 
 | 415 | sqrt_get_more_precision: | 
 | 416 | /* This case is almost the same as the above, except we start | 
 | 417 |    with an extra bit of precision in the estimate. */ | 
 | 418 | 	stc			/* The extra bit. */ | 
 | 419 | 	rcll	$1,%edi		/* Shift the estimate left one bit */ | 
 | 420 | 	rcll	$1,%esi | 
 | 421 |  | 
 | 422 | 	movl	%edi,%eax		/* ls word of guess */ | 
 | 423 | 	mull	%edi | 
 | 424 | 	movl	%edx,%ebx		/* 2nd ls word of square */ | 
 | 425 | 	movl	%eax,%ecx		/* ls word of square */ | 
 | 426 |  | 
 | 427 | 	movl	%edi,%eax | 
 | 428 | 	mull	%esi | 
 | 429 | 	addl	%eax,%ebx | 
 | 430 | 	addl	%eax,%ebx | 
 | 431 |  | 
 | 432 | /* Put our estimate back to its original value */ | 
 | 433 | 	stc			/* The ms bit. */ | 
 | 434 | 	rcrl	$1,%esi		/* Shift the estimate left one bit */ | 
 | 435 | 	rcrl	$1,%edi | 
 | 436 |  | 
 | 437 | #ifdef PARANOID | 
 | 438 | 	cmp	$0xffffff60,%ebx | 
 | 439 | 	jb	sqrt_more_prec_ok | 
 | 440 |  | 
 | 441 | 	cmp	$0x000000a0,%ebx | 
 | 442 | 	ja	sqrt_more_prec_ok | 
 | 443 |  | 
 | 444 | 	pushl	EX_INTERNAL|0x215 | 
 | 445 | 	call	EXCEPTION | 
 | 446 |  | 
 | 447 | sqrt_more_prec_ok: | 
 | 448 | #endif /* PARANOID */  | 
 | 449 |  | 
 | 450 | 	or	%ebx,%ebx | 
 | 451 | 	js	sqrt_more_prec_small | 
 | 452 |  | 
 | 453 | 	jnz	sqrt_more_prec_large | 
 | 454 |  | 
 | 455 | 	or	%ebx,%ecx | 
 | 456 | 	jnz	sqrt_more_prec_large | 
 | 457 |  | 
 | 458 | /* Our estimate is exactly the right answer */ | 
 | 459 | 	movl	$0x80000000,%eax | 
 | 460 | 	jmp	sqrt_round_result | 
 | 461 |  | 
 | 462 | sqrt_more_prec_small: | 
 | 463 | /* Our estimate is too small */ | 
 | 464 | 	movl	$0x800000ff,%eax | 
 | 465 | 	jmp	sqrt_round_result | 
 | 466 | 	 | 
 | 467 | sqrt_more_prec_large: | 
 | 468 | /* Our estimate is too large */ | 
 | 469 | 	movl	$0x7fffff00,%eax | 
 | 470 | 	jmp	sqrt_round_result |