| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /*---------------------------------------------------------------------------+ | 
 | 2 |  |  polynomial_Xsig.S                                                        | | 
 | 3 |  |                                                                           | | 
 | 4 |  | Fixed point arithmetic polynomial evaluation.                             | | 
 | 5 |  |                                                                           | | 
 | 6 |  | Copyright (C) 1992,1993,1994,1995                                         | | 
 | 7 |  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      | | 
 | 8 |  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au | | 
 | 9 |  |                                                                           | | 
 | 10 |  | Call from C as:                                                           | | 
 | 11 |  |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 | | 
 | 12 |  |                        unsigned long long terms[], int n)                 | | 
 | 13 |  |                                                                           | | 
 | 14 |  | Computes:                                                                 | | 
 | 15 |  | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  | | 
 | 16 |  | and adds the result to the 12 byte Xsig.                                  | | 
 | 17 |  | The terms[] are each 8 bytes, but all computation is performed to 12 byte | | 
 | 18 |  | precision.                                                                | | 
 | 19 |  |                                                                           | | 
 | 20 |  | This function must be used carefully: most overflow of intermediate       | | 
 | 21 |  | results is controlled, but overflow of the result is not.                 | | 
 | 22 |  |                                                                           | | 
 | 23 |  +---------------------------------------------------------------------------*/ | 
 | 24 | 	.file	"polynomial_Xsig.S" | 
 | 25 |  | 
 | 26 | #include "fpu_emu.h" | 
 | 27 |  | 
 | 28 |  | 
 | 29 | #define	TERM_SIZE	$8 | 
 | 30 | #define	SUM_MS		-20(%ebp)	/* sum ms long */ | 
 | 31 | #define SUM_MIDDLE	-24(%ebp)	/* sum middle long */ | 
 | 32 | #define	SUM_LS		-28(%ebp)	/* sum ls long */ | 
 | 33 | #define	ACCUM_MS	-4(%ebp)	/* accum ms long */ | 
 | 34 | #define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */ | 
 | 35 | #define	ACCUM_LS	-12(%ebp)	/* accum ls long */ | 
 | 36 | #define OVERFLOWED      -16(%ebp)	/* addition overflow flag */ | 
 | 37 |  | 
 | 38 | .text | 
 | 39 | ENTRY(polynomial_Xsig) | 
 | 40 | 	pushl	%ebp | 
 | 41 | 	movl	%esp,%ebp | 
 | 42 | 	subl	$32,%esp | 
 | 43 | 	pushl	%esi | 
 | 44 | 	pushl	%edi | 
 | 45 | 	pushl	%ebx | 
 | 46 |  | 
 | 47 | 	movl	PARAM2,%esi		/* x */ | 
 | 48 | 	movl	PARAM3,%edi		/* terms */ | 
 | 49 |  | 
 | 50 | 	movl	TERM_SIZE,%eax | 
 | 51 | 	mull	PARAM4			/* n */ | 
 | 52 | 	addl	%eax,%edi | 
 | 53 |  | 
 | 54 | 	movl	4(%edi),%edx		/* terms[n] */ | 
 | 55 | 	movl	%edx,SUM_MS | 
 | 56 | 	movl	(%edi),%edx		/* terms[n] */ | 
 | 57 | 	movl	%edx,SUM_MIDDLE | 
 | 58 | 	xor	%eax,%eax | 
 | 59 | 	movl	%eax,SUM_LS | 
 | 60 | 	movb	%al,OVERFLOWED | 
 | 61 |  | 
 | 62 | 	subl	TERM_SIZE,%edi | 
 | 63 | 	decl	PARAM4 | 
 | 64 | 	js	L_accum_done | 
 | 65 |  | 
 | 66 | L_accum_loop: | 
 | 67 | 	xor	%eax,%eax | 
 | 68 | 	movl	%eax,ACCUM_MS | 
 | 69 | 	movl	%eax,ACCUM_MIDDLE | 
 | 70 |  | 
 | 71 | 	movl	SUM_MIDDLE,%eax | 
 | 72 | 	mull	(%esi)			/* x ls long */ | 
 | 73 | 	movl	%edx,ACCUM_LS | 
 | 74 |  | 
 | 75 | 	movl	SUM_MIDDLE,%eax | 
 | 76 | 	mull	4(%esi)			/* x ms long */ | 
 | 77 | 	addl	%eax,ACCUM_LS | 
 | 78 | 	adcl	%edx,ACCUM_MIDDLE | 
 | 79 | 	adcl	$0,ACCUM_MS | 
 | 80 |  | 
 | 81 | 	movl	SUM_MS,%eax | 
 | 82 | 	mull	(%esi)			/* x ls long */ | 
 | 83 | 	addl	%eax,ACCUM_LS | 
 | 84 | 	adcl	%edx,ACCUM_MIDDLE | 
 | 85 | 	adcl	$0,ACCUM_MS | 
 | 86 |  | 
 | 87 | 	movl	SUM_MS,%eax | 
 | 88 | 	mull	4(%esi)			/* x ms long */ | 
 | 89 | 	addl	%eax,ACCUM_MIDDLE | 
 | 90 | 	adcl	%edx,ACCUM_MS | 
 | 91 |  | 
 | 92 | 	testb	$0xff,OVERFLOWED | 
 | 93 | 	jz	L_no_overflow | 
 | 94 |  | 
 | 95 | 	movl	(%esi),%eax | 
 | 96 | 	addl	%eax,ACCUM_MIDDLE | 
 | 97 | 	movl	4(%esi),%eax | 
 | 98 | 	adcl	%eax,ACCUM_MS		/* This could overflow too */ | 
 | 99 |  | 
 | 100 | L_no_overflow: | 
 | 101 |  | 
 | 102 | /* | 
 | 103 |  * Now put the sum of next term and the accumulator | 
 | 104 |  * into the sum register | 
 | 105 |  */ | 
 | 106 | 	movl	ACCUM_LS,%eax | 
 | 107 | 	addl	(%edi),%eax		/* term ls long */ | 
 | 108 | 	movl	%eax,SUM_LS | 
 | 109 | 	movl	ACCUM_MIDDLE,%eax | 
 | 110 | 	adcl	(%edi),%eax		/* term ls long */ | 
 | 111 | 	movl	%eax,SUM_MIDDLE | 
 | 112 | 	movl	ACCUM_MS,%eax | 
 | 113 | 	adcl	4(%edi),%eax		/* term ms long */ | 
 | 114 | 	movl	%eax,SUM_MS | 
 | 115 | 	sbbb	%al,%al | 
 | 116 | 	movb	%al,OVERFLOWED		/* Used in the next iteration */ | 
 | 117 |  | 
 | 118 | 	subl	TERM_SIZE,%edi | 
 | 119 | 	decl	PARAM4 | 
 | 120 | 	jns	L_accum_loop | 
 | 121 |  | 
 | 122 | L_accum_done: | 
 | 123 | 	movl	PARAM1,%edi		/* accum */ | 
 | 124 | 	movl	SUM_LS,%eax | 
 | 125 | 	addl	%eax,(%edi) | 
 | 126 | 	movl	SUM_MIDDLE,%eax | 
 | 127 | 	adcl	%eax,4(%edi) | 
 | 128 | 	movl	SUM_MS,%eax | 
 | 129 | 	adcl	%eax,8(%edi) | 
 | 130 |  | 
 | 131 | 	popl	%ebx | 
 | 132 | 	popl	%edi | 
 | 133 | 	popl	%esi | 
 | 134 | 	leave | 
 | 135 | 	ret |