| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /*---------------------------------------------------------------------------+ | 
|  | 2 | |  polynomial_Xsig.S                                                        | | 
|  | 3 | |                                                                           | | 
|  | 4 | | Fixed point arithmetic polynomial evaluation.                             | | 
|  | 5 | |                                                                           | | 
|  | 6 | | Copyright (C) 1992,1993,1994,1995                                         | | 
|  | 7 | |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      | | 
|  | 8 | |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au | | 
|  | 9 | |                                                                           | | 
|  | 10 | | Call from C as:                                                           | | 
|  | 11 | |   void polynomial_Xsig(Xsig *accum, unsigned long long *x,                | | 
|  | 12 | |                        unsigned long long terms[], int n)                 | | 
|  | 13 | |                                                                           | | 
|  | 14 | | Computes:                                                                 | | 
|  | 15 | | terms[0] + x*(terms[1] + x*(terms[2] + ... + x*terms[n]))                 | | 
|  | 16 | | and adds the result to the 12 byte Xsig.                                  | | 
|  | 17 | | The terms[] are each 8 bytes, but all computation is performed to 12 byte | | 
|  | 18 | | precision.                                                                | | 
|  | 19 | |                                                                           | | 
|  | 20 | | This function must be used carefully: most overflow of intermediate       | | 
|  | 21 | | results is controlled, but overflow of the result is not.                 | | 
|  | 22 | |                                                                           | | 
|  | 23 | +---------------------------------------------------------------------------*/ | 
|  | 24 | .file	"polynomial_Xsig.S" | 
|  | 25 |  | 
|  | 26 | #include "fpu_emu.h" | 
|  | 27 |  | 
|  | 28 |  | 
/* Immediate operand: size in bytes of one terms[] entry. */
#define	TERM_SIZE	$8

/*
 * Local frame, addressed relative to %ebp (highest address first):
 *
 *	 -4 .. -12	ACCUM_MS / ACCUM_MIDDLE / ACCUM_LS
 *	-16		OVERFLOWED (accessed as a single byte)
 *	-20 .. -28	SUM_MS / SUM_MIDDLE / SUM_LS
 *
 * Each three-long group is contiguous, with the ls long at the lowest
 * address.
 */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */

#define	OVERFLOWED	-16(%ebp)	/* addition overflow flag */

#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define	SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */
|  | 37 |  | 
.text

/*
 * polynomial_Xsig - fixed point polynomial evaluation by repeated
 * multiply-and-add.
 *
 * Arguments are read through the PARAM1..PARAM4 macros, which are not
 * defined in this file (presumably they come from fpu_emu.h):
 *
 *	PARAM1	accum	pointer to three longs, ls long first; the final
 *			sum is added into them
 *	PARAM2	x	pointer to two longs, ls long first
 *	PARAM3	terms	pointer to the 8 byte coefficients
 *	PARAM4	n	index of the highest coefficient; this slot is
 *			decremented in place and used as the loop counter
 *
 * The running sum starts as terms[n] (placed in SUM_MS:SUM_MIDDLE with
 * SUM_LS = 0).  Then, for i = n-1 down to 0:
 *
 *	accum = upper three longs of (SUM_MS:SUM_MIDDLE) * x
 *	sum   = accum + terms[i]
 *
 * Only the two upper longs of the sum take part in the multiply.  A carry
 * out of the top long of "sum" is remembered in OVERFLOWED and made good
 * in the next pass by adding x itself to the product.  A carry out of the
 * final addition into *accum is not handled.
 *
 * Registers: %esi = x, %edi = current term (later accum), %eax/%edx are
 * scratch.  %ebx is saved and restored although nothing below writes it.
 */
ENTRY(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp		/* space for the SUM_, ACCUM_ and OVERFLOWED slots */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM2,%esi		/* %esi -> x */
	movl	PARAM3,%edi		/* %edi -> terms[0] */

	/* %edi = &terms[n] */
	imull	TERM_SIZE,PARAM4,%eax
	addl	%eax,%edi

	/* sum = terms[n], held in the two upper longs; nothing pending */
	movl	(%edi),%eax
	movl	%eax,SUM_MIDDLE
	movl	4(%edi),%eax
	movl	%eax,SUM_MS
	movl	$0,SUM_LS
	movb	$0,OVERFLOWED

	subl	TERM_SIZE,%edi		/* %edi -> terms[n-1] */
	decl	PARAM4
	js	L_store_result		/* counter went negative: no further terms */

L_horner_step:
	/*
	 * accum = (SUM_MS:SUM_MIDDLE) * x, keeping the upper three longs.
	 * The four 32x32 partial products are added in from the least
	 * significant upwards.
	 */
	movl	$0,ACCUM_MS
	movl	$0,ACCUM_MIDDLE

	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* sum middle * x ls */
	movl	%edx,ACCUM_LS		/* low long of this product is dropped */

	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* sum middle * x ms */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	(%esi)			/* sum ms * x ls */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	4(%esi)			/* sum ms * x ms */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	/*
	 * If the previous addition carried out of SUM_MS, that lost bit
	 * contributes one extra copy of x to the product.
	 */
	cmpb	$0,OVERFLOWED
	je	L_add_term

	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax		/* movl leaves the carry from the addl intact */
	adcl	%eax,ACCUM_MS		/* a carry out of here is not tracked */

L_add_term:
	/*
	 * sum = accum + current term.
	 *
	 * NOTE(review): the term's ls long at (%edi) is added to ACCUM_LS
	 * and again to ACCUM_MIDDLE, whereas the initial load of terms[n]
	 * places that long in SUM_MIDDLE only.  Reproduced exactly as found;
	 * confirm whether the LS addition is intended before altering it,
	 * since removing it changes the low-order bits of the result.
	 */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS
	setc	OVERFLOWED		/* non-zero iff the top long carried out */

	subl	TERM_SIZE,%edi		/* step down to the next lower term */
	decl	PARAM4
	jns	L_horner_step

L_store_result:
	/* *accum += sum, as a three-long add with carry propagation */
	movl	PARAM1,%edi
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret