Blame - arch/x86/math-emu/polynom_Xsig.S - android_kernel_htc_msm8960

blob: 17315c89ff3d07ae1c263a206dfafbe0bd531338 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*---------------------------------------------------------------------------+
				2	\| polynomial_Xsig.S \|
				3	\| \|
				4	\| Fixed point arithmetic polynomial evaluation. \|
				5	\| \|
				6	\| Copyright (C) 1992,1993,1994,1995 \|
				7	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
				8	\| Australia. E-mail billm@jacobi.maths.monash.edu.au \|
				9	\| \|
				10	\| Call from C as: \|
				11	\| void polynomial_Xsig(Xsig *accum, unsigned long long x, \|
				12	\| unsigned long long terms[], int n) \|
				13	\| \|
				14	\| Computes: \|
				15	\| terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]x)x)x)x) ... )*x \|
				16	\| and adds the result to the 12 byte Xsig. \|
				17	\| The terms[] are each 8 bytes, but all computation is performed to 12 byte \|
				18	\| precision. \|
				19	\| \|
				20	\| This function must be used carefully: most overflow of intermediate \|
				21	\| results is controlled, but overflow of the result is not. \|
				22	\| \|
				23	+---------------------------------------------------------------------------*/
				24	.file "polynomial_Xsig.S"
				25
				26	#include "fpu_emu.h"
				27
				28
				29	#define TERM_SIZE $8
				30	#define SUM_MS -20(%ebp) /* sum ms long */
				31	#define SUM_MIDDLE -24(%ebp) /* sum middle long */
				32	#define SUM_LS -28(%ebp) /* sum ls long */
				33	#define ACCUM_MS -4(%ebp) /* accum ms long */
				34	#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
				35	#define ACCUM_LS -12(%ebp) /* accum ls long */
				36	#define OVERFLOWED -16(%ebp) /* addition overflow flag */
				37
				38	.text
				39	ENTRY(polynomial_Xsig)
				40	pushl %ebp
				41	movl %esp,%ebp
				42	subl $32,%esp
				43	pushl %esi
				44	pushl %edi
				45	pushl %ebx
				46
				47	movl PARAM2,%esi /* x */
				48	movl PARAM3,%edi /* terms */
				49
				50	movl TERM_SIZE,%eax
				51	mull PARAM4 /* n */
				52	addl %eax,%edi
				53
				54	movl 4(%edi),%edx /* terms[n] */
				55	movl %edx,SUM_MS
				56	movl (%edi),%edx /* terms[n] */
				57	movl %edx,SUM_MIDDLE
				58	xor %eax,%eax
				59	movl %eax,SUM_LS
				60	movb %al,OVERFLOWED
				61
				62	subl TERM_SIZE,%edi
				63	decl PARAM4
				64	js L_accum_done
				65
				66	L_accum_loop:
				67	xor %eax,%eax
				68	movl %eax,ACCUM_MS
				69	movl %eax,ACCUM_MIDDLE
				70
				71	movl SUM_MIDDLE,%eax
				72	mull (%esi) /* x ls long */
				73	movl %edx,ACCUM_LS
				74
				75	movl SUM_MIDDLE,%eax
				76	mull 4(%esi) /* x ms long */
				77	addl %eax,ACCUM_LS
				78	adcl %edx,ACCUM_MIDDLE
				79	adcl $0,ACCUM_MS
				80
				81	movl SUM_MS,%eax
				82	mull (%esi) /* x ls long */
				83	addl %eax,ACCUM_LS
				84	adcl %edx,ACCUM_MIDDLE
				85	adcl $0,ACCUM_MS
				86
				87	movl SUM_MS,%eax
				88	mull 4(%esi) /* x ms long */
				89	addl %eax,ACCUM_MIDDLE
				90	adcl %edx,ACCUM_MS
				91
				92	testb $0xff,OVERFLOWED
				93	jz L_no_overflow
				94
				95	movl (%esi),%eax
				96	addl %eax,ACCUM_MIDDLE
				97	movl 4(%esi),%eax
				98	adcl %eax,ACCUM_MS /* This could overflow too */
				99
				100	L_no_overflow:
				101
				102	/*
				103	* Now put the sum of next term and the accumulator
				104	* into the sum register
				105	*/
				106	movl ACCUM_LS,%eax
				107	addl (%edi),%eax /* term ls long */
				108	movl %eax,SUM_LS
				109	movl ACCUM_MIDDLE,%eax
				110	adcl (%edi),%eax /* term ls long */
				111	movl %eax,SUM_MIDDLE
				112	movl ACCUM_MS,%eax
				113	adcl 4(%edi),%eax /* term ms long */
				114	movl %eax,SUM_MS
				115	sbbb %al,%al
				116	movb %al,OVERFLOWED /* Used in the next iteration */
				117
				118	subl TERM_SIZE,%edi
				119	decl PARAM4
				120	jns L_accum_loop
				121
				122	L_accum_done:
				123	movl PARAM1,%edi /* accum */
				124	movl SUM_LS,%eax
				125	addl %eax,(%edi)
				126	movl SUM_MIDDLE,%eax
				127	adcl %eax,4(%edi)
				128	movl SUM_MS,%eax
				129	adcl %eax,8(%edi)
				130
				131	popl %ebx
				132	popl %edi
				133	popl %esi
				134	leave
				135	ret