Blame - arch/m68k/fpsp040/bindec.S - android_kernel_oneplus_msm8996

blob: f2e795231046049bd41950bd642178f81065c819 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	\|
				2	\| bindec.sa 3.4 1/3/91
				3	\|
				4	\| bindec
				5	\|
				6	\| Description:
				7	\| Converts an input in extended precision format
				8	\| to bcd format.
				9	\|
				10	\| Input:
				11	\| a0 points to the input extended precision value
				12	\| value in memory; d0 contains the k-factor sign-extended
				13	\| to 32-bits. The input may be either normalized,
				14	\| unnormalized, or denormalized.
				15	\|
				16	\| Output: result in the FP_SCR1 space on the stack.
				17	\|
				18	\| Saves and Modifies: D2-D7,A2,FP0-FP2
				19	\|
				20	\| Algorithm:
				21	\|
				22	\| A1. Set RM and size ext; Set SIGMA = sign of input.
				23	\| The k-factor is saved for use in d7. Clear the
				24	\| BINDEC_FLG for separating normalized/denormalized
				25	\| input. If input is unnormalized or denormalized,
				26	\| normalize it.
				27	\|
				28	\| A2. Set X = abs(input).
				29	\|
				30	\| A3. Compute ILOG.
				31	\| ILOG is the log base 10 of the input value. It is
				32	\| approximated by adding e + 0.f when the original
				33	\| value is viewed as 2^^e * 1.f in extended precision.
				34	\| This value is stored in d6.
				35	\|
				36	\| A4. Clr INEX bit.
				37	\| The operation in A3 above may have set INEX2.
				38	\|
				39	\| A5. Set ICTR = 0;
				40	\| ICTR is a flag used in A13. It must be set before the
				41	\| loop entry A6.
				42	\|
				43	\| A6. Calculate LEN.
				44	\| LEN is the number of digits to be displayed. The
				45	\| k-factor can dictate either the total number of digits,
				46	\| if it is a positive number, or the number of digits
				47	\| after the decimal point which are to be included as
				48	\| significant. See the 68882 manual for examples.
				49	\| If LEN is computed to be greater than 17, set OPERR in
				50	\| USER_FPSR. LEN is stored in d4.
				51	\|
				52	\| A7. Calculate SCALE.
				53	\| SCALE is equal to 10^ISCALE, where ISCALE is the number
				54	\| of decimal places needed to insure LEN integer digits
				55	\| in the output before conversion to bcd. LAMBDA is the
				56	\| sign of ISCALE, used in A9. Fp1 contains
				57	\| 10^^(abs(ISCALE)) using a rounding mode which is a
				58	\| function of the original rounding mode and the signs
				59	\| of ISCALE and X. A table is given in the code.
				60	\|
				61	\| A8. Clr INEX; Force RZ.
				62	\| The operations in A7 above may have set INEX2.
				63	\| RZ mode is forced for the scaling operation to insure
				64	\| only one rounding error. The grs bits are collected in
				65	\| the INEX flag for use in A10.
				66	\|
				67	\| A9. Scale X -> Y.
				68	\| The mantissa is scaled to the desired number of
				69	\| significant digits. The excess digits are collected
				70	\| in INEX2.
				71	\|
				72	\| A10. Or in INEX.
				73	\| If INEX is set, round error occurred. This is
				74	\| compensated for by 'or-ing' in the INEX2 flag to
				75	\| the lsb of Y.
				76	\|
				77	\| A11. Restore original FPCR; set size ext.
				78	\| Perform FINT operation in the user's rounding mode.
				79	\| Keep the size to extended.
				80	\|
				81	\| A12. Calculate YINT = FINT(Y) according to user's rounding
				82	\| mode. The FPSP routine sintdo is used. The output
				83	\| is in fp0.
				84	\|
				85	\| A13. Check for LEN digits.
				86	\| If the int operation results in more than LEN digits,
				87	\| or less than LEN -1 digits, adjust ILOG and repeat from
				88	\| A6. This test occurs only on the first pass. If the
				89	\| result is exactly 10^LEN, increment ILOG and divide
				90	\| the mantissa by 10.
				91	\|
				92	\| A14. Convert the mantissa to bcd.
				93	\| The binstr routine is used to convert the LEN digit
				94	\| mantissa to bcd in memory. The input to binstr is
				95	\| to be a fraction; i.e. (mantissa)/10^LEN and adjusted
				96	\| such that the decimal point is to the left of bit 63.
				97	\| The bcd digits are stored in the correct position in
				98	\| the final string area in memory.
				99	\|
				100	\| A15. Convert the exponent to bcd.
				101	\| As in A14 above, the exp is converted to bcd and the
				102	\| digits are stored in the final string.
				103	\| Test the length of the final exponent string. If the
				104	\| length is 4, set operr.
				105	\|
				106	\| A16. Write sign bits to final string.
				107	\|
				108	\| Implementation Notes:
				109	\|
				110	\| The registers are used as follows:
				111	\|
				112	\| d0: scratch; LEN input to binstr
				113	\| d1: scratch
				114	\| d2: upper 32-bits of mantissa for binstr
				115	\| d3: scratch;lower 32-bits of mantissa for binstr
				116	\| d4: LEN
				117	\| d5: LAMBDA/ICTR
				118	\| d6: ILOG
				119	\| d7: k-factor
				120	\| a0: ptr for original operand/final result
				121	\| a1: scratch pointer
				122	\| a2: pointer to FP_X; abs(original value) in ext
				123	\| fp0: scratch
				124	\| fp1: scratch
				125	\| fp2: scratch
				126	\| F_SCR1:
				127	\| F_SCR2:
				128	\| L_SCR1:
				129	\| L_SCR2:
				130
				131	\| Copyright (C) Motorola, Inc. 1990
				132	\| All Rights Reserved
				133	\|
Matt Waddel	e00d82d	2006-02-11 17:55:48 -0800	[diff] [blame]	134	\| For details on the license for this file, please see the
				135	\| file, README, in this same directory.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	136
				137	\|BINDEC idnt 2,1 \| Motorola 040 Floating Point Software Package
				138
				139	#include "fpsp.h"
				140
				141	\|section 8
				142
				143	\| Constants in extended precision
				144	LOG2: .long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 \|log10(2), ~0.30103, in extended format
				145	LOG2UP1: .long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 \|LOG2 with the mantissa lsb incremented by 1
				146
				147	\| Constants in single precision
				148	FONE: .long 0x3F800000,0x00000000,0x00000000,0x00000000 \|1.0
				149	FTWO: .long 0x40000000,0x00000000,0x00000000,0x00000000 \|2.0
				150	FTEN: .long 0x41200000,0x00000000,0x00000000,0x00000000 \|10.0
				151	F4933: .long 0x459A2800,0x00000000,0x00000000,0x00000000 \|4933.0
				152
				153	RBDTBL: .byte 0,0,0,0 \|rmode lookup used in A7; this row: user rmode RN
				154	.byte 3,3,2,2 \|user rmode RZ (columns: LAMBDA:SIGN(X) = 00,01,10,11)
				155	.byte 3,2,2,3 \|user rmode RM
				156	.byte 2,3,3,2 \|user rmode RP
				157
				158	\|xref binstr
				159	\|xref sintdo
				160	\|xref ptenrn,ptenrm,ptenrp
				161
				162	.global bindec
				163	.global sc_mul
				164	bindec:
				165	moveml %d2-%d7/%a2,-(%a7) \|save d2-d7/a2; restored before rts
				166	fmovemx %fp0-%fp2,-(%a7) \|save fp0-fp2; restored before rts
				167
				168	\| A1. Set RM and size ext. Set SIGMA = sign input;
				169	\| The k-factor is saved for use in d7. Clear BINDEC_FLG for
				170	\| separating normalized/denormalized input. If the input
				171	\| is a denormalized number, set the BINDEC_FLG memory word
				172	\| to signal denorm. If the input is unnormalized, normalize
				173	\| the input and test for denormalized result.
				174	\|
				175	fmovel #rm_mode,%FPCR \|set RM and ext
				176	movel (%a0),L_SCR2(%a6) \|save first lword (sign/exponent) for later sign checks
				177	movel %d0,%d7 \|move k-factor to d7
				178	clrb BINDEC_FLG(%a6) \|clr norm/denorm flag
				179	movew STAG(%a6),%d0 \|get stag
				180	andiw #0xe000,%d0 \|isolate stag bits
				181	beq A2_str \|if zero, input is norm
				182	\|
				183	\| Normalize the denorm
				184	\|
				185	un_de_norm:
				186	movew (%a0),%d0 \|get sign/exponent word of input
				187	andiw #0x7fff,%d0 \|strip sign bit, leaving the biased exponent
				188	movel 4(%a0),%d1 \|d1 = upper 32 bits of mantissa
				189	movel 8(%a0),%d2 \|d2 = lower 32 bits of mantissa
				190	norm_loop:
				191	subw #1,%d0 \|decrement exponent for each 1-bit shift
				192	lsll #1,%d2 \|shift lower mantissa left, msb into X
				193	roxll #1,%d1 \|rotate X into upper mantissa
				194	tstl %d1 \|test msb of upper mantissa
				195	bges norm_loop \|loop until the msb is set
				196	\|
				197	\| Test if the normalized input is denormalized
				198	\|
				199	tstw %d0 \|test the adjusted exponent
				200	bgts pos_exp \|if greater than zero, it is a norm
				201	st BINDEC_FLG(%a6) \|set flag for denorm
				202	pos_exp:
				203	andiw #0x7fff,%d0 \|strip sign of normalized exp
				204	movew %d0,(%a0) \|write exponent back with sign clear (original kept in L_SCR2)
				205	movel %d1,4(%a0) \|write back normalized upper mantissa
				206	movel %d2,8(%a0) \|write back normalized lower mantissa
				207
				208	\| A2. Set X = abs(input).
				209	\|
				210	A2_str:
				211	movel (%a0),FP_SCR2(%a6) \|copy sign/exponent lword of input to work space
				212	movel 4(%a0),FP_SCR2+4(%a6) \|copy upper 32 bits of mantissa
				213	movel 8(%a0),FP_SCR2+8(%a6) \|copy lower 32 bits of mantissa
				214	andil #0x7fffffff,FP_SCR2(%a6) \|clear sign bit to create abs(X)
				215
				216	\| A3. Compute ILOG.
				217	\| ILOG is the log base 10 of the input value. It is approx-
				218	\| imated by adding e + 0.f when the original value is viewed
				219	\| as 2^^e * 1.f in extended precision. This value is stored
				220	\| in d6.
				221	\|
				222	\| Register usage:
				223	\| Input/Output
				224	\| d0: k-factor/exponent
				225	\| d2: x/x
				226	\| d3: x/x
				227	\| d4: x/x
				228	\| d5: x/x
				229	\| d6: x/ILOG
				230	\| d7: k-factor/Unchanged
				231	\| a0: ptr for original operand/final result
				232	\| a1: x/x
				233	\| a2: x/x
				234	\| fp0: x/float(ILOG)
				235	\| fp1: x/x
				236	\| fp2: x/x
				237	\| F_SCR1:x/x
				238	\| F_SCR2:Abs(X)/Abs(X) with $3fff exponent
				239	\| L_SCR1:x/x
				240	\| L_SCR2:first word of X packed/Unchanged
				241
				242	tstb BINDEC_FLG(%a6) \|check for denorm
				243	beqs A3_cont \|if clr, continue with norm
				244	movel #-4933,%d6 \|force ILOG = -4933
				245	bras A4_str \|denorm: skip the log estimate
				246	A3_cont:
				247	movew FP_SCR2(%a6),%d0 \|move exp to d0
				248	movew #0x3fff,FP_SCR2(%a6) \|replace exponent with 0x3fff
				249	fmovex FP_SCR2(%a6),%fp0 \|now fp0 has 1.f
				250	subw #0x3fff,%d0 \|strip off bias, d0 = e
				251	faddw %d0,%fp0 \|add in exp, fp0 = e + 1.f
				252	fsubs FONE,%fp0 \|subtract off 1.0, fp0 = e + 0.f
				253	fbge pos_res \|if e + 0.f >= 0, branch
				254	fmulx LOG2UP1,%fp0 \|if neg, mul by LOG2UP1
				255	fmovel %fp0,%d6 \|put ILOG in d6 as a lword (FPCR rounding was set in A1)
				256	bras A4_str \|join the common path at A4
				257	pos_res:
				258	fmulx LOG2,%fp0 \|if pos, mul by LOG2
				259	fmovel %fp0,%d6 \|put ILOG in d6 as a lword (FPCR rounding was set in A1)
				260
				261
				262	\| A4. Clr INEX bit.
				263	\| The operation in A3 above may have set INEX2.
				264
				265	A4_str:
				266	fmovel #0,%FPSR \|zero all of fpsr (drops any INEX2 from A3) - nothing needed
				267
				268
				269	\| A5. Set ICTR = 0;
				270	\| ICTR is a flag used in A13. It must be set before the
				271	\| loop entry A6. The lower word of d5 is used for ICTR.
				272
				273	clrw %d5 \|clear ICTR (lower word of d5): no adjustment pass yet
				274
				275
				276	\| A6. Calculate LEN.
				277	\| LEN is the number of digits to be displayed. The k-factor
				278	\| can dictate either the total number of digits, if it is
				279	\| a positive number, or the number of digits after the
				280	\| original decimal point which are to be included as
				281	\| significant. See the 68882 manual for examples.
				282	\| If LEN is computed to be greater than 17, set OPERR in
				283	\| USER_FPSR. LEN is stored in d4.
				284	\|
				285	\| Register usage:
				286	\| Input/Output
				287	\| d0: exponent/Unchanged
				288	\| d2: x/x/scratch
				289	\| d3: x/x
				290	\| d4: exc picture/LEN
				291	\| d5: ICTR/Unchanged
				292	\| d6: ILOG/Unchanged
				293	\| d7: k-factor/Unchanged
				294	\| a0: ptr for original operand/final result
				295	\| a1: x/x
				296	\| a2: x/x
				297	\| fp0: float(ILOG)/Unchanged
				298	\| fp1: x/x
				299	\| fp2: x/x
				300	\| F_SCR1:x/x
				301	\| F_SCR2:Abs(X) with $3fff exponent/Unchanged
				302	\| L_SCR1:x/x
				303	\| L_SCR2:first word of X packed/Unchanged
				304
				305	A6_str:
				306	tstl %d7 \|branch on sign of k
				307	bles k_neg \|if k <= 0, LEN = ILOG + 1 - k
				308	movel %d7,%d4 \|if k > 0, LEN = k
				309	bras len_ck \|skip to LEN check
				310	k_neg:
				311	movel %d6,%d4 \|first load ILOG to d4
				312	subl %d7,%d4 \|subtract off k
				313	addql #1,%d4 \|add in the 1
				314	len_ck:
				315	tstl %d4 \|LEN check: branch on sign of LEN
				316	bles LEN_ng \|if LEN <= 0, set LEN = 1
				317	cmpl #17,%d4 \|test if LEN > 17
				318	bles A7_str \|if LEN <= 17, keep it as is
				319	movel #17,%d4 \|set max LEN = 17
				320	tstl %d7 \|if k <= 0, never set OPERR
				321	bles A7_str \|k <= 0: skip OPERR; k > 0: fall through
				322	orl #opaop_mask,USER_FPSR(%a6) \|set OPERR & AIOP in USER_FPSR
				323	bras A7_str \|finished here
				324	LEN_ng:
				325	moveql #1,%d4 \|min LEN is 1
				326
				327
				328	\| A7. Calculate SCALE.
				329	\| SCALE is equal to 10^ISCALE, where ISCALE is the number
				330	\| of decimal places needed to insure LEN integer digits
				331	\| in the output before conversion to bcd. LAMBDA is the sign
				332	\| of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
				333	\| the rounding mode as given in the following table (see
				334	\| Coonen, p. 7.23 as ref.; however, the SCALE variable is
				335	\| of opposite sign in bindec.sa from Coonen).
				336	\|
				337	\| Initial USE
				338	\| FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
				339	\| ----------------------------------------------
				340	\| RN 00 0 0 00/0 RN
				341	\| RN 00 0 1 00/0 RN
				342	\| RN 00 1 0 00/0 RN
				343	\| RN 00 1 1 00/0 RN
				344	\| RZ 01 0 0 11/3 RP
				345	\| RZ 01 0 1 11/3 RP
				346	\| RZ 01 1 0 10/2 RM
				347	\| RZ 01 1 1 10/2 RM
				348	\| RM 10 0 0 11/3 RP
				349	\| RM 10 0 1 10/2 RM
				350	\| RM 10 1 0 10/2 RM
				351	\| RM 10 1 1 11/3 RP
				352	\| RP 11 0 0 10/2 RM
				353	\| RP 11 0 1 11/3 RP
				354	\| RP 11 1 0 11/3 RP
				355	\| RP 11 1 1 10/2 RM
				356	\|
				357	\| Register usage:
				358	\| Input/Output
				359	\| d0: exponent/scratch - final is 0
				360	\| d2: x/0 or 24 for A9
				361	\| d3: x/scratch - offset ptr into PTENRM array
				362	\| d4: LEN/Unchanged
				363	\| d5: 0/ICTR:LAMBDA
				364	\| d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
				365	\| d7: k-factor/Unchanged
				366	\| a0: ptr for original operand/final result
				367	\| a1: x/ptr to PTENRM array
				368	\| a2: x/x
				369	\| fp0: float(ILOG)/Unchanged
				370	\| fp1: x/10^ISCALE
				371	\| fp2: x/x
				372	\| F_SCR1:x/x
				373	\| F_SCR2:Abs(X) with $3fff exponent/Unchanged
				374	\| L_SCR1:x/x
				375	\| L_SCR2:first word of X packed/Unchanged
				376
				377	A7_str:
				378	tstl %d7 \|test sign of k
				379	bgts k_pos \|if k > 0, skip this
				380	cmpl %d6,%d7 \|test k - ILOG
				381	blts k_pos \|if k < ILOG, skip this
				382	movel %d7,%d6 \|if ((k<=0) & (ILOG <= k)) ILOG = k
				383	k_pos:
				384	movel %d6,%d0 \|calc ILOG + 1 - LEN in d0
				385	addql #1,%d0 \|add the 1
				386	subl %d4,%d0 \|sub off LEN
				387	swap %d5 \|move ICTR to upper word; lower word of d5 holds LAMBDA
				388	clrw %d5 \|set LAMBDA zero initially
				389	clrw %d2 \|set up d2 for very small case
				390	tstl %d0 \|test sign of ISCALE
				391	bges iscale \|if ISCALE >= 0, LAMBDA stays 0
				392	addqw #1,%d5 \|if neg, set LAMBDA true
				393	cmpl #0xffffecd4,%d0 \|test iscale <= -4908
				394	bgts no_inf \|if false, skip rest
				395	addil #24,%d0 \|add in 24 to iscale
				396	movel #24,%d2 \|put 24 in d2 for A9
				397	no_inf:
				398	negl %d0 \|and take abs of ISCALE
				399	iscale:
				400	fmoves FONE,%fp1 \|init fp1 to 1
				401	bfextu USER_FPCR(%a6){#26:#2},%d1 \|get initial rmode bits
				402	lslw #1,%d1 \|put them in bits 2:1
				403	addw %d5,%d1 \|add in LAMBDA
				404	lslw #1,%d1 \|put them in bits 3:1
				405	tstl L_SCR2(%a6) \|test sign of original x
				406	bges x_pos \|if pos, don't set bit 0
				407	addql #1,%d1 \|if neg, set bit 0
				408	x_pos:
				409	leal RBDTBL,%a2 \|load rbdtbl base
				410	moveb (%a2,%d1),%d3 \|load low byte of d3 with new rmode
				411	lsll #4,%d3 \|put bits in proper position
				412	fmovel %d3,%fpcr \|load bits into fpu (NOTE(review): moveb set only d3[7:0]; confirm d3[31:8] is clear here)
				413	lsrl #4,%d3 \|shift rmode back down to bits 1:0 for the decode below
				414	tstb %d3 \|decode new rmode for pten table
				415	bnes not_rn \|if zero, it is RN
				416	leal PTENRN,%a1 \|load a1 with RN table base
				417	bras rmode \|exit decode
				418	not_rn:
				419	lsrb #1,%d3 \|get lsb in carry
				420	bccs not_rp \|if carry clear, it is RM
				421	leal PTENRP,%a1 \|load a1 with RP table base
				422	bras rmode \|exit decode
				423	not_rp:
				424	leal PTENRM,%a1 \|load a1 with RM table base
				425	rmode:
				426	clrl %d3 \|clr table index
				427	e_loop:
				428	lsrl #1,%d0 \|shift next bit of abs(ISCALE) into carry
				429	bccs e_next \|if zero, skip the mul
				430	fmulx (%a1,%d3),%fp1 \|mul by the table entry for this bit position
				431	e_next:
				432	addl #12,%d3 \|inc d3 to next pwrten table entry
				433	tstl %d0 \|test if any bits of ISCALE remain
				434	bnes e_loop \|if so, loop
				435
				436
				437	\| A8. Clr INEX; Force RZ.
				438	\| The operations in A7 above may have set INEX2.
				439	\| RZ mode is forced for the scaling operation to insure
				440	\| only one rounding error. The grs bits are collected in
				441	\| the INEX flag for use in A10.
				442	\|
				443	\| Register usage:
				444	\| Input/Output
				445
				446	fmovel #0,%FPSR \|clr INEX (zeroes all of FPSR) before the scaling op
				447	fmovel #rz_mode,%FPCR \|set RZ rounding mode
				448
				449
				450	\| A9. Scale X -> Y.
				451	\| The mantissa is scaled to the desired number of significant
				452	\| digits. The excess digits are collected in INEX2. If mul,
				453	\| Check d2 for excess 10 exponential value. If not zero,
				454	\| the iscale value would have caused the pwrten calculation
				455	\| to overflow. Only a negative iscale can cause this, so
				456	\| multiply by 10^(d2), which is now only allowed to be 24,
				457	\| with a multiply by 10^8 and 10^16, which is exact since
				458	\| 10^24 is exact. If the input was denormalized, we must
				459	\| create a busy stack frame with the mul command and the
				460	\| two operands, and allow the fpu to complete the multiply.
				461	\|
				462	\| Register usage:
				463	\| Input/Output
				464	\| d0: FPCR with RZ mode/Unchanged
				465	\| d2: 0 or 24/unchanged
				466	\| d3: x/x
				467	\| d4: LEN/Unchanged
				468	\| d5: ICTR:LAMBDA
				469	\| d6: ILOG/Unchanged
				470	\| d7: k-factor/Unchanged
				471	\| a0: ptr for original operand/final result
				472	\| a1: ptr to PTENRM array/Unchanged
				473	\| a2: x/x
				474	\| fp0: float(ILOG)/X adjusted for SCALE (Y)
				475	\| fp1: 10^ISCALE/Unchanged
				476	\| fp2: x/x
				477	\| F_SCR1:x/x
				478	\| F_SCR2:Abs(X) with $3fff exponent/Unchanged
				479	\| L_SCR1:x/x
				480	\| L_SCR2:first word of X packed/Unchanged
				481
				482	A9_str:
				483	fmovex (%a0),%fp0 \|load X from memory
				484	fabsx %fp0 \|use abs(X)
				485	tstw %d5 \|LAMBDA is in lower word of d5
				486	bne sc_mul \|if neg (LAMBDA = 1), scale by mul
				487	fdivx %fp1,%fp0 \|calculate X / SCALE -> Y to fp0
				488	bras A10_st \|branch to A10
				489
				490	sc_mul:
				491	tstb BINDEC_FLG(%a6) \|check for denorm
				492	beqs A9_norm \|if norm, continue with mul
				493	fmovemx %fp1-%fp1,-(%a7) \|load ETEMP with 10^ISCALE
				494	movel 8(%a0),-(%a7) \|load FPTEMP with input arg (lower mantissa first)
				495	movel 4(%a0),-(%a7) \|push upper mantissa
				496	movel (%a0),-(%a7) \|push sign/exponent lword
				497	movel #18,%d3 \|load count for busy stack
				498	A9_loop:
				499	clrl -(%a7) \|clear lword on stack
				500	dbf %d3,A9_loop \|loop runs 19 times (d3 = 18 down to -1)
				501	moveb VER_TMP(%a6),(%a7) \|write current version number
				502	moveb #BUSY_SIZE-4,1(%a7) \|write current busy size
				503	moveb #0x10,0x44(%a7) \|set fcefpte[15] bit
				504	movew #0x0023,0x40(%a7) \|load cmdreg1b with mul command
				505	moveb #0xfe,0x8(%a7) \|load all 1s to cu savepc
				506	frestore (%a7)+ \|restore frame to fpu for completion
				507	fmulx 36(%a1),%fp0 \|multiply fp0 by 10^8
				508	fmulx 48(%a1),%fp0 \|multiply fp0 by 10^16
				509	bras A10_st \|denorm scaling done, go to A10
				510	A9_norm:
				511	tstw %d2 \|test for small exp case
				512	beqs A9_con \|if zero, continue as normal
				513	fmulx 36(%a1),%fp0 \|multiply fp0 by 10^8
				514	fmulx 48(%a1),%fp0 \|multiply fp0 by 10^16
				515	A9_con:
				516	fmulx %fp1,%fp0 \|calculate X * SCALE -> Y to fp0
				517
				518
				519	\| A10. Or in INEX.
				520	\| If INEX is set, round error occurred. This is compensated
				521	\| for by 'or-ing' in the INEX2 flag to the lsb of Y.
				522	\|
				523	\| Register usage:
				524	\| Input/Output
				525	\| d0: FPCR with RZ mode/FPSR with INEX2 isolated
				526	\| d2: x/x
				527	\| d3: x/x
				528	\| d4: LEN/Unchanged
				529	\| d5: ICTR:LAMBDA
				530	\| d6: ILOG/Unchanged
				531	\| d7: k-factor/Unchanged
				532	\| a0: ptr for original operand/final result
				533	\| a1: ptr to PTENxx array/Unchanged
				534	\| a2: x/ptr to FP_SCR2(a6)
				535	\| fp0: Y/Y with lsb adjusted
				536	\| fp1: 10^ISCALE/Unchanged
				537	\| fp2: x/x
				538
				539	A10_st:
				540	fmovel %FPSR,%d0 \|get FPSR as left by the A9 scaling op
				541	fmovex %fp0,FP_SCR2(%a6) \|move Y to memory
				542	leal FP_SCR2(%a6),%a2 \|load a2 with ptr to FP_SCR2
				543	btstl #9,%d0 \|check if INEX2 set
				544	beqs A11_st \|if clear, no adjustment is needed
				545	oril #1,8(%a2) \|or in 1 to lsb of mantissa
				546	fmovex FP_SCR2(%a6),%fp0 \|write adjusted Y back to fpu
				547
				548
				549	\| A11. Restore original FPCR; set size ext.
				550	\| Perform FINT operation in the user's rounding mode. Keep
				551	\| the size to extended. The sintdo entry point in the sint
				552	\| routine expects the FPCR value to be in USER_FPCR for
				553	\| mode and precision. The original FPCR is saved in L_SCR1.
				554
				555	A11_st:
				556	movel USER_FPCR(%a6),L_SCR1(%a6) \|save user's FPCR in L_SCR1; restored at end of A12
				557	andil #0x00000030,USER_FPCR(%a6) \|keep only the rmode bits: set size to ext,
				558	\| ;block exceptions
				559
				560
				561	\| A12. Calculate YINT = FINT(Y) according to user's rounding mode.
				562	\| The FPSP routine sintdo is used. The output is in fp0.
				563	\|
				564	\| Register usage:
				565	\| Input/Output
				566	\| d0: FPSR with AINEX cleared/FPCR with size set to ext
				567	\| d2: x/x/scratch
				568	\| d3: x/x
				569	\| d4: LEN/Unchanged
				570	\| d5: ICTR:LAMBDA/Unchanged
				571	\| d6: ILOG/Unchanged
				572	\| d7: k-factor/Unchanged
				573	\| a0: ptr for original operand/src ptr for sintdo
				574	\| a1: ptr to PTENxx array/Unchanged
				575	\| a2: ptr to FP_SCR2(a6)/Unchanged
				576	\| a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
				577	\| fp0: Y/YINT
				578	\| fp1: 10^ISCALE/Unchanged
				579	\| fp2: x/x
				580	\| F_SCR1:x/x
				581	\| F_SCR2:Y adjusted for inex/Y with original exponent
				582	\| L_SCR1:x/original USER_FPCR
				583	\| L_SCR2:first word of X packed/Unchanged
				584
				585	A12_st:
				586	moveml %d0-%d1/%a0-%a1,-(%a7) \|save regs used by sintdo
				587	movel L_SCR1(%a6),-(%a7) \|save L_SCR1 across the call
				588	movel L_SCR2(%a6),-(%a7) \|save L_SCR2 across the call
				589	leal FP_SCR2(%a6),%a0 \|a0 is ptr to FP_SCR2(a6)
				590	fmovex %fp0,(%a0) \|move Y to memory at FP_SCR2(a6)
				591	tstl L_SCR2(%a6) \|test sign of original operand
				592	bges do_fint \|if pos, use Y
				593	orl #0x80000000,(%a0) \|if neg, use -Y
				594	do_fint:
				595	movel USER_FPSR(%a6),-(%a7) \|save USER_FPSR across the call
				596	bsr sintdo \|sint routine returns int in fp0
				597	moveb (%a7),USER_FPSR(%a6) \|restore only the first (high-order) byte of USER_FPSR
				598	addl #4,%a7 \|discard the saved USER_FPSR lword
				599	movel (%a7)+,L_SCR2(%a6) \|restore L_SCR2
				600	movel (%a7)+,L_SCR1(%a6) \|restore L_SCR1
				601	moveml (%a7)+,%d0-%d1/%a0-%a1 \|restore regs used by sintdo
				602	movel L_SCR2(%a6),FP_SCR2(%a6) \|restore original exponent
				603	movel L_SCR1(%a6),USER_FPCR(%a6) \|restore user's FPCR
				604
				605
				606	\| A13. Check for LEN digits.
				607	\| If the int operation results in more than LEN digits,
				608	\| or less than LEN -1 digits, adjust ILOG and repeat from
				609	\| A6. This test occurs only on the first pass. If the
				610	\| result is exactly 10^LEN, increment ILOG and divide
				611	\| the mantissa by 10. The calculation of 10^LEN cannot
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	612	\| be inexact, since all powers of ten up to 10^27 are exact
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	613	\| in extended precision, so the use of a previous power-of-ten
				614	\| table will introduce no error.
				615	\|
				616	\|
				617	\| Register usage:
				618	\| Input/Output
				619	\| d0: FPCR with size set to ext/scratch final = 0
				620	\| d2: x/x
				621	\| d3: x/scratch final = x
				622	\| d4: LEN/LEN adjusted
				623	\| d5: ICTR:LAMBDA/LAMBDA:ICTR
				624	\| d6: ILOG/ILOG adjusted
				625	\| d7: k-factor/Unchanged
				626	\| a0: pointer into memory for packed bcd string formation
				627	\| a1: ptr to PTENxx array/Unchanged
				628	\| a2: ptr to FP_SCR2(a6)/Unchanged
				629	\| fp0: int portion of Y/abs(YINT) adjusted
				630	\| fp1: 10^ISCALE/Unchanged
				631	\| fp2: x/10^LEN
				632	\| F_SCR1:x/x
				633	\| F_SCR2:Y with original exponent/Unchanged
				634	\| L_SCR1:original USER_FPCR/Unchanged
				635	\| L_SCR2:first word of X packed/Unchanged
				636
				637	A13_st:
				638	swap %d5 \|put ICTR in lower word of d5
				639	tstw %d5 \|check if ICTR = 0
				640	bne not_zr \|if non-zero, go to second test
				641	\|
				642	\| Compute 10^(LEN-1)
				643	\|
				644	fmoves FONE,%fp2 \|init fp2 to 1.0
				645	movel %d4,%d0 \|put LEN in d0
				646	subql #1,%d0 \|d0 = LEN -1
				647	clrl %d3 \|clr table index
				648	l_loop:
				649	lsrl #1,%d0 \|shift next bit into carry
				650	bccs l_next \|if zero, skip the mul
				651	fmulx (%a1,%d3),%fp2 \|mul by the table entry for this bit position
				652	l_next:
				653	addl #12,%d3 \|inc d3 to next pwrten table entry
				654	tstl %d0 \|test if any bits of LEN-1 remain
				655	bnes l_loop \|if so, loop
				656	\|
				657	\| 10^(LEN-1) is computed for this test and A14. If the input was
				658	\| denormalized, check only the case in which YINT > 10^LEN.
				659	\|
				660	tstb BINDEC_FLG(%a6) \|check if input was norm
				661	beqs A13_con \|if norm, continue with checking
				662	fabsx %fp0 \|take abs of YINT
				663	bra test_2 \|denorm: skip the 10^(LEN-1) test
				664	\|
				665	\| Compare abs(YINT) to 10^(LEN-1) and 10^LEN
				666	\|
				667	A13_con:
				668	fabsx %fp0 \|take abs of YINT
				669	fcmpx %fp2,%fp0 \|compare abs(YINT) with 10^(LEN-1)
				670	fbge test_2 \|if greater or equal, do next test
				671	subql #1,%d6 \|subtract 1 from ILOG
				672	movew #1,%d5 \|set ICTR
				673	fmovel #rm_mode,%FPCR \|set rmode to RM
				674	fmuls FTEN,%fp2 \|compute 10^LEN
				675	bra A6_str \|return to A6 and recompute YINT
				676	test_2:
				677	fmuls FTEN,%fp2 \|compute 10^LEN
				678	fcmpx %fp2,%fp0 \|compare abs(YINT) with 10^LEN
				679	fblt A14_st \|if less, all is ok, go to A14
				680	fbgt fix_ex \|if greater, fix and redo
				681	fdivs FTEN,%fp0 \|if equal, divide by 10
				682	addql #1,%d6 \| and inc ILOG
				683	bras A14_st \| and continue elsewhere
				684	fix_ex:
				685	addql #1,%d6 \|increment ILOG by 1
				686	movew #1,%d5 \|set ICTR
				687	fmovel #rm_mode,%FPCR \|set rmode to RM
				688	bra A6_str \|return to A6 and recompute YINT
				689	\|
				690	\| Since ICTR <> 0, we have already been through one adjustment,
				691	\| and shouldn't have another; this is to check if abs(YINT) = 10^LEN
				692	\| 10^LEN is again computed using whatever table is in a1 since the
				693	\| value calculated cannot be inexact.
				694	\|
				695	not_zr:
				696	fmoves FONE,%fp2 \|init fp2 to 1.0
				697	movel %d4,%d0 \|put LEN in d0
				698	clrl %d3 \|clr table index
				699	z_loop:
				700	lsrl #1,%d0 \|shift next bit into carry
				701	bccs z_next \|if zero, skip the mul
				702	fmulx (%a1,%d3),%fp2 \|mul by the table entry for this bit position
				703	z_next:
				704	addl #12,%d3 \|inc d3 to next pwrten table entry
				705	tstl %d0 \|test if any bits of LEN remain
				706	bnes z_loop \|if so, loop
				707	fabsx %fp0 \|get abs(YINT)
				708	fcmpx %fp2,%fp0 \|check if abs(YINT) = 10^LEN
				709	fbne A14_st \|if not, skip this
				710	fdivs FTEN,%fp0 \|divide abs(YINT) by 10
				711	addql #1,%d6 \|and inc ILOG by 1
				712	addql #1,%d4 \| and inc LEN
				713	fmuls FTEN,%fp2 \| since LEN was incremented, get the new 10^LEN
				714
				715
				716	\| A14. Convert the mantissa to bcd.
				717	\| The binstr routine is used to convert the LEN digit
				718	\| mantissa to bcd in memory. The input to binstr is
				719	\| to be a fraction; i.e. (mantissa)/10^LEN and adjusted
				720	\| such that the decimal point is to the left of bit 63.
				721	\| The bcd digits are stored in the correct position in
				722	\| the final string area in memory.
				723	\|
				724	\|
				725	\| Register usage:
				726	\| Input/Output
				727	\| d0: x/LEN call to binstr - final is 0
				728	\| d1: x/0
				729	\| d2: x/ms 32-bits of mant of abs(YINT)
				730	\| d3: x/ls 32-bits of mant of abs(YINT)
				731	\| d4: LEN/Unchanged
				732	\| d5: ICTR:LAMBDA/LAMBDA:ICTR
				733	\| d6: ILOG
				734	\| d7: k-factor/Unchanged
				735	\| a0: pointer into memory for packed bcd string formation
				736	\| /ptr to first mantissa byte in result string
				737	\| a1: ptr to PTENxx array/Unchanged
				738	\| a2: ptr to FP_SCR2(a6)/Unchanged
				739	\| fp0: int portion of Y/abs(YINT) adjusted
				740	\| fp1: 10^ISCALE/Unchanged
				741	\| fp2: 10^LEN/Unchanged
				742	\| F_SCR1:x/Work area for final result
				743	\| F_SCR2:Y with original exponent/Unchanged
				744	\| L_SCR1:original USER_FPCR/Unchanged
				745	\| L_SCR2:first word of X packed/Unchanged
				746
				747	A14_st:
				748	fmovel #rz_mode,%FPCR \|force rz for conversion
				749	fdivx %fp2,%fp0 \|divide abs(YINT) by 10^LEN
				750	leal FP_SCR1(%a6),%a0 \|a0 is ptr to the FP_SCR1 result area
				751	fmovex %fp0,(%a0) \|move abs(YINT)/10^LEN to memory
				752	movel 4(%a0),%d2 \|move 2nd word of FP_RES to d2
				753	movel 8(%a0),%d3 \|move 3rd word of FP_RES to d3
				754	clrl 4(%a0) \|zero word 2 of FP_RES
				755	clrl 8(%a0) \|zero word 3 of FP_RES
				756	movel (%a0),%d0 \|move exponent to d0
				757	swap %d0 \|put exponent in lower word
				758	beqs no_sft \|if zero, don't shift
				759	subil #0x3ffd,%d0 \|sub bias less 2 to make fract
				760	tstl %d0 \|check if > 0
				761	bgts no_sft \|if so, don't shift
				762	negl %d0 \|make exp positive
				763	m_loop:
				764	lsrl #1,%d2 \|shift d2:d3 right, add 0s
				765	roxrl #1,%d3 \|the number of places
				766	dbf %d0,m_loop \|given in d0 (dbf iterates d0+1 times)
				767	no_sft:
				768	tstl %d2 \|check for mantissa of zero
				769	bnes no_zr \|if not, go on
				770	tstl %d3 \|continue zero check
				771	beqs zer_m \|if zero, go directly to binstr
				772	no_zr:
				773	clrl %d1 \|put zero in d1 for addx
				774	addil #0x00000080,%d3 \|inc at bit 7
				775	addxl %d1,%d2 \|continue inc
				776	andil #0xffffff80,%d3 \|strip off lsb not used by 882
				777	zer_m:
				778	movel %d4,%d0 \|put LEN in d0 for binstr call
				779	addql #3,%a0 \|a0 points to M16 byte in result
				780	bsr binstr \|call binstr to convert mant
				781
				782
				783	\| A15. Convert the exponent to bcd.
				784	\| As in A14 above, the exp is converted to bcd and the
				785	\| digits are stored in the final string.
				786	\|
				787	\| Digits are stored in L_SCR1(a6) on return from binstr as:
				788	\|
				789	\| 32 16 15 0
				790	\| -----------------------------------------
				791	\| \| 0 \| e3 \| e2 \| e1 \| e4 \| X \| X \| X \|
				792	\| -----------------------------------------
				793	\|
				794	\| And are moved into their proper places in FP_SCR1. If digit e4
				795	\| is non-zero, OPERR is signaled. In all cases, all 4 digits are
				796	\| written as specified in the 881/882 manual for packed decimal.
				797	\|
				798	\| Register usage:
				799	\| Input/Output
				800	\| d0: x/LEN call to binstr - final is 0
				801	\| d1: x/scratch (0);shift count for final exponent packing
				802	\| d2: x/ms 32-bits of exp fraction/scratch
				803	\| d3: x/ls 32-bits of exp fraction
				804	\| d4: LEN/Unchanged
				805	\| d5: ICTR:LAMBDA/LAMBDA:ICTR
				806	\| d6: ILOG
				807	\| d7: k-factor/Unchanged
				808	\| a0: ptr to result string/ptr to L_SCR1(a6)
				809	\| a1: ptr to PTENxx array/Unchanged
				810	\| a2: ptr to FP_SCR2(a6)/Unchanged
				811	\| fp0: abs(YINT) adjusted/float(ILOG)
				812	\| fp1: 10^ISCALE/Unchanged
				813	\| fp2: 10^LEN/Unchanged
				814	\| F_SCR1:Work area for final result/BCD result
				815	\| F_SCR2:Y with original exponent/ILOG/10^4
				816	\| L_SCR1:original USER_FPCR/Exponent digits on return from binstr
				817	\| L_SCR2:first word of X packed/Unchanged
				818
				819	A15_st:
				820	tstb BINDEC_FLG(%a6) \|check for denorm
				821	beqs not_denorm \|if norm, use the norm path
				822	ftstx %fp0 \|test for zero
				823	fbeq den_zero \|if zero, use k-factor or 4933
				824	fmovel %d6,%fp0 \|float ILOG
				825	fabsx %fp0 \|get abs of ILOG
				826	bras convrt \|go convert it
				827	den_zero:
				828	tstl %d7 \|check sign of the k-factor
				829	blts use_ilog \|if negative, use ILOG
				830	fmoves F4933,%fp0 \|force exponent to 4933
				831	bras convrt \|do it
				832	use_ilog:
				833	fmovel %d6,%fp0 \|float ILOG
				834	fabsx %fp0 \|get abs of ILOG
				835	bras convrt \|go convert it
				836	not_denorm:
				837	ftstx %fp0 \|test for zero
				838	fbne not_zero \|if not zero, use ILOG; if zero, force exponent
				839	fmoves FONE,%fp0 \|force exponent to 1
				840	bras convrt \|do it
				841	not_zero:
				842	fmovel %d6,%fp0 \|float ILOG
				843	fabsx %fp0 \|get abs of ILOG
				844	convrt:
				845	fdivx 24(%a1),%fp0 \|compute ILOG/10^4
				846	fmovex %fp0,FP_SCR2(%a6) \|store fp0 in memory
				847	movel 4(%a2),%d2 \|move word 2 to d2
				848	movel 8(%a2),%d3 \|move word 3 to d3
				849	movew (%a2),%d0 \|move exp to d0
				850	beqs x_loop_fin \|if zero, skip the shift
				851	subiw #0x3ffd,%d0 \|subtract off bias less 2
				852	negw %d0 \|make exp positive
				853	x_loop:
				854	lsrl #1,%d2 \|shift d2:d3 right
				855	roxrl #1,%d3 \|the number of places
				856	dbf %d0,x_loop \|given in d0 (dbf iterates d0+1 times)
				857	x_loop_fin:
				858	clrl %d1 \|put zero in d1 for addx
				859	addil #0x00000080,%d3 \|inc at bit 7
				860	addxl %d1,%d2 \|continue inc
				861	andil #0xffffff80,%d3 \|strip off lsb not used by 882
				862	movel #4,%d0 \|put 4 in d0 for binstr call
				863	leal L_SCR1(%a6),%a0 \|a0 is ptr to L_SCR1 for exp digits
				864	bsr binstr \|call binstr to convert exp
				865	movel L_SCR1(%a6),%d0 \|load L_SCR1 lword to d0
				866	movel #12,%d1 \|use d1 for shift count
				867	lsrl %d1,%d0 \|shift d0 right by 12
				868	bfins %d0,FP_SCR1(%a6){#4:#12} \|put e3:e2:e1 in FP_SCR1
				869	lsrl %d1,%d0 \|shift d0 right by 12
				870	bfins %d0,FP_SCR1(%a6){#16:#4} \|put e4 in FP_SCR1
				871	tstb %d0 \|check if e4 is zero
				872	beqs A16_st \|if zero, skip rest
				873	orl #opaop_mask,USER_FPSR(%a6) \|set OPERR & AIOP in USER_FPSR
				874
				875
				876	\| A16. Write sign bits to final string.
				877	\| Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
				878	\|
				879	\| Register usage:
				880	\| Input/Output
				881	\| d0: x/scratch - final is x
				882	\| d2: x/x
				883	\| d3: x/x
				884	\| d4: LEN/Unchanged
				885	\| d5: ICTR:LAMBDA/LAMBDA:ICTR
				886	\| d6: ILOG/ILOG adjusted
				887	\| d7: k-factor/Unchanged
				888	\| a0: ptr to L_SCR1(a6)/Unchanged
				889	\| a1: ptr to PTENxx array/Unchanged
				890	\| a2: ptr to FP_SCR2(a6)/Unchanged
				891	\| fp0: float(ILOG)/Unchanged
				892	\| fp1: 10^ISCALE/Unchanged
				893	\| fp2: 10^LEN/Unchanged
				894	\| F_SCR1:BCD result with correct signs
				895	\| F_SCR2:ILOG/10^4
				896	\| L_SCR1:Exponent digits on return from binstr
				897	\| L_SCR2:first word of X packed/Unchanged
				898
				899	A16_st:
				900	clrl %d0 \|clr d0 for collection of signs
				901	andib #0x0f,FP_SCR1(%a6) \|clear first nibble of FP_SCR1
				902	tstl L_SCR2(%a6) \|check sign of original mantissa
				903	bges mant_p \|if pos, don't set SM
				904	moveql #2,%d0 \|move 2 in to d0 for SM
				905	mant_p:
				906	tstl %d6 \|check sign of ILOG
				907	bges wr_sgn \|if pos, don't set SE
				908	addql #1,%d0 \|set bit 0 in d0 for SE
				909	wr_sgn:
				910	bfins %d0,FP_SCR1(%a6){#0:#2} \|insert SM and SE into FP_SCR1
				911
				912	\| Clean up and restore all registers used.
				913
				914	fmovel #0,%FPSR \|clear possible inex2/ainex bits
				915	fmovemx (%a7)+,%fp0-%fp2 \|restore fp0-fp2 saved on entry
				916	moveml (%a7)+,%d2-%d7/%a2 \|restore d2-d7/a2 saved on entry
				917	rts \|return; the packed result is left in FP_SCR1
				918
				919	\|end