;;  Copyright 2011  Free Software Foundation, Inc.
;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

#include <linux/linkage.h>

	.text
ENTRY(__c6xabi_divremu)
	;; We use a series of up to 31 subc instructions.  First, we find
	;; out how many leading zero bits there are in the divisor.  This
	;; gives us both a shift count for aligning (shifting) the divisor
	;; so that its most significant set bit lands in bit 31, and the
	;; number of times we have to execute subc.
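	;; For example, a divisor of 7 (binary 111) has 29 leading zero
	;; bits, so it is shifted left by 29 to bring its top set bit to
	;; bit 31, and 29 subc steps are needed in addition to the
	;; separately computed leading quotient bit.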

	;; At the end, we have both the remainder and most of the quotient
	;; in A4.  The top bit of the quotient is computed first and is
	;; placed in A2.
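	;; Purely as an illustration (this is not a line-for-line model
	;; of the code below, which keeps the divisor fixed and shifts
	;; the remainder/quotient pair left through A4 with subc), the
	;; computation is equivalent to the following C, assuming a
	;; nonzero divisor:
	;;
	;;	unsigned divremu(unsigned x, unsigned y, unsigned *rem)
	;;	{
	;;		unsigned q = 0, r = x;
	;;		int lz = __builtin_clz(y);	/* what lmbd computes */
	;;		unsigned d = y << lz;		/* aligned divisor */
	;;		int i;
	;;
	;;		for (i = lz; i >= 0; i--) {	/* lz + 1 quotient bits */
	;;			q <<= 1;
	;;			if (r >= d) {		/* one subc-style step */
	;;				r -= d;
	;;				q |= 1;
	;;			}
	;;			d >>= 1;
	;;		}
	;;		*rem = r;
	;;		return q;
	;;	}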

	;; Return immediately if the dividend is zero.  Setting B4 to 1
	;; is a trick to allow us to leave the following insns in the jump
	;; delay slot without affecting the result.
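	;; Forcing B4 to 1 guarantees that the cmpltu below sets A2, so
	;; the conditional sub sitting in the delay slots is skipped;
	;; nothing else in those slots writes A4 or A5, and the zero
	;; dividend is returned as the quotient with the zero placed in
	;; A5 as the remainder.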
	mv	.s2x	A4, B1

  [b1]	lmbd	.l2	1, B4, B1
||[!b1] b	.s2	B3	; RETURN A
||[!b1] mvk	.d2	1, B4

||[!b1] zero	.s1	A5
	mv	.l1x	B1, A6
||	shl	.s2	B4, B1, B4
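	;; On the nonzero-dividend path, A6 now holds the lmbd result
	;; (the shift count, kept for undoing the scaling at the end)
	;; and B4 holds the divisor shifted up so that its leading 1
	;; bit sits in bit 31.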

	;; The loop performs a maximum of 28 steps, so we do the
	;; first 3 here.
	cmpltu	.l1x	A4, B4, A2
  [!A2]	sub	.l1x	A4, B4, A4
||	shru	.s2	B4, 1, B4
||	xor	.s1	1, A2, A2

	shl	.s1	A2, 31, A2
|| [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
   [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
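	;; The cmpltu/sub pair above computes the top quotient bit (the
	;; shl parks it in bit 31 of A2), the shru drops the aligned
	;; divisor back by one bit for the subc steps, and each
	;; predicated subc then shifts A4 left one place, subtracting
	;; the divisor when it fits and collecting a quotient bit at
	;; the bottom of A4; B1 counts the subc steps still to be done.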

	;; RETURN A may happen here (note: must happen before the next branch)
__divremu0:
	cmpgt	.l2	B1, 7, B0
|| [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
   [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
|| [b0] b	.s1	__divremu0
   [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
   [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
   [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
   [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
   [b1]	subc	.l1x	A4,B4,A4
|| [b1]	add	.s2	-1, B1, B1
	;; loop backwards branch happens here
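	;; Each pass through __divremu0 performs up to seven more
	;; predicated subc steps.  cmpgt sets B0 while more than seven
	;; steps remain, and the subc packets issued after the branch
	;; fill its delay slots, so they complete before the branch is
	;; taken.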
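	;; Epilogue: once B1 reaches zero, A4 holds the remainder in
	;; its upper bits and the low bits of the quotient in its low
	;; bits, with the top quotient bit still parked in bit 31 of
	;; A2.  The extu (which reads A6 before the parallel sub
	;; rewrites it) extracts the remainder into A5; the following
	;; shifts isolate the low quotient bits, merge in the bit from
	;; A2 and shift the result down, leaving the quotient in A4
	;; and the remainder in A5 for the caller.  All of this runs
	;; in the delay slots of the ret.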
	ret	.s2	B3
||	mvk	.s1	32, A1
	sub	.l1	A1, A6, A6
||	extu	.s1	A4, A6, A5
	shl	.s1	A4, A6, A4
	shru	.s1	A4, 1, A4
||	sub	.l1	A6, 1, A6
	or	.l1	A2, A4, A4
	shru	.s1	A4, A6, A4
	nop
ENDPROC(__c6xabi_divremu)