| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  *  linux/arch/arm/lib/div64.S | 
 | 3 |  * | 
 | 4 |  *  Optimized computation of 64-bit dividend / 32-bit divisor | 
 | 5 |  * | 
 | 6 |  *  Author:	Nicolas Pitre | 
 | 7 |  *  Created:	Oct 5, 2003 | 
 | 8 |  *  Copyright:	Monta Vista Software, Inc. | 
 | 9 |  * | 
 | 10 |  *  This program is free software; you can redistribute it and/or modify | 
 | 11 |  *  it under the terms of the GNU General Public License version 2 as | 
 | 12 |  *  published by the Free Software Foundation. | 
 | 13 |  */ | 
 | 14 |  | 
 | 15 | #include <linux/linkage.h> | 
 | 16 |  | 
 | 17 | #ifdef __ARMEB__ /* xh/yh live in the lower-numbered register of each pair */ | 
 | 18 | #define xh r0 | 
 | 19 | #define xl r1 | 
 | 20 | #define yh r2 | 
 | 21 | #define yl r3 | 
 | 22 | #else /* !__ARMEB__: xl/yl live in the lower-numbered register of each pair */ | 
 | 23 | #define xl r0 | 
 | 24 | #define xh r1 | 
 | 25 | #define yl r2 | 
 | 26 | #define yh r3 | 
 | 27 | #endif /* __ARMEB__ */ | 
 | 28 |  | 
 | 29 | /* | 
 | 30 |  * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. | 
 | 31 |  * | 
 | 32 |  * Note: Calling convention is totally non standard for optimal code. | 
 | 33 |  *       This is meant to be used by do_div() from include/asm/div64.h only. | 
 | 34 |  * | 
 | 35 |  * Input parameters: | 
 | 36 |  * 	xh-xl	= dividend (clobbered) | 
 | 37 |  * 	r4	= divisor (preserved) | 
 | 38 |  * | 
 | 39 |  * Output values: | 
 | 40 |  * 	yh-yl	= result | 
 | 41 |  * 	xh	= remainder | 
 | 42 |  * | 
 | 43 |  * Clobbered regs: xl, ip | 
 | 44 |  */ | 
 | 45 |  | 
 | 46 | ENTRY(__do_div64) | 
 | 47 |  | 
 | 48 | 	@ Test for easy paths first (ip = divisor - 1). | 
 | 49 | 	subs	ip, r4, #1 | 
 | 50 | 	bls	9f			@ divisor is 0 (borrow, C clear) or 1 (Z set) | 
 | 51 | 	tst	ip, r4 | 
 | 52 | 	beq	8f			@ divisor & (divisor - 1) == 0: divisor is power of 2 | 
 | 53 |  | 
 | 54 | 	@ See if we need to handle upper 32-bit result (skipped when xh < divisor, yh stays 0). | 
 | 55 | 	cmp	xh, r4 | 
 | 56 | 	mov	yh, #0 | 
 | 57 | 	blo	3f | 
 | 58 |  | 
 | 59 | 	@ Align divisor with upper part of dividend. | 
 | 60 | 	@ The aligned divisor is stored in yl preserving the original. | 
 | 61 | 	@ The bit position is stored in ip. | 
 | 62 |  | 
 | 63 | #if __LINUX_ARM_ARCH__ >= 5 | 
 | 64 |  | 
 | 65 | 	clz	yl, r4 | 
 | 66 | 	clz	ip, xh | 
 | 67 | 	sub	yl, yl, ip | 
 | 68 | 	mov	ip, #1 | 
 | 69 | 	mov	ip, ip, lsl yl | 
 | 70 | 	mov	yl, r4, lsl yl | 
 | 71 |  | 
 | 72 | #else | 
 | 73 |  | 
 | 74 | 	mov	yl, r4 | 
 | 75 | 	mov	ip, #1 | 
 | 76 | 1:	cmp	yl, #0x80000000 | 
 | 77 | 	cmpcc	yl, xh | 
 | 78 | 	movcc	yl, yl, lsl #1 | 
 | 79 | 	movcc	ip, ip, lsl #1 | 
 | 80 | 	bcc	1b | 
 | 81 |  | 
 | 82 | #endif | 
 | 83 |  | 
 | 84 | 	@ The division loop for needed upper bit positions. | 
 | 85 |  	@ Break out early if dividend reaches 0. | 
 | 86 | 2:	cmp	xh, yl | 
 | 87 | 	orrcs	yh, yh, ip | 
 | 88 | 	subcss	xh, xh, yl | 
 | 89 | 	movnes	ip, ip, lsr #1 | 
 | 90 | 	mov	yl, yl, lsr #1 | 
 | 91 | 	bne	2b | 
 | 92 |  | 
 | 93 | 	@ See if we need to handle lower 32-bit result: when xh is 0 and xl < divisor, xl is the remainder and yl stays 0. | 
 | 94 | 3:	cmp	xh, #0 | 
 | 95 | 	mov	yl, #0 | 
 | 96 | 	cmpeq	xl, r4 | 
 | 97 | 	movlo	xh, xl | 
 | 98 | 	movlo	pc, lr | 
 | 99 |  | 
 | 100 | 	@ The division loop for lower bit positions. | 
 | 101 | 	@ Here we shift remainder bits leftwards rather than moving the | 
 | 102 | 	@ divisor for comparisons, considering the carry-out bit as well. | 
 | 103 | 	mov	ip, #0x80000000 | 
 | 104 | 4:	movs	xl, xl, lsl #1 | 
 | 105 | 	adcs	xh, xh, xh | 
 | 106 | 	beq	6f | 
 | 107 | 	cmpcc	xh, r4 | 
 | 108 | 5:	orrcs	yl, yl, ip | 
 | 109 | 	subcs	xh, xh, r4 | 
 | 110 | 	movs	ip, ip, lsr #1 | 
 | 111 | 	bne	4b | 
 | 112 | 	mov	pc, lr | 
 | 113 |  | 
 | 114 | 	@ The top part of remainder became zero.  If carry is set | 
 | 115 | 	@ (the 33rd bit) this is a false positive so resume the loop. | 
 | 116 | 	@ Otherwise, if lower part is also null then we are done. | 
 | 117 | 6:	bcs	5b | 
 | 118 | 	cmp	xl, #0 | 
 | 119 | 	moveq	pc, lr | 
 | 120 |  | 
 | 121 | 	@ We still have remainder bits in the low part.  Bring them up. | 
 | 122 |  | 
 | 123 | #if __LINUX_ARM_ARCH__ >= 5 | 
 | 124 |  | 
 | 125 | 	clz	xh, xl			@ we know xh is zero here so it is free to hold the shift count | 
 | 126 | 	add	xh, xh, #1 | 
 | 127 | 	mov	xl, xl, lsl xh | 
 | 128 | 	mov	ip, ip, lsr xh | 
 | 129 |  | 
 | 130 | #else | 
 | 131 |  | 
 | 132 | 7:	movs	xl, xl, lsl #1 | 
 | 133 | 	mov	ip, ip, lsr #1 | 
 | 134 | 	bcc	7b | 
 | 135 |  | 
 | 136 | #endif | 
 | 137 |  | 
 | 138 | 	@ Current remainder is now 1.  It is worthless to compare with | 
 | 139 | 	@ divisor at this point since divisor can not be smaller than 3 here. | 
 | 140 | 	@ If possible, branch for another shift in the division loop. | 
 | 141 | 	@ If no bit position left then we are done. | 
 | 142 | 	movs	ip, ip, lsr #1 | 
 | 143 | 	mov	xh, #1 | 
 | 144 | 	bne	4b | 
 | 145 | 	mov	pc, lr | 
 | 146 |  | 
 | 147 | 8:	@ Division by a power of 2: determine what that divisor order is (left in ip) | 
 | 148 | 	@ then simply shift values around | 
 | 149 |  | 
 | 150 | #if __LINUX_ARM_ARCH__ >= 5 | 
 | 151 |  | 
 | 152 | 	clz	ip, r4 | 
 | 153 | 	rsb	ip, ip, #31 | 
 | 154 |  | 
 | 155 | #else | 
 | 156 |  | 
 | 157 | 	mov	yl, r4 | 
 | 158 | 	cmp	r4, #(1 << 16) | 
 | 159 | 	mov	ip, #0 | 
 | 160 | 	movhs	yl, yl, lsr #16 | 
 | 161 | 	movhs	ip, #16 | 
 | 162 |  | 
 | 163 | 	cmp	yl, #(1 << 8) | 
 | 164 | 	movhs	yl, yl, lsr #8 | 
 | 165 | 	addhs	ip, ip, #8 | 
 | 166 |  | 
 | 167 | 	cmp	yl, #(1 << 4) | 
 | 168 | 	movhs	yl, yl, lsr #4 | 
 | 169 | 	addhs	ip, ip, #4 | 
 | 170 |  | 
 | 171 | 	cmp	yl, #(1 << 2) | 
 | 172 | 	addhi	ip, ip, #3 | 
 | 173 | 	addls	ip, ip, yl, lsr #1 | 
 | 174 |  | 
 | 175 | #endif | 
 | 176 |  | 
 | 177 | 	mov	yh, xh, lsr ip | 
 | 178 | 	mov	yl, xl, lsr ip | 
 | 179 | 	rsb	ip, ip, #32 | 
 | 180 | 	orr	yl, yl, xh, lsl ip | 
 | 181 | 	mov	xh, xl, lsl ip | 
 | 182 | 	mov	xh, xh, lsr ip | 
 | 183 | 	mov	pc, lr | 
 | 184 |  | 
 | 185 | 	@ eq -> division by 1: obvious enough... (result = dividend, remainder = 0) | 
 | 186 | 9:	moveq	yl, xl | 
 | 187 | 	moveq	yh, xh | 
 | 188 | 	moveq	xh, #0 | 
 | 189 | 	moveq	pc, lr | 
 | 190 |  | 
 | 191 | 	@ Division by 0: save lr on the stack (sp drops by 8) and call __div0 | 
| Nicolas Pitre | 1d6760a | 2006-05-16 11:29:46 +0100 | [diff] [blame] | 192 | 	str	lr, [sp, #-8]! | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 193 | 	bl	__div0 | 
 | 194 |  | 
 | 195 | 	@ as wrong as it could be... (result and remainder are forced to 0) | 
 | 196 | 	mov	yl, #0 | 
 | 197 | 	mov	yh, #0 | 
 | 198 | 	mov	xh, #0 | 
| Nicolas Pitre | 1d6760a | 2006-05-16 11:29:46 +0100 | [diff] [blame] | 199 | 	ldr	pc, [sp], #8 | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 200 |  |