| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | *  linux/arch/arm/lib/div64.S | 
|  | 3 | * | 
|  | 4 | *  Optimized computation of 64-bit dividend / 32-bit divisor | 
|  | 5 | * | 
|  | 6 | *  Author:	Nicolas Pitre | 
|  | 7 | *  Created:	Oct 5, 2003 | 
|  | 8 | *  Copyright:	Monta Vista Software, Inc. | 
|  | 9 | * | 
|  | 10 | *  This program is free software; you can redistribute it and/or modify | 
|  | 11 | *  it under the terms of the GNU General Public License version 2 as | 
|  | 12 | *  published by the Free Software Foundation. | 
|  | 13 | */ | 
|  | 14 |  | 
|  | 15 | #include <linux/linkage.h> | 
|  | 16 |  | 
|  | 17 | #ifdef __ARMEB__	/* compiler-defined on big-endian ARM: high word in the lower-numbered register */ | 
|  | 18 | #define xh r0	/* dividend, high 32 bits (remainder on return) */ | 
|  | 19 | #define xl r1	/* dividend, low 32 bits */ | 
|  | 20 | #define yh r2	/* result, high 32 bits */ | 
|  | 21 | #define yl r3	/* result, low 32 bits */ | 
|  | 22 | #else	/* otherwise: low word in the lower-numbered register */ | 
|  | 23 | #define xl r0 | 
|  | 24 | #define xh r1 | 
|  | 25 | #define yl r2 | 
|  | 26 | #define yh r3 | 
|  | 27 | #endif | 
|  | 28 |  | 
|  | 29 | /* | 
|  | 30 | * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. | 
|  | 31 | * | 
|  | 32 | * Note: Calling convention is totally non standard for optimal code. | 
|  | 33 | *       This is meant to be used by do_div() from include/asm/div64.h only. | 
|  | 34 | * | 
|  | 35 | * Input parameters: | 
|  | 36 | * 	xh-xl	= dividend (clobbered) | 
|  | 37 | * 	r4	= divisor (preserved) | 
|  | 38 | * | 
|  | 39 | * Output values: | 
|  | 40 | * 	yh-yl	= result (quotient) | 
|  | 41 | * 	xh	= remainder | 
|  | 42 | * 	(divisor == 0: __div0 is called, then result and remainder are 0) | 
|  | 43 | * Clobbered regs: xl, ip, condition flags | 
|  | 44 | */ | 
|  | 45 |  | 
|  | 46 | ENTRY(__do_div64) | 
|  | 47 |  | 
|  | 48 | @ Test for easy paths first. | 
|  | 49 | subs	ip, r4, #1		@ ip = divisor - 1 (these flags are reused at 9:) | 
|  | 50 | bls	9f			@ divisor is 0 or 1 | 
|  | 51 | tst	ip, r4			@ (divisor - 1) & divisor is zero only for a single set bit | 
|  | 52 | beq	8f			@ divisor is power of 2 | 
|  | 53 |  | 
|  | 54 | @ See if we need to handle upper 32-bit result. | 
|  | 55 | cmp	xh, r4 | 
|  | 56 | mov	yh, #0			@ does not touch the flags from cmp | 
|  | 57 | blo	3f			@ xh < divisor: upper result word stays 0 | 
|  | 58 |  | 
|  | 59 | @ Align divisor with upper part of dividend. | 
|  | 60 | @ The aligned divisor is stored in yl preserving the original. | 
|  | 61 | @ The bit position is stored in ip. | 
|  | 62 |  | 
|  | 63 | #if __LINUX_ARM_ARCH__ >= 5 | 
|  | 64 |  | 
|  | 65 | clz	yl, r4 | 
|  | 66 | clz	ip, xh | 
|  | 67 | sub	yl, yl, ip		@ yl = shift that lines up the top set bits (xh >= divisor here) | 
|  | 68 | mov	ip, #1 | 
|  | 69 | mov	ip, ip, lsl yl		@ ip = result bit for that position | 
|  | 70 | mov	yl, r4, lsl yl		@ yl = aligned divisor | 
|  | 71 |  | 
|  | 72 | #else | 
|  | 73 |  | 
|  | 74 | mov	yl, r4 | 
|  | 75 | mov	ip, #1 | 
|  | 76 | 1:	cmp	yl, #0x80000000		@ stop once the top bit of yl is set... | 
|  | 77 | cmpcc	yl, xh			@ ...or once yl >= xh | 
|  | 78 | movcc	yl, yl, lsl #1 | 
|  | 79 | movcc	ip, ip, lsl #1 | 
|  | 80 | bcc	1b | 
|  | 81 |  | 
|  | 82 | #endif | 
|  | 83 |  | 
|  | 84 | @ The division loop for needed upper bit positions. | 
|  | 85 | @ Break out early if dividend reaches 0. | 
|  | 86 | 2:	cmp	xh, yl | 
|  | 87 | orrcs	yh, yh, ip		@ xh >= yl: set this result bit... | 
|  | 88 | subcss	xh, xh, yl		@ ...and subtract; Z set if xh is now 0 | 
|  | 89 | movnes	ip, ip, lsr #1		@ next bit position; Z set when none is left | 
|  | 90 | mov	yl, yl, lsr #1		@ flags untouched: bne sees Z from the two lines above | 
|  | 91 | bne	2b | 
|  | 92 |  | 
|  | 93 | @ See if we need to handle lower 32-bit result. | 
|  | 94 | 3:	cmp	xh, #0 | 
|  | 95 | mov	yl, #0 | 
|  | 96 | cmpeq	xl, r4			@ only compared when xh == 0 | 
|  | 97 | movlo	xh, xl			@ xh == 0 and xl < divisor: remainder = xl | 
|  | 98 | movlo	pc, lr			@ ...and the lower result word is 0 | 
|  | 99 |  | 
|  | 100 | @ The division loop for lower bit positions. | 
|  | 101 | @ Here we shift remainder bits leftwards rather than moving the | 
|  | 102 | @ divisor for comparisons, considering the carry-out bit as well. | 
|  | 103 | mov	ip, #0x80000000		@ ip = current result bit, starting at bit 31 | 
|  | 104 | 4:	movs	xl, xl, lsl #1		@ C = next dividend bit | 
|  | 105 | adcs	xh, xh, xh		@ xh = 2 * xh + C; C out = the 33rd bit | 
|  | 106 | beq	6f			@ low 32 bits of the remainder are zero | 
|  | 107 | cmpcc	xh, r4			@ no carry out: C = (xh >= divisor) | 
|  | 108 | 5:	orrcs	yl, yl, ip | 
|  | 109 | subcs	xh, xh, r4 | 
|  | 110 | movs	ip, ip, lsr #1		@ Z set once the last bit position is done | 
|  | 111 | bne	4b | 
|  | 112 | mov	pc, lr | 
|  | 113 |  | 
|  | 114 | @ The top part of remainder became zero.  If carry is set | 
|  | 115 | @ (the 33rd bit) this is a false positive so resume the loop. | 
|  | 116 | @ Otherwise, if lower part is also null then we are done. | 
|  | 117 | 6:	bcs	5b | 
|  | 118 | cmp	xl, #0 | 
|  | 119 | moveq	pc, lr			@ no dividend bits left: remainder (xh) is 0 | 
|  | 120 |  | 
|  | 121 | @ We still have remainder bits in the low part.  Bring them up. | 
|  | 122 |  | 
|  | 123 | #if __LINUX_ARM_ARCH__ >= 5 | 
|  | 124 |  | 
|  | 125 | clz	xh, xl			@ we know xh is zero here so... | 
|  | 126 | add	xh, xh, #1 | 
|  | 127 | mov	xl, xl, lsl xh		@ shift out the leading zeros and the first 1 bit | 
|  | 128 | mov	ip, ip, lsr xh		@ skip the same number of result bit positions | 
|  | 129 |  | 
|  | 130 | #else | 
|  | 131 |  | 
|  | 132 | 7:	movs	xl, xl, lsl #1 | 
|  | 133 | mov	ip, ip, lsr #1 | 
|  | 134 | bcc	7b			@ until a 1 bit has been shifted out of xl | 
|  | 135 |  | 
|  | 136 | #endif | 
|  | 137 |  | 
|  | 138 | @ Current remainder is now 1.  It is worthless to compare with | 
|  | 139 | @ divisor at this point since divisor can not be smaller than 3 here. | 
|  | 140 | @ If possible, branch for another shift in the division loop. | 
|  | 141 | @ If no bit position left then we are done. | 
|  | 142 | movs	ip, ip, lsr #1 | 
|  | 143 | mov	xh, #1			@ does not touch the flags from movs | 
|  | 144 | bne	4b | 
|  | 145 | mov	pc, lr | 
|  | 146 |  | 
|  | 147 | 8:	@ Division by a power of 2: determine what that divisor order is | 
|  | 148 | @ then simply shift values around | 
|  | 149 |  | 
|  | 150 | #if __LINUX_ARM_ARCH__ >= 5 | 
|  | 151 |  | 
|  | 152 | clz	ip, r4 | 
|  | 153 | rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = log2(divisor) | 
|  | 154 |  | 
|  | 155 | #else | 
|  | 156 | @ Binary search for the single set bit; ip accumulates log2(divisor). | 
|  | 157 | mov	yl, r4 | 
|  | 158 | cmp	r4, #(1 << 16) | 
|  | 159 | mov	ip, #0 | 
|  | 160 | movhs	yl, yl, lsr #16 | 
|  | 161 | movhs	ip, #16 | 
|  | 162 |  | 
|  | 163 | cmp	yl, #(1 << 8) | 
|  | 164 | movhs	yl, yl, lsr #8 | 
|  | 165 | addhs	ip, ip, #8 | 
|  | 166 |  | 
|  | 167 | cmp	yl, #(1 << 4) | 
|  | 168 | movhs	yl, yl, lsr #4 | 
|  | 169 | addhs	ip, ip, #4 | 
|  | 170 |  | 
|  | 171 | cmp	yl, #(1 << 2) | 
|  | 172 | addhi	ip, ip, #3		@ yl is 8 | 
|  | 173 | addls	ip, ip, yl, lsr #1	@ yl is 1, 2 or 4: add 0, 1 or 2 | 
|  | 174 |  | 
|  | 175 | #endif | 
|  | 176 |  | 
|  | 177 | mov	yh, xh, lsr ip		@ result = dividend >> log2(divisor) | 
|  | 178 | mov	yl, xl, lsr ip | 
|  | 179 | rsb	ip, ip, #32		@ ip = 32 - log2(divisor); next: move the low bits of xh into the top of yl | 
| Catalin Marinas | 8b59278 | 2009-07-24 12:32:57 +0100 | [diff] [blame] | 180 | ARM(	orr	yl, yl, xh, lsl ip	) | 
|  | 181 | THUMB(	lsl	xh, xh, ip		) | 
|  | 182 | THUMB(	orr	yl, yl, xh		) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | mov	xh, xl, lsl ip		@ remainder = the low log2(divisor) bits of xl | 
|  | 184 | mov	xh, xh, lsr ip | 
|  | 185 | mov	pc, lr | 
|  | 186 |  | 
|  | 187 | @ eq (still from the subs at entry) -> division by 1: obvious enough... | 
|  | 188 | 9:	moveq	yl, xl | 
|  | 189 | moveq	yh, xh | 
|  | 190 | moveq	xh, #0 | 
|  | 191 | moveq	pc, lr | 
|  | 192 |  | 
|  | 193 | @ Division by 0: | 
| Nicolas Pitre | 1d6760a | 2006-05-16 11:29:46 +0100 | [diff] [blame] | 194 | str	lr, [sp, #-8]!		@ save lr; sp moves down by 8 | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 195 | bl	__div0 | 
|  | 196 |  | 
|  | 197 | @ as wrong as it could be... | 
|  | 198 | mov	yl, #0 | 
|  | 199 | mov	yh, #0 | 
|  | 200 | mov	xh, #0 | 
| Nicolas Pitre | 1d6760a | 2006-05-16 11:29:46 +0100 | [diff] [blame] | 201 | ldr	pc, [sp], #8		@ return through the saved lr; sp moves back up by 8 | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 202 |  | 
| Catalin Marinas | 93ed397 | 2008-08-28 11:22:32 +0100 | [diff] [blame] | 203 | ENDPROC(__do_div64) |