blob: e55c4842c290fa6267e9998eb9dc4bd28f8cfcef [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/arm/lib/div64.S
3 *
4 * Optimized computation of 64-bit dividend / 32-bit divisor
5 *
6 * Author: Nicolas Pitre
7 * Created: Oct 5, 2003
8 * Copyright: Monta Vista Software, Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/linkage.h>
Laura Abbott533d03b2011-10-06 20:53:14 +010016#include <asm/unwind.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017
/*
 * Register aliases for the two 64-bit quantities handled by this file.
 * "x" names the r0/r1 pair and "y" names the r2/r3 pair; the "h"/"l"
 * suffix selects the high/low 32-bit word of the pair.  When __ARMEB__
 * is defined the high word is the lower-numbered register of each pair,
 * otherwise the low word is.
 * NOTE(review): presumably this mirrors how the compiler places a 64-bit
 * value in a register pair for each endianness -- confirm against the
 * do_div() caller.
 */
18#ifdef __ARMEB__
19#define xh r0
20#define xl r1
21#define yh r2
22#define yl r3
23#else
24#define xl r0
25#define xh r1
26#define yl r2
27#define yh r3
28#endif
29
30/*
31 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
32 *
33 * Note: Calling convention is totally non standard for optimal code.
34 * This is meant to be used by do_div() from include/asm/div64.h only.
35 *
36 * Input parameters:
37 * xh-xl = dividend (clobbered)
38 * r4 = divisor (preserved)
39 *
40 * Output values:
41 * yh-yl = result
42 * xh = remainder
43 *
44 * Clobbered regs: xl, ip
45 */
46
47ENTRY(__do_div64)
Laura Abbott533d03b2011-10-06 20:53:14 +010048UNWIND(.fnstart)
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
/*
 * Flow overview:
 *   divisor 0 or 1      -> label 9 (1 returns there, 0 falls into Ldiv0_64)
 *   divisor power of 2  -> label 8 (shifts only)
 *   anything else       -> optional upper-word loop (label 2) producing yh,
 *                          then the lower-word stage (labels 3-7) producing yl.
 * Every normal exit leaves the remainder in xh.  Most instructions below
 * consume condition flags left by an earlier instruction, so the exact
 * instruction order matters.
 */
50 @ Test for easy paths first.
/*
 * ip = divisor - 1.  LS (C clear or Z set) holds when the subtraction
 * borrowed (divisor == 0) or produced zero (divisor == 1).
 */
51 subs ip, r4, #1
52 bls 9f @ divisor is 0 or 1
/* (divisor - 1) & divisor is zero only when a single bit is set in r4. */
53 tst ip, r4
54 beq 8f @ divisor is power of 2
55
56 @ See if we need to handle upper 32-bit result.
/*
 * The mov between cmp and blo does not set flags, so blo still tests
 * xh < divisor.  In that case the high quotient word stays 0.
 */
57 cmp xh, r4
58 mov yh, #0
59 blo 3f
60
61 @ Align divisor with upper part of dividend.
62 @ The aligned divisor is stored in yl preserving the original.
63 @ The bit position is stored in ip.
64
65#if __LINUX_ARM_ARCH__ >= 5
66
/*
 * yl first holds the shift count clz(divisor) - clz(xh), which lines up
 * the top set bit of the divisor with the top set bit of xh (xh >= r4 on
 * this path).  ip becomes 1 << count, then yl becomes divisor << count.
 */
67 clz yl, r4
68 clz ip, xh
69 sub yl, yl, ip
70 mov ip, #1
71 mov ip, ip, lsl yl
72 mov yl, r4, lsl yl
73
74#else
75
/*
 * No clz here: shift the divisor copy (yl) and the bit marker (ip) left
 * one position per iteration while yl < 0x80000000 and yl < xh.
 */
76 mov yl, r4
77 mov ip, #1
781: cmp yl, #0x80000000
79 cmpcc yl, xh
80 movcc yl, yl, lsl #1
81 movcc ip, ip, lsl #1
82 bcc 1b
83
84#endif
85
86 @ The division loop for needed upper bit positions.
87 @ Break out early if dividend reaches 0.
/*
 * Per iteration:
 *   cmp     C set when xh >= yl (aligned divisor fits).
 *   orrcs   record the quotient bit in yh.
 *   subcss  subtract and set Z if xh became zero; when skipped the flags
 *           from cmp remain (C clear, so Z clear).
 *   movnes  only while xh != 0: move ip to the next lower bit; Z becomes
 *           set once ip has shifted out to zero.
 *   mov     non flag-setting shift of yl, so bne still sees the Z above.
 */
882: cmp xh, yl
89 orrcs yh, yh, ip
90 subcss xh, xh, yl
91 movnes ip, ip, lsr #1
92 mov yl, yl, lsr #1
93 bne 2b
94
95 @ See if we need to handle lower 32-bit result.
/*
 * cmp xh, #0 always leaves C set, so LO can only hold when xh == 0 and
 * the conditional compare found xl < divisor.  Then the low quotient word
 * is 0 and xl is the remainder.
 */
963: cmp xh, #0
97 mov yl, #0
98 cmpeq xl, r4
99 movlo xh, xl
100 movlo pc, lr
101
102 @ The division loop for lower bit positions.
103 @ Here we shift remainer bits leftwards rather than moving the
104 @ divisor for comparisons, considering the carry-out bit as well.
/*
 * ip is the quotient bit mask for yl, starting at bit 31.
 * Per iteration:
 *   movs/adcs  shift the xh:xl pair left by one; C is the bit shifted out
 *              of xh, Z is set when the new xh is zero.
 *   beq 6f     new xh is zero: handled separately at label 6.
 *   cmpcc      only when nothing was carried out: C set if xh >= divisor.
 *              With a carry-out C simply stays set.
 *   orrcs/subcs  record the quotient bit and subtract the divisor.
 *   movs ip    next lower bit; Z set when all 32 positions are done.
 */
105 mov ip, #0x80000000
1064: movs xl, xl, lsl #1
107 adcs xh, xh, xh
108 beq 6f
109 cmpcc xh, r4
1105: orrcs yl, yl, ip
111 subcs xh, xh, r4
112 movs ip, ip, lsr #1
113 bne 4b
114 mov pc, lr
115
116 @ The top part of remainder became zero. If carry is set
117 @ (the 33th bit) this is a false positive so resume the loop.
118 @ Otherwise, if lower part is also null then we are done.
/*
 * Branching back to label 5 with C still set makes orrcs/subcs execute.
 * On the moveq return below xh is zero, i.e. the remainder is 0.
 */
1196: bcs 5b
120 cmp xl, #0
121 moveq pc, lr
122
123 @ We still have remainer bits in the low part. Bring them up.
124
125#if __LINUX_ARM_ARCH__ >= 5
126
/*
 * xh is used as a scratch shift count here: clz(xl) + 1 shifts the
 * leading set bit of xl out of the top.  ip is moved down by the same
 * number of positions.  xh is reloaded with 1 after the #endif.
 */
126 clz xh, xl @ we know xh is zero here so...
127 add xh, xh, #1
128 mov xl, xl, lsl xh
129 mov ip, ip, lsr xh
130
131#else
132
/*
 * Same effect one bit at a time: shift xl left and ip right until a set
 * bit drops out of xl into the carry flag.
 */
1347: movs xl, xl, lsl #1
135 mov ip, ip, lsr #1
136 bcc 7b
137
138#endif
139
140 @ Current remainder is now 1. It is worthless to compare with
141 @ divisor at this point since divisor can not be smaller than 3 here.
142 @ If possible, branch for another shift in the division loop.
143 @ If no bit position left then we are done.
/* mov xh, #1 does not set flags, so bne tests the Z produced by movs ip. */
144 movs ip, ip, lsr #1
145 mov xh, #1
146 bne 4b
147 mov pc, lr
148
1498: @ Division by a power of 2: determine what that divisor order is
150 @ then simply shift values around
151
152#if __LINUX_ARM_ARCH__ >= 5
153
/* ip = 31 - clz(divisor), i.e. the index of the single set bit in r4. */
154 clz ip, r4
155 rsb ip, ip, #31
156
157#else
158
/*
 * Find the set bit index without clz: narrow down in steps of 16, 8 and
 * 4 using a scratch copy in yl, accumulating the index in ip.
 */
159 mov yl, r4
160 cmp r4, #(1 << 16)
161 mov ip, #0
162 movhs yl, yl, lsr #16
163 movhs ip, #16
164
165 cmp yl, #(1 << 8)
166 movhs yl, yl, lsr #8
167 addhs ip, ip, #8
168
169 cmp yl, #(1 << 4)
170 movhs yl, yl, lsr #4
171 addhs ip, ip, #4
172
/*
 * yl is now 1, 2, 4 or 8.  For 8 add 3; otherwise yl >> 1 (0, 1 or 2)
 * is exactly the remaining index to add.
 */
173 cmp yl, #(1 << 2)
174 addhi ip, ip, #3
175 addls ip, ip, yl, lsr #1
176
177#endif
178
/*
 * With n = ip:
 *   yh = xh >> n
 *   yl = (xl >> n) | (xh << (32 - n))
 *   xh = low n bits of xl (the remainder), isolated by shifting left and
 *        then right by 32 - n.
 * ip is turned into 32 - n by the rsb.  The THUMB() variant does the
 * shift and the orr as two instructions and overwrites xh in between,
 * which is harmless because xh is reloaded from xl on the next line.
 * NOTE(review): ARM()/THUMB() are defined outside this file; presumably
 * they select the line matching the instruction set being built -- confirm.
 */
178 mov yh, xh, lsr ip
179 mov yl, xl, lsr ip
180 rsb ip, ip, #32
Catalin Marinas8b592782009-07-24 12:32:57 +0100182 ARM( orr yl, yl, xh, lsl ip )
183 THUMB( lsl xh, xh, ip )
184 THUMB( orr yl, yl, xh )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 mov xh, xl, lsl ip
186 mov xh, xh, lsr ip
187 mov pc, lr
188
189 @ eq -> division by 1: obvious enough...
/*
 * The flags here are still those of the subs at entry (only bls ran in
 * between).  EQ: quotient = dividend, remainder = 0, return.  NE means
 * the divisor was 0: none of the moveq lines execute and control
 * continues to Ldiv0_64 below.
 * NOTE(review): this relies on the UNWIND() lines in between emitting no
 * instructions -- confirm against the UNWIND macro definition.
 */
1909: moveq yl, xl
191 moveq yh, xh
192 moveq xh, #0
193 moveq pc, lr
Laura Abbott533d03b2011-10-06 20:53:14 +0100194UNWIND(.fnend)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195
/*
 * The unwind annotations below describe the 8 bytes pushed by the str:
 * 4 bytes of padding plus the saved lr.
 */
Laura Abbott533d03b2011-10-06 20:53:14 +0100196UNWIND(.fnstart)
197UNWIND(.pad #4)
198UNWIND(.save {lr})
199Ldiv0_64:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 @ Division by 0:
/*
 * Save lr at the new top of stack while moving sp down by 8, then call
 * __div0 (defined outside this file).
 * NOTE(review): the 8-byte (rather than 4-byte) adjustment presumably
 * keeps sp 8-byte aligned across the call -- confirm.
 */
Nicolas Pitre1d6760a2006-05-16 11:29:46 +0100201 str lr, [sp, #-8]!
202 bl __div0
203
204 @ as wrong as it could be...
/*
 * If __div0 returns: report quotient 0 and remainder 0, then return by
 * loading the saved lr into pc and restoring sp.
 */
205 mov yl, #0
206 mov yh, #0
207 mov xh, #0
Nicolas Pitre1d6760a2006-05-16 11:29:46 +0100208 ldr pc, [sp], #8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209
Laura Abbott533d03b2011-10-06 20:53:14 +0100210UNWIND(.fnend)
Catalin Marinas93ed3972008-08-28 11:22:32 +0100211ENDPROC(__do_div64)