| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | @ libgcc1 routines for ARM cpu. | 
|  | 2 | @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) | 
|  | 3 |  | 
|  | 4 | /* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc. | 
|  | 5 |  | 
|  | 6 | This file is free software; you can redistribute it and/or modify it | 
|  | 7 | under the terms of the GNU General Public License as published by the | 
|  | 8 | Free Software Foundation; either version 2, or (at your option) any | 
|  | 9 | later version. | 
|  | 10 |  | 
|  | 11 | In addition to the permissions in the GNU General Public License, the | 
|  | 12 | Free Software Foundation gives you unlimited permission to link the | 
|  | 13 | compiled version of this file with other programs, and to distribute | 
|  | 14 | those programs without any restriction coming from the use of this | 
|  | 15 | file.  (The General Public License restrictions do apply in other | 
|  | 16 | respects; for example, they cover modification of the file, and | 
|  | 17 | distribution when not linked into another program.) | 
|  | 18 |  | 
|  | 19 | This file is distributed in the hope that it will be useful, but | 
|  | 20 | WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 22 | General Public License for more details. | 
|  | 23 |  | 
|  | 24 | You should have received a copy of the GNU General Public License | 
|  | 25 | along with this program; see the file COPYING.  If not, write to | 
|  | 26 | the Free Software Foundation, 59 Temple Place - Suite 330, | 
|  | 27 | Boston, MA 02111-1307, USA.  */ | 
|  | 28 |  | 
|  | 29 | /* As a special exception, if you link this library with other files, | 
|  | 30 | some of which are compiled with GCC, to produce an executable, | 
|  | 31 | this library does not by itself cause the resulting executable | 
|  | 32 | to be covered by the GNU General Public License. | 
|  | 33 | This exception does not however invalidate any other reasons why | 
|  | 34 | the executable file might be covered by the GNU General Public License. | 
|  | 35 | */ | 
|  | 36 | /* This code is derived from gcc 2.95.3 */ | 
|  | 37 | /* I Molton     29/07/01 */ | 
|  | 38 |  | 
|  | 39 | #include <linux/linkage.h> | 
|  | 40 | #include <asm/assembler.h> | 
|  | 41 | #include <asm/hardware.h> | 
|  | 42 | #include <linux/config.h> | 
|  | 43 |  | 
/*
 * Return helpers used by every routine in this file.
 *
 *   RET        expands to "movs"        (RET pc, lr      -> movs pc, lr)
 *   RETc(cc)   expands to "mov<cc>s"    (conditional form of the above)
 *   RETCOND    expands to "^"           (suffix on an ldm that loads pc)
 *
 * All three select the flag-restoring variant of a return through pc.
 * NOTE(review): presumably this is the 26-bit ARM convention, where the
 * flags travel in lr alongside the return address - confirm against the
 * build target before reusing on a 32-bit-PSR core.
 *
 * Keep comments off the #define lines themselves: anything after the
 * macro name would become part of the expansion.
 */
#define RET	movs
#define RETc(x)	mov##x##s
#define RETCOND ^

/* Symbolic register names shared by the division and modulo routines. */
dividend	.req	r0		@ numerator in; quotient or remainder out
divisor		.req	r1		@ denominator in; scaled during the loops
result		.req	r2		@ quotient accumulator (divide routines)
overdone	.req	r2		@ over-subtraction record (modulo routines)
curbit		.req	r3		@ quotient bit matching the scaled divisor
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
|  | 57 |  | 
/*
 * __udivsi3 - unsigned 32-bit division by shift-and-subtract.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = dividend / divisor
 * Uses:  r2 (quotient accumulator), r3 (current quotient bit), flags.
 *        r1 is modified.
 *
 * A zero divisor is routed to Ldiv0.
 */
ENTRY(__udivsi3)
	cmp	divisor, #0
	beq	Ldiv0
	mov	result, #0
	mov	curbit, #1
	cmp	dividend, divisor
	bcc	Lgot_result_udivsi3		@ dividend < divisor, quotient is 0

1:
	@ Coarse scaling.  The subtract loop at 3: produces four quotient
	@ bits per pass, so move divisor and curbit up a nibble at a time
	@ while divisor is below both 0x10000000 and the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #4
	movcc	divisor, divisor, lsl #4
	bcc	1b

2:
	@ Fine scaling.  With a large divisor a four-bit shift could push
	@ bits off the top, so continue one bit at a time while divisor
	@ is below both 0x80000000 and the dividend.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #1
	movcc	divisor, divisor, lsl #1
	bcc	2b

3:
	@ Four unrolled compare/subtract steps against divisor, divisor/2,
	@ divisor/4 and divisor/8, setting the matching quotient bit for
	@ each subtraction taken.  On the last pass some of these steps may
	@ subtract too much, but curbit shifted right is zero for them, so
	@ nothing wrong is recorded in the quotient.
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	orrcs	result, result, curbit
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	result, result, curbit, lsr #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	result, result, curbit, lsr #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	result, result, curbit, lsr #3
	cmp	dividend, #0			@ nothing left to divide?
	movnes	curbit, curbit, lsr #4		@ else next nibble; Z set once curbit is used up
	movne	divisor, divisor, lsr #4
	bne	3b

Lgot_result_udivsi3:
	mov	r0, result			@ quotient goes back in r0
	RET	pc, lr
|  | 109 |  | 
/*
 * Ldiv0 - divide-by-zero exit reached by branch from the routines in
 * this file.  Keeps the original return address on the stack across the
 * call to __div0, then returns 0 in r0 by popping that address straight
 * into pc (with the RETCOND suffix on the register list).
 */
Ldiv0:
	str	lr, [sp, #-4]!			@ push return address
	bl	__div0
	mov	r0, #0				@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND
|  | 115 |  | 
|  | 116 | /* __umodsi3 ----------------------- */ | 
|  | 117 |  | 
/*
 * __umodsi3 - unsigned 32-bit remainder by shift-and-subtract.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = dividend % divisor
 * Uses:  r2 (over-subtraction record), r3 (current bit), r12, flags.
 *        r1 is modified.
 *
 * A zero divisor is routed to Ldiv0.  When dividend < divisor the
 * routine returns at once with r0 unchanged.
 */
ENTRY(__umodsi3)
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	cmp	dividend, divisor
	RETc(cc)	pc, lr

1:
	@ Coarse scaling.  The subtract loop at 3: covers four bit
	@ positions per pass, so move divisor and curbit up a nibble at a
	@ time while divisor is below both 0x10000000 and the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #4
	movcc	divisor, divisor, lsl #4
	bcc	1b

2:
	@ Fine scaling.  With a large divisor a four-bit shift could push
	@ bits off the top, so continue one bit at a time while divisor
	@ is below both 0x80000000 and the dividend.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #1
	movcc	divisor, divisor, lsl #1
	bcc	2b

3:
	@ Four unrolled compare/subtract steps against divisor, divisor/2,
	@ divisor/4 and divisor/8.  The last pass may subtract too much,
	@ so each shifted step that fires also ORs curbit rotated right by
	@ the same amount into overdone; the record is rebuilt from zero
	@ on every pass and examined after the loop.
	mov	overdone, #0
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	overdone, overdone, curbit, ror #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	overdone, overdone, curbit, ror #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	overdone, overdone, curbit, ror #3
	mov	ip, curbit			@ remember the bit used on this pass
	cmp	dividend, #0			@ nothing left to reduce?
	movnes	curbit, curbit, lsr #4		@ else next nibble; Z set once curbit is used up
	movne	divisor, divisor, lsr #4
	bne	3b

	@ Fix-up.  A rotated bit that wrapped into the top three bits of
	@ overdone marks a subtraction that should not have happened; ip
	@ tells which shift each of those bits belongs to.  After an early
	@ exit on a zero dividend the bit in ip is not in the bottom
	@ nibble, so none of the tests below match.
	ands	overdone, overdone, #0xe0000000
	RETc(eq)	pc, lr				@ No fixups needed
	tst	overdone, ip, ror #3
	addne	dividend, dividend, divisor, lsr #3
	tst	overdone, ip, ror #2
	addne	dividend, dividend, divisor, lsr #2
	tst	overdone, ip, ror #1
	addne	dividend, dividend, divisor, lsr #1
	RET	pc, lr
|  | 181 |  | 
/*
 * __divsi3 - signed 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = dividend / divisor
 * Uses:  r2 (quotient accumulator), r3 (current quotient bit),
 *        r12 (sign of the result), flags.  r1 is modified.
 *
 * Both operands are replaced by their magnitudes, the unsigned
 * shift-and-subtract loops run on those, and the quotient is negated at
 * the end when the operand signs differed.  A zero divisor is routed to
 * Ldiv0.
 */
ENTRY(__divsi3)
	eor	ip, dividend, divisor		@ bit 31 of ip = sign of the quotient
	mov	result, #0
	mov	curbit, #1
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ magnitude only; flags from the cmp survive
	beq	Ldiv0
	cmp	dividend, #0
	rsbmi	dividend, dividend, #0
	cmp	dividend, divisor
	bcc	Lgot_result_divsi3		@ magnitude too small, quotient is 0

1:
	@ Coarse scaling.  The subtract loop at 3: produces four quotient
	@ bits per pass, so move divisor and curbit up a nibble at a time
	@ while divisor is below both 0x10000000 and the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #4
	movcc	divisor, divisor, lsl #4
	bcc	1b

2:
	@ Fine scaling.  With a large divisor a four-bit shift could push
	@ bits off the top, so continue one bit at a time while divisor
	@ is below both 0x80000000 and the dividend.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #1
	movcc	divisor, divisor, lsl #1
	bcc	2b

3:
	@ Four unrolled compare/subtract steps against divisor, divisor/2,
	@ divisor/4 and divisor/8, setting the matching quotient bit for
	@ each subtraction taken.  On the last pass some of these steps may
	@ subtract too much, but curbit shifted right is zero for them, so
	@ nothing wrong is recorded in the quotient.
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	orrcs	result, result, curbit
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	result, result, curbit, lsr #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	result, result, curbit, lsr #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	result, result, curbit, lsr #3
	cmp	dividend, #0			@ nothing left to divide?
	movnes	curbit, curbit, lsr #4		@ else next nibble; Z set once curbit is used up
	movne	divisor, divisor, lsr #4
	bne	3b

Lgot_result_divsi3:
	mov	r0, result
	cmp	ip, #0				@ did the operand signs differ?
	rsbmi	r0, r0, #0			@ yes: negate the quotient
	RET	pc, lr
|  | 240 |  | 
/*
 * __modsi3 - signed 32-bit remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = dividend % divisor, negated when the incoming dividend
 *        was negative
 * Uses:  r2 (over-subtraction record), r3 (current bit), r12, flags.
 *        r1 is modified.
 * Stack: one word (the incoming dividend), pushed after the zero check
 *        and popped at Lgot_result_modsi3.
 *
 * Both operands are replaced by their magnitudes before the unsigned
 * loops run.  A zero divisor is routed to Ldiv0.
 */
ENTRY(__modsi3)
	mov	curbit, #1
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ magnitude only; flags from the cmp survive
	beq	Ldiv0
	@ The sign of the dividend is needed at the end, and ip is wanted
	@ as a scratch register below, so park the original value on the
	@ stack; this is faster than pushing lr and using that.
	str	dividend, [sp, #-4]!
	cmp	dividend, #0
	rsbmi	dividend, dividend, #0
	cmp	dividend, divisor
	bcc	Lgot_result_modsi3		@ magnitude already below divisor

1:
	@ Coarse scaling.  The subtract loop at 3: covers four bit
	@ positions per pass, so move divisor and curbit up a nibble at a
	@ time while divisor is below both 0x10000000 and the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #4
	movcc	divisor, divisor, lsl #4
	bcc	1b

2:
	@ Fine scaling.  With a large divisor a four-bit shift could push
	@ bits off the top, so continue one bit at a time while divisor
	@ is below both 0x80000000 and the dividend.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	curbit, curbit, lsl #1
	movcc	divisor, divisor, lsl #1
	bcc	2b

3:
	@ Four unrolled compare/subtract steps against divisor, divisor/2,
	@ divisor/4 and divisor/8.  The last pass may subtract too much,
	@ so each shifted step that fires also ORs curbit rotated right by
	@ the same amount into overdone; the record is rebuilt from zero
	@ on every pass and examined after the loop.
	mov	overdone, #0
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	overdone, overdone, curbit, ror #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	overdone, overdone, curbit, ror #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	overdone, overdone, curbit, ror #3
	mov	ip, curbit			@ remember the bit used on this pass
	cmp	dividend, #0			@ nothing left to reduce?
	movnes	curbit, curbit, lsr #4		@ else next nibble; Z set once curbit is used up
	movne	divisor, divisor, lsr #4
	bne	3b

	@ Fix-up.  A rotated bit that wrapped into the top three bits of
	@ overdone marks a subtraction that should not have happened; ip
	@ tells which shift each of those bits belongs to.  After an early
	@ exit on a zero dividend the bit in ip is not in the bottom
	@ nibble, so none of the tests below match.
	ands	overdone, overdone, #0xe0000000
	beq	Lgot_result_modsi3		@ nothing to add back
	tst	overdone, ip, ror #3
	addne	dividend, dividend, divisor, lsr #3
	tst	overdone, ip, ror #2
	addne	dividend, dividend, divisor, lsr #2
	tst	overdone, ip, ror #1
	addne	dividend, dividend, divisor, lsr #1

Lgot_result_modsi3:
	ldr	ip, [sp], #4			@ pop the original dividend
	cmp	ip, #0
	rsbmi	dividend, dividend, #0		@ remainder takes the sign of the dividend
	RET	pc, lr