/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * The alpha chip doesn't provide hardware division, so we have to do it
 * by hand.  The compiler expects the functions
 *
 *	__divqu: 64-bit unsigned long divide
 *	__remqu: 64-bit unsigned long remainder
 *	__divq/__remq: signed 64-bit
 *	__divlu/__remlu: unsigned 32-bit
 *	__divl/__reml: signed 32-bit
 *
 * These are not normal C functions: instead of the normal
 * calling sequence, these expect their arguments in registers
 * $24 and $25 and the return address in $23, and return the
 * result in $27. Register $28 may be clobbered (assembly
 * temporary), anything else must be saved.
 *
 * In short: painful.
 *
 * This is a rather simple bit-at-a-time algorithm: it's very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm
 * using lookup tables. This uses much less memory, though, and is
 * nicer on the cache.. Besides, I don't know the copyright status
 * of the DEC code.
 */

/*
 * My temporaries:
 *	$0 - current bit
 *	$1 - shifted divisor
 *	$2 - modulus (divide build) / quotient (remainder build)
 *	$3 - scratch (tmp1)
 *	$4 - scratch (tmp2, divide build only)
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - result: quotient (divide build) / modulus (remainder build)
 *	$28 - compare status
 */

 | 48 | #define halt .long 0 | 
 | 49 |  | 
 | 50 | /* | 
 | 51 |  * Select function type and registers | 
 | 52 |  */ | 
 | 53 | #define mask	$0 | 
 | 54 | #define divisor	$1 | 
 | 55 | #define compare $28 | 
 | 56 | #define tmp1	$3 | 
 | 57 | #define tmp2	$4 | 
 | 58 |  | 
 | 59 | #ifdef DIV | 
 | 60 | #define DIV_ONLY(x,y...) x,##y | 
 | 61 | #define MOD_ONLY(x,y...) | 
 | 62 | #define func(x) __div##x | 
 | 63 | #define modulus $2 | 
 | 64 | #define quotient $27 | 
 | 65 | #define GETSIGN(x) xor $24,$25,x | 
 | 66 | #define STACK 48 | 
 | 67 | #else | 
 | 68 | #define DIV_ONLY(x,y...) | 
 | 69 | #define MOD_ONLY(x,y...) x,##y | 
 | 70 | #define func(x) __rem##x | 
 | 71 | #define modulus $27 | 
 | 72 | #define quotient $2 | 
 | 73 | #define GETSIGN(x) bis $24,$24,x | 
 | 74 | #define STACK 32 | 
 | 75 | #endif | 
 | 76 |  | 
 | 77 | /* | 
 | 78 |  * For 32-bit operations, we need to extend to 64-bit | 
 | 79 |  */ | 
 | 80 | #ifdef INTSIZE | 
 | 81 | #define ufunction func(lu) | 
 | 82 | #define sfunction func(l) | 
 | 83 | #define LONGIFY(x) zapnot x,15,x | 
 | 84 | #define SLONGIFY(x) addl x,0,x | 
 | 85 | #else | 
 | 86 | #define ufunction func(qu) | 
 | 87 | #define sfunction func(q) | 
 | 88 | #define LONGIFY(x) | 
 | 89 | #define SLONGIFY(x) | 
 | 90 | #endif | 
 | 91 |  | 
 | 92 | .set noat | 
 | 93 | .align	3 | 
 | 94 | .globl	ufunction | 
 | 95 | .ent	ufunction | 
 | 96 | ufunction: | 
 | 97 | 	subq	$30,STACK,$30 | 
 | 98 | 	.frame	$30,STACK,$23 | 
 | 99 | 	.prologue 0 | 
 | 100 |  | 
 | 101 | 7:	stq	$1, 0($30) | 
 | 102 | 	bis	$25,$25,divisor | 
 | 103 | 	stq	$2, 8($30) | 
 | 104 | 	bis	$24,$24,modulus | 
 | 105 | 	stq	$0,16($30) | 
 | 106 | 	bis	$31,$31,quotient | 
 | 107 | 	LONGIFY(divisor) | 
 | 108 | 	stq	tmp1,24($30) | 
 | 109 | 	LONGIFY(modulus) | 
 | 110 | 	bis	$31,1,mask | 
 | 111 | 	DIV_ONLY(stq tmp2,32($30)) | 
 | 112 | 	beq	divisor, 9f			/* div by zero */ | 
 | 113 |  | 
 | 114 | #ifdef INTSIZE | 
 | 115 | 	/* | 
 | 116 | 	 * shift divisor left, using 3-bit shifts for | 
 | 117 | 	 * 32-bit divides as we can't overflow. Three-bit | 
 | 118 | 	 * shifts will result in looping three times less | 
 | 119 | 	 * here, but can result in two loops more later. | 
 | 120 | 	 * Thus using a large shift isn't worth it (and | 
 | 121 | 	 * s8add pairs better than a sll..) | 
 | 122 | 	 */ | 
 | 123 | 1:	cmpult	divisor,modulus,compare | 
 | 124 | 	s8addq	divisor,$31,divisor | 
 | 125 | 	s8addq	mask,$31,mask | 
 | 126 | 	bne	compare,1b | 
 | 127 | #else | 
 | 128 | 1:	cmpult	divisor,modulus,compare | 
 | 129 | 	blt     divisor, 2f | 
 | 130 | 	addq	divisor,divisor,divisor | 
 | 131 | 	addq	mask,mask,mask | 
 | 132 | 	bne	compare,1b | 
 | 133 | 	unop | 
 | 134 | #endif | 
 | 135 |  | 
 | 136 | 	/* ok, start to go right again.. */ | 
 | 137 | 2:	DIV_ONLY(addq quotient,mask,tmp2) | 
 | 138 | 	srl	mask,1,mask | 
 | 139 | 	cmpule	divisor,modulus,compare | 
 | 140 | 	subq	modulus,divisor,tmp1 | 
 | 141 | 	DIV_ONLY(cmovne compare,tmp2,quotient) | 
 | 142 | 	srl	divisor,1,divisor | 
 | 143 | 	cmovne	compare,tmp1,modulus | 
 | 144 | 	bne	mask,2b | 
 | 145 |  | 
 | 146 | 9:	ldq	$1, 0($30) | 
 | 147 | 	ldq	$2, 8($30) | 
 | 148 | 	ldq	$0,16($30) | 
 | 149 | 	ldq	tmp1,24($30) | 
 | 150 | 	DIV_ONLY(ldq tmp2,32($30)) | 
 | 151 | 	addq	$30,STACK,$30 | 
 | 152 | 	ret	$31,($23),1 | 
 | 153 | 	.end	ufunction | 
 | 154 |  | 
 | 155 | /* | 
 | 156 |  * Uhh.. Ugly signed division. I'd rather not have it at all, but | 
 | 157 |  * it's needed in some circumstances. There are different ways to | 
 | 158 |  * handle this, really. This does: | 
 | 159 |  * 	-a / b = a / -b = -(a / b) | 
 | 160 |  *	-a % b = -(a % b) | 
 | 161 |  *	a % -b = a % b | 
 | 162 |  * which is probably not the best solution, but at least should | 
 | 163 |  * have the property that (x/y)*y + (x%y) = x. | 
 | 164 |  */ | 
 | 165 | .align 3 | 
 | 166 | .globl	sfunction | 
 | 167 | .ent	sfunction | 
 | 168 | sfunction: | 
 | 169 | 	subq	$30,STACK,$30 | 
 | 170 | 	.frame	$30,STACK,$23 | 
 | 171 | 	.prologue 0 | 
 | 172 | 	bis	$24,$25,$28 | 
 | 173 | 	SLONGIFY($28) | 
 | 174 | 	bge	$28,7b | 
 | 175 | 	stq	$24,0($30) | 
 | 176 | 	subq	$31,$24,$28 | 
 | 177 | 	stq	$25,8($30) | 
 | 178 | 	cmovlt	$24,$28,$24	/* abs($24) */ | 
 | 179 | 	stq	$23,16($30) | 
 | 180 | 	subq	$31,$25,$28 | 
 | 181 | 	stq	tmp1,24($30) | 
 | 182 | 	cmovlt	$25,$28,$25	/* abs($25) */ | 
 | 183 | 	unop | 
 | 184 | 	bsr	$23,ufunction | 
 | 185 | 	ldq	$24,0($30) | 
 | 186 | 	ldq	$25,8($30) | 
 | 187 | 	GETSIGN($28) | 
 | 188 | 	subq	$31,$27,tmp1 | 
 | 189 | 	SLONGIFY($28) | 
 | 190 | 	ldq	$23,16($30) | 
 | 191 | 	cmovlt	$28,tmp1,$27 | 
 | 192 | 	ldq	tmp1,24($30) | 
 | 193 | 	addq	$30,STACK,$30 | 
 | 194 | 	ret	$31,($23),1 | 
 | 195 | 	.end	sfunction |