| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | 	.file	"wm_shrx.S" | 
 | 2 | /*---------------------------------------------------------------------------+ | 
 | 3 |  |  wm_shrx.S                                                                | | 
 | 4 |  |                                                                           | | 
 | 5 |  | 64 bit right shift functions                                              | | 
 | 6 |  |                                                                           | | 
 | 7 |  | Copyright (C) 1992,1995                                                   | | 
 | 8 |  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      | | 
 | 9 |  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au | | 
 | 10 |  |                                                                           | | 
 | 11 |  | Call from C as:                                                           | | 
 | 12 |  |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            | | 
 | 13 |  | and                                                                       | | 
 | 14 |  |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           | | 
 | 15 |  |                                                                           | | 
 | 16 |  +---------------------------------------------------------------------------*/ | 
 | 17 |  | 
 | 18 | #include "fpu_emu.h" | 
 | 19 |  | 
 | 20 | .text | 
 | 21 | /*---------------------------------------------------------------------------+ | 
 | 22 |  |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            | | 
 | 23 |  |                                                                           | | 
 | 24 |  |   Extended shift right function.                                          | | 
 | 25 |  |   Fastest for small shifts.                                               | | 
 | 26 |  |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           | | 
 | 27 |  |   right by the number of bits specified by the second arg (arg2).         | | 
 | 28 |  |   Forms a 96 bit quantity from the 64 bit arg and eax:                    | | 
 | 29 |  |                [  64 bit arg ][ eax ]                                     | | 
 | 30 |  |            shift right  --------->                                        | | 
 | 31 |  |   The eax register is initialized to 0 before the shifting.               | | 
 | 32 |  |   Results returned in the 64 bit arg and eax.                             | | 
 | 33 |  +---------------------------------------------------------------------------*/ | 
 | 34 |  | 
 | 35 | ENTRY(FPU_shrx)	/* shift the 64 bit value at arg1 right by arg2 bits; eax returns the 32 bits just below it */ | 
 | 36 | 	push	%ebp | 
 | 37 | 	movl	%esp,%ebp | 
 | 38 | 	pushl	%esi | 
 | 39 | 	movl	PARAM2,%ecx	/* ecx = shift count */ | 
 | 40 | 	movl	PARAM1,%esi	/* esi = pointer to the 64 bit value */ | 
 | 41 | 	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */ | 
 | 42 | 	jnc	L_more_than_31	/* unsigned compare: taken when count >= 32 */ | 
 | 43 |  | 
 | 44 | /* less than 32 bits */ | 
 | 45 | 	pushl	%ebx	/* ebx is used only on this path, so it is saved only here */ | 
 | 46 | 	movl	(%esi),%ebx	/* lsl: low 32 bits of the value */ | 
 | 47 | 	movl	4(%esi),%edx	/* msl: high 32 bits of the value */ | 
 | 48 | 	xorl	%eax,%eax	/* extension */ | 
 | 49 | 	shrd	%cl,%ebx,%eax	/* bits leaving lsl enter the top of eax */ | 
 | 50 | 	shrd	%cl,%edx,%ebx	/* lsl takes its new high bits from msl */ | 
 | 51 | 	shr	%cl,%edx	/* msl is zero filled from the top */ | 
 | 52 | 	movl	%ebx,(%esi) | 
 | 53 | 	movl	%edx,4(%esi) | 
 | 54 | 	popl	%ebx | 
 | 55 | 	popl	%esi | 
 | 56 | 	leave	/* esp = ebp, then pop ebp */ | 
 | 57 | 	ret | 
 | 58 |  | 
 | 59 | L_more_than_31: | 
 | 60 | 	cmpl	$64,%ecx | 
 | 61 | 	jnc	L_more_than_63	/* taken when count >= 64 */ | 
 | 62 |  | 
 | 63 | 	subb	$32,%cl	/* the whole-word move below accounts for 32 bits; cl = remaining 0..31 */ | 
 | 64 | 	movl	(%esi),%eax	/* lsl: old low word becomes the extension */ | 
 | 65 | 	movl	4(%esi),%edx	/* msl */ | 
 | 66 | 	shrd	%cl,%edx,%eax	/* extension takes its high bits from msl; bits shifted out of eax are discarded */ | 
 | 67 | 	shr	%cl,%edx | 
 | 68 | 	movl	%edx,(%esi)	/* new low word = shifted msl */ | 
 | 69 | 	movl	$0,4(%esi)	/* new high word = 0 */ | 
 | 70 | 	popl	%esi | 
 | 71 | 	leave | 
 | 72 | 	ret | 
 | 73 |  | 
 | 74 | L_more_than_63: | 
 | 75 | 	cmpl	$96,%ecx | 
 | 76 | 	jnc	L_more_than_95	/* taken when count >= 96 */ | 
 | 77 |  | 
 | 78 | 	subb	$64,%cl	/* cl = count - 64, in 0..31 */ | 
 | 79 | 	movl	4(%esi),%eax	/* msl */ | 
 | 80 | 	shr	%cl,%eax	/* only bits from msl can still reach the extension */ | 
 | 81 | 	xorl	%edx,%edx | 
 | 82 | 	movl	%edx,(%esi)	/* the 64 bit value itself becomes zero */ | 
 | 83 | 	movl	%edx,4(%esi) | 
 | 84 | 	popl	%esi | 
 | 85 | 	leave | 
 | 86 | 	ret | 
 | 87 |  | 
 | 88 | L_more_than_95: | 
 | 89 | 	xorl	%eax,%eax	/* everything is shifted out: extension = 0 */ | 
 | 90 | 	movl	%eax,(%esi)	/* and the 64 bit value = 0 */ | 
 | 91 | 	movl	%eax,4(%esi) | 
 | 92 | 	popl	%esi | 
 | 93 | 	leave | 
 | 94 | 	ret | 
 | 95 |  | 
 | 96 |  | 
 | 97 | /*---------------------------------------------------------------------------+ | 
 | 98 |  |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           | | 
 | 99 |  |                                                                           | | 
 | 100 |  |   Extended shift right function (optimized for small floating point       | | 
 | 101 |  |   integers).                                                              | | 
 | 102 |  |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           | | 
 | 103 |  |   right by the number of bits specified by the second arg (arg2).         | | 
 | 104 |  |   Forms a 96 bit quantity from the 64 bit arg and eax:                    | | 
 | 105 |  |                [  64 bit arg ][ eax ]                                     | | 
 | 106 |  |            shift right  --------->                                        | | 
 | 107 |  |   The eax register is initialized to 0 before the shifting.               | | 
 | 108 |  |   The lower 8 bits of eax are lost and replaced by a flag which is        | | 
 | 109 |  |   set (to 0x01) if any bit, apart from the most significant one (bit 31   | | 
 | 110 |  |   of eax), is set in the part which has been shifted out of the arg.      | | 
 | 111 |  |   Results returned in the 64 bit arg and eax.                             | | 
 | 112 |  +---------------------------------------------------------------------------*/ | 
 | 113 | ENTRY(FPU_shrxs)	/* as FPU_shrx, but the low byte of eax is replaced by a 0/1 flag for the shifted out bits below bit 31 of eax */ | 
 | 114 | 	push	%ebp | 
 | 115 | 	movl	%esp,%ebp | 
 | 116 | 	pushl	%esi | 
 | 117 | 	pushl	%ebx	/* every path below uses ebx, so it is saved up front */ | 
 | 118 | 	movl	PARAM2,%ecx	/* ecx = shift count */ | 
 | 119 | 	movl	PARAM1,%esi	/* esi = pointer to the 64 bit value */ | 
 | 120 | 	cmpl	$64,%ecx	/* shifts of 64 bits or more are handled separately */ | 
 | 121 | 	jnc	Ls_more_than_63	/* unsigned compare: taken when count >= 64 */ | 
 | 122 |  | 
 | 123 | 	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */ | 
 | 124 | 	jc	Ls_less_than_32	/* taken when count < 32 */ | 
 | 125 |  | 
 | 126 | /* We got here without jumps by assuming that the most common requirement | 
 | 127 |    is for small integers */ | 
 | 128 | /* Shift by [32..63] bits */ | 
 | 129 | 	subb	$32,%cl	/* the whole-word move below accounts for 32 bits; cl = remaining 0..31 */ | 
 | 130 | 	movl	(%esi),%eax	/* lsl: old low word becomes the extension */ | 
 | 131 | 	movl	4(%esi),%edx	/* msl */ | 
 | 132 | 	xorl	%ebx,%ebx	/* ebx collects the bits that fall below the extension */ | 
 | 133 | 	shrd	%cl,%eax,%ebx | 
 | 134 | 	shrd	%cl,%edx,%eax | 
 | 135 | 	shr	%cl,%edx | 
 | 136 | 	orl	%ebx,%ebx		/* test these 32 bits */ | 
 | 137 | 	setne	%bl	/* bl = 1 if any bit fell below the extension */ | 
 | 138 | 	test	$0x7fffffff,%eax	/* and 31 bits here */ | 
 | 139 | 	setne	%bh	/* bh = 1 if any of the low 31 bits of eax is set */ | 
 | 140 | 	orw	%bx,%bx			/* Any of the 63 bit set ? */ | 
 | 141 | 	setne	%al	/* low byte of eax becomes the flag; the upper 24 bits are kept */ | 
 | 142 | 	movl	%edx,(%esi)	/* new low word = shifted msl */ | 
 | 143 | 	movl	$0,4(%esi)	/* new high word = 0 */ | 
 | 144 | 	popl	%ebx | 
 | 145 | 	popl	%esi | 
 | 146 | 	leave	/* esp = ebp, then pop ebp */ | 
 | 147 | 	ret | 
 | 148 |  | 
 | 149 | /* Shift by [0..31] bits */ | 
 | 150 | Ls_less_than_32: | 
 | 151 | 	movl	(%esi),%ebx	/* lsl: low 32 bits of the value */ | 
 | 152 | 	movl	4(%esi),%edx	/* msl: high 32 bits of the value */ | 
 | 153 | 	xorl	%eax,%eax	/* extension */ | 
 | 154 | 	shrd	%cl,%ebx,%eax	/* bits leaving lsl enter the top of eax */ | 
 | 155 | 	shrd	%cl,%edx,%ebx	/* lsl takes its new high bits from msl */ | 
 | 156 | 	shr	%cl,%edx	/* msl is zero filled from the top */ | 
 | 157 | 	test	$0x7fffffff,%eax	/* only need to look at eax here */ | 
 | 158 | 	setne	%al	/* low byte of eax becomes the flag; the upper 24 bits are kept */ | 
 | 159 | 	movl	%ebx,(%esi) | 
 | 160 | 	movl	%edx,4(%esi) | 
 | 161 | 	popl	%ebx | 
 | 162 | 	popl	%esi | 
 | 163 | 	leave | 
 | 164 | 	ret | 
 | 165 |  | 
 | 166 | /* Shift by [64..95] bits */ | 
 | 167 | Ls_more_than_63: | 
 | 168 | 	cmpl	$96,%ecx | 
 | 169 | 	jnc	Ls_more_than_95	/* taken when count >= 96 */ | 
 | 170 |  | 
 | 171 | 	subb	$64,%cl	/* cl = count - 64, in 0..31 */ | 
 | 172 | 	movl	(%esi),%ebx	/* lsl */ | 
 | 173 | 	movl	4(%esi),%eax	/* msl */ | 
 | 174 | 	xorl	%edx,%edx	/* extension */ | 
 | 175 | 	shrd	%cl,%ebx,%edx	/* edx = bits leaving lsl */ | 
 | 176 | 	shrd	%cl,%eax,%ebx	/* ebx = lsl refilled from msl */ | 
 | 177 | 	shr	%cl,%eax	/* eax = what is left of msl: the returned extension */ | 
 | 178 | 	orl	%ebx,%edx	/* both ebx and edx now lie below the returned extension */ | 
 | 179 | 	setne	%bl	/* bl = 1 if any of those bits is set */ | 
 | 180 | 	test	$0x7fffffff,%eax	/* and the low 31 bits of eax as well */ | 
 | 181 | 	setne	%bh | 
 | 182 | 	orw	%bx,%bx	/* either partial flag set ? */ | 
 | 183 | 	setne	%al	/* low byte of eax becomes the flag; the upper 24 bits are kept */ | 
 | 184 | 	xorl	%edx,%edx | 
 | 185 | 	movl	%edx,(%esi)	/* set to zero */ | 
 | 186 | 	movl	%edx,4(%esi)	/* set to zero */ | 
 | 187 | 	popl	%ebx | 
 | 188 | 	popl	%esi | 
 | 189 | 	leave | 
 | 190 | 	ret | 
 | 191 |  | 
 | 192 | Ls_more_than_95: | 
 | 193 | /* Shift by [96..inf) bits */ | 
 | 194 | 	xorl	%eax,%eax	/* nothing reaches the extension */ | 
 | 195 | 	movl	(%esi),%ebx | 
 | 196 | 	orl	4(%esi),%ebx	/* non-zero if any bit of the 64 bit value is set */ | 
 | 197 | 	setne	%al	/* eax = 1 if the value was non-zero, otherwise 0 */ | 
 | 198 | 	xorl	%ebx,%ebx | 
 | 199 | 	movl	%ebx,(%esi)	/* the 64 bit value itself becomes zero */ | 
 | 200 | 	movl	%ebx,4(%esi) | 
 | 201 | 	popl	%ebx | 
 | 202 | 	popl	%esi | 
 | 203 | 	leave | 
 | 204 | 	ret | 