| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | .file	"wm_shrx.S" | 
|  | 2 | /*---------------------------------------------------------------------------+ | 
|  | 3 | |  wm_shrx.S                                                                | | 
|  | 4 | |                                                                           | | 
|  | 5 | | 64 bit right shift functions                                              | | 
|  | 6 | |                                                                           | | 
|  | 7 | | Copyright (C) 1992,1995                                                   | | 
|  | 8 | |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      | | 
|  | 9 | |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au | | 
|  | 10 | |                                                                           | | 
|  | 11 | | Call from C as:                                                           | | 
|  | 12 | |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            | | 
|  | 13 | | and                                                                       | | 
|  | 14 | |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           | | 
|  | 15 | |                                                                           | | 
|  | 16 | +---------------------------------------------------------------------------*/ | 
|  | 17 |  | 
|  | 18 | #include "fpu_emu.h" | 
|  | 19 |  | 
|  | 20 | .text | 
/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 |                                                                           |
 |   Extended right shift, quickest when the shift count is small.           |
 |                                                                           |
 |   arg1 points to a 64 bit value stored low word first; arg2 is the        |
 |   number of bits to shift it right by.  A zeroed 32 bit extension word    |
 |   is placed below the value and the 96 bit whole is shifted:              |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The shifted value is stored back through arg1 and the extension word    |
 |   is returned in eax.  Bits which move below the extension are lost.      |
 |   A count of 96 or more leaves zero in both the value and eax.            |
 |                                                                           |
 |   esi (and ebx where used) are saved and restored; ecx and edx are        |
 |   used as scratch.                                                        |
 +---------------------------------------------------------------------------*/

ENTRY(FPU_shrx)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM1,%esi		/* esi -> the 64 bit value */
	movl	PARAM2,%ecx		/* ecx = requested shift count */
	cmpl	$32,%ecx		/* shrd/shr use only the low 5 bits of cl */
	jae	.Lshrx_32_or_more

	/* Count in [0..31]: msl feeds lsl, lsl feeds the extension. */
	pushl	%ebx
	movl	(%esi),%ebx		/* ebx = lsl */
	movl	4(%esi),%edx		/* edx = msl */
	xorl	%eax,%eax		/* eax = extension, initially zero */
	shrdl	%cl,%ebx,%eax		/* extension receives the low bits of lsl */
	shrdl	%cl,%edx,%ebx		/* lsl receives the low bits of msl */
	shrl	%cl,%edx		/* msl is zero filled from the top */
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

.Lshrx_32_or_more:
	cmpl	$64,%ecx
	jae	.Lshrx_64_or_more

	/*
	 * Count in [32..63]: every word moves down one slot (that is 32 bits
	 * of the shift), then the remaining count-32 bits are shifted.
	 */
	subb	$32,%cl
	movl	(%esi),%eax		/* old lsl is now the extension */
	movl	4(%esi),%edx		/* old msl is now the lsl */
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)		/* nothing is left in the msl */
	popl	%esi
	leave
	ret

.Lshrx_64_or_more:
	cmpl	$96,%ecx
	jae	.Lshrx_96_or_more

	/*
	 * Count in [64..95]: only the old msl can still reach the extension;
	 * the value itself becomes zero.
	 */
	subb	$64,%cl
	movl	4(%esi),%eax		/* old msl is now the extension */
	shrl	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)
	movl	%edx,4(%esi)
	popl	%esi
	leave
	ret

.Lshrx_96_or_more:
	/* Count of 96 or more: every bit has been shifted out. */
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	ret
|  | 95 |  | 
|  | 96 |  | 
/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 |   Extended right shift with a sticky flag; the straight-line path is      |
 |   the one for counts of 32..63 (small floating point integers).           |
 |                                                                           |
 |   arg1 points to a 64 bit value stored low word first; arg2 is the        |
 |   number of bits to shift it right by.  A zeroed 32 bit extension word    |
 |   is placed below the value and the 96 bit whole is shifted:              |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The shifted value is stored back through arg1 and the extension word    |
 |   is returned in eax, except that its low 8 bits are replaced by a        |
 |   flag: al is 0x01 if any bit below the top bit of the extension          |
 |   (including bits shifted out past the extension) is set, else 0x00.      |
 |   For a count of 96 or more eax is just 0 or 1, according to whether      |
 |   the original value had any bit set.                                     |
 |                                                                           |
 |   esi and ebx are saved and restored; ecx and edx are used as scratch.    |
 +---------------------------------------------------------------------------*/
ENTRY(FPU_shrxs)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM1,%esi		/* esi -> the 64 bit value */
	movl	PARAM2,%ecx		/* ecx = requested shift count */
	cmpl	$64,%ecx		/* 64 or more is handled out of line */
	jae	.Lshrxs_64_or_more

	cmpl	$32,%ecx		/* shrd/shr use only the low 5 bits of cl */
	jb	.Lshrxs_below_32

	/*
	 * Count in [32..63].  This case is reached by falling through, on
	 * the assumption that small integers are the most common request.
	 * Every word moves down one slot, then the remaining count-32 bits
	 * are shifted; ebx collects what drops out below the extension.
	 */
	subb	$32,%cl
	movl	(%esi),%eax		/* old lsl is now the extension */
	movl	4(%esi),%edx		/* old msl is now the lsl */
	xorl	%ebx,%ebx
	shrdl	%cl,%eax,%ebx		/* ebx = bits shifted out of the extension */
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	testl	%ebx,%ebx		/* any of those 32 bits set? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* any of the low 31 extension bits set? */
	setne	%bh
	testw	%bx,%bx			/* combine: any of the 63 bits set? */
	setne	%al			/* sticky flag replaces the low byte */
	movl	%edx,(%esi)
	movl	$0,4(%esi)		/* nothing is left in the msl */
	popl	%ebx
	popl	%esi
	leave
	ret

	/* Count in [0..31]: msl feeds lsl, lsl feeds the extension. */
.Lshrxs_below_32:
	movl	(%esi),%ebx		/* ebx = lsl */
	movl	4(%esi),%edx		/* edx = msl */
	xorl	%eax,%eax		/* eax = extension, initially zero */
	shrdl	%cl,%ebx,%eax
	shrdl	%cl,%edx,%ebx
	shrl	%cl,%edx
	testl	$0x7fffffff,%eax	/* nothing drops below eax for these counts */
	setne	%al			/* sticky flag replaces the low byte */
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

	/*
	 * Count in [64..95]: the old msl becomes the extension and the value
	 * itself becomes zero.  ebx and edx collect everything that drops out
	 * below the extension.
	 */
.Lshrxs_64_or_more:
	cmpl	$96,%ecx
	jae	.Lshrxs_96_or_more

	subb	$64,%cl
	movl	(%esi),%ebx		/* ebx = lsl */
	movl	4(%esi),%eax		/* eax = msl, which becomes the extension */
	xorl	%edx,%edx
	shrdl	%cl,%ebx,%edx		/* edx = bits shifted out of lsl */
	shrdl	%cl,%eax,%ebx		/* ebx = lsl refilled from msl */
	shrl	%cl,%eax
	orl	%ebx,%edx		/* non-zero if anything below eax is set */
	setne	%bl
	testl	$0x7fffffff,%eax	/* any of the low 31 extension bits set? */
	setne	%bh
	testw	%bx,%bx
	setne	%al			/* sticky flag replaces the low byte */
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* the value is now zero */
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

.Lshrxs_96_or_more:
	/*
	 * Count of 96 or more: every bit has been shifted out, so the result
	 * is only the flag saying whether the original value was non-zero.
	 */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx		/* non-zero if either word had a bit set */
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret