#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 */
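/*
 * Entry assumptions (these conventions are set up by the
 * altivec-unavailable exception entry code, not by this file): on
 * 32-bit, r9 holds the interrupted MSR and SPRN_SPRG_THREAD holds the
 * physical address of the current thread_struct; on 64-bit, r12 holds
 * the interrupted MSR and r13 points at the PACA.  The code below
 * relies on these when it sets MSR_VEC for the return to the caller.
 */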
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code.  Note that we could rely on the VRSAVE value to
 * eventually avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpdi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
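/*
 * r3 holds the task_struct pointer of the previous VMX owner, or NULL
 * if there was none; the NULL check below simply returns in that case.
 * (This note just restates what the code below does.)
 */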
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX (and VSX) for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the FP and AltiVec load paths, but first check whether those
 * register sets have already been loaded.
 */
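/*
 * Entered from the VSX-unavailable exception on 64-bit only; r12 is
 * assumed to hold the interrupted MSR (as saved by the exception entry
 * code).  That is what the MSR_FP/MSR_VEC checks below test, and what
 * has MSR_VSX set before being stored back into the exception frame.
 */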
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
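/*
 * Register/stack conventions used by fpenable/fpdisable (inferred from
 * the code itself, not an external ABI): callers put their return
 * address in r12 before branching here; fpenable allocates a 64-byte
 * frame, keeps the old MSR in r10 and the old FPSCR in fr31, and saves
 * fr0/fr1/fr31 at frame offsets 24/16/8.  Offsets 32..56 are left free
 * for callers that need to save fr2..fr5.  fpdisable undoes all of this
 * and returns through the address held in r12.
 */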
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
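/*
 * Operand convention shared by the vector helpers below (restating what
 * the loops themselves do): r3 points at the destination and r4 (and
 * r5, r6 where present) at the source operands; each operand is four
 * 32-bit single-precision floats, processed one element per iteration.
 */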
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
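/*
 * Note on operand order: with r4 = A, r5 = B, r6 = C the loop below
 * computes dst = A*C + B (the addend is the middle pointer argument),
 * matching the (vA * vC) + vB semantics of the AltiVec vmaddfp
 * instruction this routine stands in for.
 */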
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
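/*
 * Same operand layout as vmaddfp above; the fnmsubs in the loop yields
 * dst = -(A*C - B) for each element.
 */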
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
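/*
 * Derivation of the refinement step used below (standard Newton-Raphson
 * on f(r) = 1/r^2 - s, whose positive root is r = 1/sqrt(s)):
 *
 *	r' = r - f(r)/f'(r)
 *	   = r + (1/r^2 - s) * r^3 / 2
 *	   = r + 0.5 * r * (1 - s * r^2)
 *
 * which is exactly what the fmuls/fnmsubs/fmadds sequence computes,
 * applied twice per element.
 */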
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable