| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #include <linux/module.h> | 
 | 2 | #include <linux/types.h> | 
 | 3 | #include <linux/kernel.h> | 
 | 4 | #include <linux/sched.h> | 
 | 5 |  | 
 | 6 | #include <asm/uaccess.h> | 
 | 7 |  | 
 | 8 | #include "sfp-util.h" | 
 | 9 | #include <math-emu/soft-fp.h> | 
 | 10 | #include <math-emu/single.h> | 
 | 11 | #include <math-emu/double.h> | 
 | 12 |  | 
/*
 * Major opcodes, i.e. the value of (insn >> 26), as tested by
 * alpha_fp_emul_imprecise() while it walks backwards through the
 * instruction stream.  PAL and JSR end the walk; the INTx group is treated
 * as writing an integer register, the FLTx group as writing an FP register,
 * and MISC is inspected further using the MISC_* codes below.
 */
#define	OPC_PAL		0x00
#define OPC_INTA	0x10
#define OPC_INTL	0x11
#define OPC_INTS	0x12
#define OPC_INTM	0x13
#define OPC_FLTC	0x14
#define OPC_FLTV	0x15
#define OPC_FLTI	0x16
#define OPC_FLTL	0x17
#define OPC_MISC	0x18
#define	OPC_JSR		0x1a

/* Source operand format: the 2-bit field (insn >> 9) & 0x3.  */
#define FOP_SRC_S	0
#define FOP_SRC_T	2
#define FOP_SRC_Q	3

/*
 * Operation selector: the 4-bit field (insn >> 5) & 0xf.  ADDx and CVTQL
 * share the value 0; alpha_fp_emul() tells them apart by the source format
 * (CVTQL is only decoded under FOP_SRC_Q).
 */
#define FOP_FNC_ADDx	0
#define FOP_FNC_CVTQL	0
#define FOP_FNC_SUBx	1
#define FOP_FNC_MULx	2
#define FOP_FNC_DIVx	3
#define FOP_FNC_CMPxUN	4
#define FOP_FNC_CMPxEQ	5
#define FOP_FNC_CMPxLT	6
#define FOP_FNC_CMPxLE	7
#define FOP_FNC_SQRTx	11
#define FOP_FNC_CVTxS	12
#define FOP_FNC_CVTxT	14
#define FOP_FNC_CVTxQ	15

/*
 * Values of (insn & 0xffff) for OPC_MISC instructions that end the
 * backwards walk in alpha_fp_emul_imprecise().
 */
#define MISC_TRAPB	0x0000
#define MISC_EXCB	0x0400
 | 45 |  | 
/*
 * Floating-point register accessors, defined outside this file.  In this
 * file the plain variants are used for FOP_SRC_T and FOP_SRC_Q operands and
 * for results written at done_d; the _s variants are used for FOP_SRC_S
 * operands and for results written at pack_s.
 */
extern unsigned long alpha_read_fp_reg (unsigned long reg);
extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);
extern unsigned long alpha_read_fp_reg_s (unsigned long reg);
extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
 | 50 |  | 
 | 51 |  | 
 | 52 | #ifdef MODULE | 
 | 53 |  | 
 | 54 | MODULE_DESCRIPTION("FP Software completion module"); | 
 | 55 |  | 
/*
 * Emulation hooks: function pointers defined outside this file.
 * init_module() points them at this module's handlers.
 */
extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
extern long (*alpha_fp_emul) (unsigned long pc);

/*
 * Hook values found at load time; saved by init_module() and put back by
 * cleanup_module().
 */
static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
static long (*save_emul) (unsigned long pc);

/* Handlers implemented later in this file (under their do_ names).  */
long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
long do_alpha_fp_emul(unsigned long);
 | 64 |  | 
 | 65 | int init_module(void) | 
 | 66 | { | 
 | 67 | 	save_emul_imprecise = alpha_fp_emul_imprecise; | 
 | 68 | 	save_emul = alpha_fp_emul; | 
 | 69 | 	alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise; | 
 | 70 | 	alpha_fp_emul = do_alpha_fp_emul; | 
 | 71 | 	return 0; | 
 | 72 | } | 
 | 73 |  | 
 | 74 | void cleanup_module(void) | 
 | 75 | { | 
 | 76 | 	alpha_fp_emul_imprecise = save_emul_imprecise; | 
 | 77 | 	alpha_fp_emul = save_emul; | 
 | 78 | } | 
 | 79 |  | 
/*
 * In a modular build alpha_fp_emul and alpha_fp_emul_imprecise are the hook
 * pointers declared above, so they cannot also be function names.  These
 * macros make the function definitions that follow compile under the
 * do_alpha_fp_emul* names instead, while a non-modular build defines the
 * functions under the plain names.
 */
#undef  alpha_fp_emul_imprecise
#define alpha_fp_emul_imprecise		do_alpha_fp_emul_imprecise
#undef  alpha_fp_emul
#define alpha_fp_emul			do_alpha_fp_emul
 | 84 |  | 
 | 85 | #endif /* MODULE */ | 
 | 86 |  | 
 | 87 |  | 
 | 88 | /* | 
 | 89 |  * Emulate the floating point instruction at address PC.  Returns -1 if the | 
 | 90 |  * instruction to be emulated is illegal (such as with the opDEC trap), else | 
 | 91 |  * the SI_CODE for a SIGFPE signal, else 0 if everything's ok. | 
 | 92 |  * | 
 | 93 |  * Notice that the kernel does not and cannot use FP regs.  This is good | 
 | 94 |  * because it means that instead of saving/restoring all fp regs, we simply | 
 | 95 |  * stick the result of the operation into the appropriate register. | 
 | 96 |  */ | 
 | 97 | long | 
 | 98 | alpha_fp_emul (unsigned long pc) | 
 | 99 | { | 
 | 100 | 	FP_DECL_EX; | 
 | 101 | 	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); | 
 | 102 | 	FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); | 
 | 103 |  | 
 | 104 | 	unsigned long fa, fb, fc, func, mode, src; | 
 | 105 | 	unsigned long res, va, vb, vc, swcr, fpcr; | 
 | 106 | 	__u32 insn; | 
 | 107 | 	long si_code; | 
 | 108 |  | 
 | 109 | 	get_user(insn, (__u32 __user *)pc); | 
 | 110 | 	fc     = (insn >>  0) & 0x1f;	/* destination register */ | 
 | 111 | 	fb     = (insn >> 16) & 0x1f; | 
 | 112 | 	fa     = (insn >> 21) & 0x1f; | 
 | 113 | 	func   = (insn >>  5) & 0xf; | 
 | 114 | 	src    = (insn >>  9) & 0x3; | 
 | 115 | 	mode   = (insn >> 11) & 0x3; | 
 | 116 | 	 | 
 | 117 | 	fpcr = rdfpcr(); | 
 | 118 | 	swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); | 
 | 119 |  | 
 | 120 | 	if (mode == 3) { | 
 | 121 | 		/* Dynamic -- get rounding mode from fpcr.  */ | 
 | 122 | 		mode = (fpcr >> FPCR_DYN_SHIFT) & 3; | 
 | 123 | 	} | 
 | 124 |  | 
 | 125 | 	switch (src) { | 
 | 126 | 	case FOP_SRC_S: | 
 | 127 | 		va = alpha_read_fp_reg_s(fa); | 
 | 128 | 		vb = alpha_read_fp_reg_s(fb); | 
 | 129 | 		 | 
 | 130 | 		FP_UNPACK_SP(SA, &va); | 
 | 131 | 		FP_UNPACK_SP(SB, &vb); | 
 | 132 |  | 
 | 133 | 		switch (func) { | 
 | 134 | 		case FOP_FNC_SUBx: | 
 | 135 | 			FP_SUB_S(SR, SA, SB); | 
 | 136 | 			goto pack_s; | 
 | 137 |  | 
 | 138 | 		case FOP_FNC_ADDx: | 
 | 139 | 			FP_ADD_S(SR, SA, SB); | 
 | 140 | 			goto pack_s; | 
 | 141 |  | 
 | 142 | 		case FOP_FNC_MULx: | 
 | 143 | 			FP_MUL_S(SR, SA, SB); | 
 | 144 | 			goto pack_s; | 
 | 145 |  | 
 | 146 | 		case FOP_FNC_DIVx: | 
 | 147 | 			FP_DIV_S(SR, SA, SB); | 
 | 148 | 			goto pack_s; | 
 | 149 |  | 
 | 150 | 		case FOP_FNC_SQRTx: | 
 | 151 | 			FP_SQRT_S(SR, SB); | 
 | 152 | 			goto pack_s; | 
 | 153 | 		} | 
 | 154 | 		goto bad_insn; | 
 | 155 |  | 
 | 156 | 	case FOP_SRC_T: | 
 | 157 | 		va = alpha_read_fp_reg(fa); | 
 | 158 | 		vb = alpha_read_fp_reg(fb); | 
 | 159 |  | 
 | 160 | 		if ((func & ~3) == FOP_FNC_CMPxUN) { | 
 | 161 | 			FP_UNPACK_RAW_DP(DA, &va); | 
 | 162 | 			FP_UNPACK_RAW_DP(DB, &vb); | 
 | 163 | 			if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) { | 
 | 164 | 				FP_SET_EXCEPTION(FP_EX_DENORM); | 
 | 165 | 				if (FP_DENORM_ZERO) | 
 | 166 | 					_FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1); | 
 | 167 | 			} | 
 | 168 | 			if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) { | 
 | 169 | 				FP_SET_EXCEPTION(FP_EX_DENORM); | 
 | 170 | 				if (FP_DENORM_ZERO) | 
 | 171 | 					_FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1); | 
 | 172 | 			} | 
 | 173 | 			FP_CMP_D(res, DA, DB, 3); | 
 | 174 | 			vc = 0x4000000000000000UL; | 
 | 175 | 			/* CMPTEQ, CMPTUN don't trap on QNaN, | 
 | 176 | 			   while CMPTLT and CMPTLE do */ | 
 | 177 | 			if (res == 3 | 
 | 178 | 			    && ((func & 3) >= 2 | 
 | 179 | 				|| FP_ISSIGNAN_D(DA) | 
 | 180 | 				|| FP_ISSIGNAN_D(DB))) { | 
 | 181 | 				FP_SET_EXCEPTION(FP_EX_INVALID); | 
 | 182 | 			} | 
 | 183 | 			switch (func) { | 
 | 184 | 			case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break; | 
 | 185 | 			case FOP_FNC_CMPxEQ: if (res) vc = 0; break; | 
 | 186 | 			case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break; | 
 | 187 | 			case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break; | 
 | 188 | 			} | 
 | 189 | 			goto done_d; | 
 | 190 | 		} | 
 | 191 |  | 
 | 192 | 		FP_UNPACK_DP(DA, &va); | 
 | 193 | 		FP_UNPACK_DP(DB, &vb); | 
 | 194 |  | 
 | 195 | 		switch (func) { | 
 | 196 | 		case FOP_FNC_SUBx: | 
 | 197 | 			FP_SUB_D(DR, DA, DB); | 
 | 198 | 			goto pack_d; | 
 | 199 |  | 
 | 200 | 		case FOP_FNC_ADDx: | 
 | 201 | 			FP_ADD_D(DR, DA, DB); | 
 | 202 | 			goto pack_d; | 
 | 203 |  | 
 | 204 | 		case FOP_FNC_MULx: | 
 | 205 | 			FP_MUL_D(DR, DA, DB); | 
 | 206 | 			goto pack_d; | 
 | 207 |  | 
 | 208 | 		case FOP_FNC_DIVx: | 
 | 209 | 			FP_DIV_D(DR, DA, DB); | 
 | 210 | 			goto pack_d; | 
 | 211 |  | 
 | 212 | 		case FOP_FNC_SQRTx: | 
 | 213 | 			FP_SQRT_D(DR, DB); | 
 | 214 | 			goto pack_d; | 
 | 215 |  | 
 | 216 | 		case FOP_FNC_CVTxS: | 
 | 217 | 			/* It is irritating that DEC encoded CVTST with | 
 | 218 | 			   SRC == T_floating.  It is also interesting that | 
 | 219 | 			   the bit used to tell the two apart is /U... */ | 
 | 220 | 			if (insn & 0x2000) { | 
 | 221 | 				FP_CONV(S,D,1,1,SR,DB); | 
 | 222 | 				goto pack_s; | 
 | 223 | 			} else { | 
 | 224 | 				vb = alpha_read_fp_reg_s(fb); | 
 | 225 | 				FP_UNPACK_SP(SB, &vb); | 
 | 226 | 				DR_c = DB_c; | 
 | 227 | 				DR_s = DB_s; | 
 | 228 | 				DR_e = DB_e; | 
 | 229 | 				DR_f = SB_f << (52 - 23); | 
 | 230 | 				goto pack_d; | 
 | 231 | 			} | 
 | 232 |  | 
 | 233 | 		case FOP_FNC_CVTxQ: | 
 | 234 | 			if (DB_c == FP_CLS_NAN | 
 | 235 | 			    && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) { | 
 | 236 | 			  /* AAHB Table B-2 says QNaN should not trigger INV */ | 
 | 237 | 				vc = 0; | 
 | 238 | 			} else | 
 | 239 | 				FP_TO_INT_ROUND_D(vc, DB, 64, 2); | 
 | 240 | 			goto done_d; | 
 | 241 | 		} | 
 | 242 | 		goto bad_insn; | 
 | 243 |  | 
 | 244 | 	case FOP_SRC_Q: | 
 | 245 | 		vb = alpha_read_fp_reg(fb); | 
 | 246 |  | 
 | 247 | 		switch (func) { | 
 | 248 | 		case FOP_FNC_CVTQL: | 
 | 249 | 			/* Notice: We can get here only due to an integer | 
 | 250 | 			   overflow.  Such overflows are reported as invalid | 
 | 251 | 			   ops.  We return the result the hw would have | 
 | 252 | 			   computed.  */ | 
 | 253 | 			vc = ((vb & 0xc0000000) << 32 |	/* sign and msb */ | 
 | 254 | 			      (vb & 0x3fffffff) << 29);	/* rest of the int */ | 
 | 255 | 			FP_SET_EXCEPTION (FP_EX_INVALID); | 
 | 256 | 			goto done_d; | 
 | 257 |  | 
 | 258 | 		case FOP_FNC_CVTxS: | 
 | 259 | 			FP_FROM_INT_S(SR, ((long)vb), 64, long); | 
 | 260 | 			goto pack_s; | 
 | 261 |  | 
 | 262 | 		case FOP_FNC_CVTxT: | 
 | 263 | 			FP_FROM_INT_D(DR, ((long)vb), 64, long); | 
 | 264 | 			goto pack_d; | 
 | 265 | 		} | 
 | 266 | 		goto bad_insn; | 
 | 267 | 	} | 
 | 268 | 	goto bad_insn; | 
 | 269 |  | 
 | 270 | pack_s: | 
 | 271 | 	FP_PACK_SP(&vc, SR); | 
 | 272 | 	if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) | 
 | 273 | 		vc = 0; | 
 | 274 | 	alpha_write_fp_reg_s(fc, vc); | 
 | 275 | 	goto done; | 
 | 276 |  | 
 | 277 | pack_d: | 
 | 278 | 	FP_PACK_DP(&vc, DR); | 
 | 279 | 	if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) | 
 | 280 | 		vc = 0; | 
 | 281 | done_d: | 
 | 282 | 	alpha_write_fp_reg(fc, vc); | 
 | 283 | 	goto done; | 
 | 284 |  | 
 | 285 | 	/* | 
 | 286 | 	 * Take the appropriate action for each possible | 
 | 287 | 	 * floating-point result: | 
 | 288 | 	 * | 
 | 289 | 	 *	- Set the appropriate bits in the FPCR | 
 | 290 | 	 *	- If the specified exception is enabled in the FPCR, | 
 | 291 | 	 *	  return.  The caller (entArith) will dispatch | 
 | 292 | 	 *	  the appropriate signal to the translated program. | 
 | 293 | 	 * | 
 | 294 | 	 * In addition, properly track the exception state in software | 
 | 295 | 	 * as described in the Alpha Architecture Handbook section 4.7.7.3. | 
 | 296 | 	 */ | 
 | 297 | done: | 
 | 298 | 	if (_fex) { | 
 | 299 | 		/* Record exceptions in software control word.  */ | 
 | 300 | 		swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); | 
 | 301 | 		current_thread_info()->ieee_state | 
 | 302 | 		  |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); | 
 | 303 |  | 
 | 304 | 		/* Update hardware control register.  */ | 
 | 305 | 		fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); | 
 | 306 | 		fpcr |= ieee_swcr_to_fpcr(swcr); | 
 | 307 | 		wrfpcr(fpcr); | 
 | 308 |  | 
 | 309 | 		/* Do we generate a signal?  */ | 
 | 310 | 		_fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK; | 
 | 311 | 		si_code = 0; | 
 | 312 | 		if (_fex) { | 
 | 313 | 			if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND; | 
 | 314 | 			if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES; | 
 | 315 | 			if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND; | 
 | 316 | 			if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF; | 
 | 317 | 			if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV; | 
 | 318 | 			if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV; | 
 | 319 | 		} | 
 | 320 |  | 
 | 321 | 		return si_code; | 
 | 322 | 	} | 
 | 323 |  | 
 | 324 | 	/* We used to write the destination register here, but DEC FORTRAN | 
 | 325 | 	   requires that the result *always* be written... so we do the write | 
 | 326 | 	   immediately after the operations above.  */ | 
 | 327 |  | 
 | 328 | 	return 0; | 
 | 329 |  | 
 | 330 | bad_insn: | 
 | 331 | 	printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n", | 
 | 332 | 	       insn, pc); | 
 | 333 | 	return -1; | 
 | 334 | } | 
 | 335 |  | 
 | 336 | long | 
 | 337 | alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) | 
 | 338 | { | 
 | 339 | 	unsigned long trigger_pc = regs->pc - 4; | 
 | 340 | 	unsigned long insn, opcode, rc, si_code = 0; | 
 | 341 |  | 
 | 342 | 	/* | 
 | 343 | 	 * Turn off the bits corresponding to registers that are the | 
 | 344 | 	 * target of instructions that set bits in the exception | 
 | 345 | 	 * summary register.  We have some slack doing this because a | 
 | 346 | 	 * register that is the target of a trapping instruction can | 
 | 347 | 	 * be written at most once in the trap shadow. | 
 | 348 | 	 * | 
 | 349 | 	 * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all | 
 | 350 | 	 * bound the trap shadow, so we need not look any further than | 
 | 351 | 	 * up to the first occurrence of such an instruction. | 
 | 352 | 	 */ | 
 | 353 | 	while (write_mask) { | 
 | 354 | 		get_user(insn, (__u32 __user *)(trigger_pc)); | 
 | 355 | 		opcode = insn >> 26; | 
 | 356 | 		rc = insn & 0x1f; | 
 | 357 |  | 
 | 358 | 		switch (opcode) { | 
 | 359 | 		      case OPC_PAL: | 
 | 360 | 		      case OPC_JSR: | 
 | 361 | 		      case 0x30 ... 0x3f:	/* branches */ | 
 | 362 | 			goto egress; | 
 | 363 |  | 
 | 364 | 		      case OPC_MISC: | 
 | 365 | 			switch (insn & 0xffff) { | 
 | 366 | 			      case MISC_TRAPB: | 
 | 367 | 			      case MISC_EXCB: | 
 | 368 | 				goto egress; | 
 | 369 |  | 
 | 370 | 			      default: | 
 | 371 | 				break; | 
 | 372 | 			} | 
 | 373 | 			break; | 
 | 374 |  | 
 | 375 | 		      case OPC_INTA: | 
 | 376 | 		      case OPC_INTL: | 
 | 377 | 		      case OPC_INTS: | 
 | 378 | 		      case OPC_INTM: | 
 | 379 | 			write_mask &= ~(1UL << rc); | 
 | 380 | 			break; | 
 | 381 |  | 
 | 382 | 		      case OPC_FLTC: | 
 | 383 | 		      case OPC_FLTV: | 
 | 384 | 		      case OPC_FLTI: | 
 | 385 | 		      case OPC_FLTL: | 
 | 386 | 			write_mask &= ~(1UL << (rc + 32)); | 
 | 387 | 			break; | 
 | 388 | 		} | 
 | 389 | 		if (!write_mask) { | 
 | 390 | 			/* Re-execute insns in the trap-shadow.  */ | 
 | 391 | 			regs->pc = trigger_pc + 4; | 
 | 392 | 			si_code = alpha_fp_emul(trigger_pc); | 
 | 393 | 			goto egress; | 
 | 394 | 		} | 
 | 395 | 		trigger_pc -= 4; | 
 | 396 | 	} | 
 | 397 |  | 
 | 398 | egress: | 
 | 399 | 	return si_code; | 
 | 400 | } |