Blame - arch/ppc64/kernel/vecemu.c - android_kernel_htc_msm8960

blob: cb207629f21f0f6917d3ee7754e48477b7204b74 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Routines to emulate some Altivec/VMX instructions, specifically
				3	* those that can trap when given denormalized operands in Java mode.
				4	*/
				5	#include <linux/kernel.h>
				6	#include <linux/errno.h>
				7	#include <linux/sched.h>
				8	#include <asm/ptrace.h>
				9	#include <asm/processor.h>
				10	#include <asm/uaccess.h>
				11
				12	/* Functions in vector.S */
				13	extern void vaddfp(vector128 dst, vector128 a, vector128 *b);
				14	extern void vsubfp(vector128 dst, vector128 a, vector128 *b);
				15	extern void vmaddfp(vector128 dst, vector128 a, vector128 b, vector128 c);
				16	extern void vnmsubfp(vector128 dst, vector128 a, vector128 b, vector128 c);
				17	extern void vrefp(vector128 dst, vector128 src);
				18	extern void vrsqrtefp(vector128 dst, vector128 src);
				19	extern void vexptep(vector128 dst, vector128 src);
				20
				21	static unsigned int exp2s[8] = {
				22	0x800000,
				23	0x8b95c2,
				24	0x9837f0,
				25	0xa5fed7,
				26	0xb504f3,
				27	0xc5672a,
				28	0xd744fd,
				29	0xeac0c7
				30	};
				31
				32	/*
				33	* Computes an estimate of 2^x. The `s' argument is the 32-bit
				34	* single-precision floating-point representation of x.
				35	*/
				36	static unsigned int eexp2(unsigned int s)
				37	{
				38	int exp, pwr;
				39	unsigned int mant, frac;
				40
				41	/* extract exponent field from input */
				42	exp = ((s >> 23) & 0xff) - 127;
				43	if (exp > 7) {
				44	/* check for NaN input */
				45	if (exp == 128 && (s & 0x7fffff) != 0)
				46	return s \| 0x400000; /* return QNaN */
				47	/* 2^-big = 0, 2^+big = +Inf */
				48	return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
				49	}
				50	if (exp < -23)
				51	return 0x3f800000; /* 1.0 */
				52
				53	/* convert to fixed point integer in 9.23 representation */
				54	pwr = (s & 0x7fffff) \| 0x800000;
				55	if (exp > 0)
				56	pwr <<= exp;
				57	else
				58	pwr >>= -exp;
				59	if (s & 0x80000000)
				60	pwr = -pwr;
				61
				62	/* extract integer part, which becomes exponent part of result */
				63	exp = (pwr >> 23) + 126;
				64	if (exp >= 254)
				65	return 0x7f800000;
				66	if (exp < -23)
				67	return 0;
				68
				69	/* table lookup on top 3 bits of fraction to get mantissa */
				70	mant = exp2s[(pwr >> 20) & 7];
				71
				72	/* linear interpolation using remaining 20 bits of fraction */
				73	asm("mulhwu %0,%1,%2" : "=r" (frac)
				74	: "r" (pwr << 12), "r" (0x172b83ff));
				75	asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
				76	mant += frac;
				77
				78	if (exp >= 0)
				79	return mant + (exp << 23);
				80
				81	/* denormalized result */
				82	exp = -exp;
				83	mant += 1 << (exp - 1);
				84	return mant >> exp;
				85	}
				86
				87	/*
				88	* Computes an estimate of log_2(x). The `s' argument is the 32-bit
				89	* single-precision floating-point representation of x.
				90	*/
				91	static unsigned int elog2(unsigned int s)
				92	{
				93	int exp, mant, lz, frac;
				94
				95	exp = s & 0x7f800000;
				96	mant = s & 0x7fffff;
				97	if (exp == 0x7f800000) { /* Inf or NaN */
				98	if (mant != 0)
				99	s \|= 0x400000; /* turn NaN into QNaN */
				100	return s;
				101	}
				102	if ((exp \| mant) == 0) /* +0 or -0 */
				103	return 0xff800000; /* return -Inf */
				104
				105	if (exp == 0) {
				106	/* denormalized */
				107	asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
				108	mant <<= lz - 8;
				109	exp = (-118 - lz) << 23;
				110	} else {
				111	mant \|= 0x800000;
				112	exp -= 127 << 23;
				113	}
				114
				115	if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */
				116	exp \|= 0x400000; /* 0.5 * 2^23 */
				117	asm("mulhwu %0,%1,%2" : "=r" (mant)
				118	: "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
				119	}
				120	if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */
				121	exp \|= 0x200000; /* 0.25 * 2^23 */
				122	asm("mulhwu %0,%1,%2" : "=r" (mant)
				123	: "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
				124	}
				125	if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */
				126	exp \|= 0x100000; /* 0.125 * 2^23 */
				127	asm("mulhwu %0,%1,%2" : "=r" (mant)
				128	: "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
				129	}
				130	if (mant > 0x800000) { /* 1.0 * 2^23 */
				131	/* calculate (mant - 1) * 1.381097463 */
				132	/* 1.381097463 == 0.125 / (2^0.125 - 1) */
				133	asm("mulhwu %0,%1,%2" : "=r" (frac)
				134	: "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
				135	exp += frac;
				136	}
				137	s = exp & 0x80000000;
				138	if (exp != 0) {
				139	if (s)
				140	exp = -exp;
				141	asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
				142	lz = 8 - lz;
				143	if (lz > 0)
				144	exp >>= lz;
				145	else if (lz < 0)
				146	exp <<= -lz;
				147	s += ((lz + 126) << 23) + exp;
				148	}
				149	return s;
				150	}
				151
				152	#define VSCR_SAT 1
				153
				154	static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
				155	{
				156	int exp, mant;
				157
				158	exp = (x >> 23) & 0xff;
				159	mant = x & 0x7fffff;
				160	if (exp == 255 && mant != 0)
				161	return 0; /* NaN -> 0 */
				162	exp = exp - 127 + scale;
				163	if (exp < 0)
				164	return 0; /* round towards zero */
				165	if (exp >= 31) {
				166	/* saturate, unless the result would be -2^31 */
				167	if (x + (scale << 23) != 0xcf000000)
				168	*vscrp \|= VSCR_SAT;
				169	return (x & 0x80000000)? 0x80000000: 0x7fffffff;
				170	}
				171	mant \|= 0x800000;
				172	mant = (mant << 7) >> (30 - exp);
				173	return (x & 0x80000000)? -mant: mant;
				174	}
				175
				176	static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
				177	{
				178	int exp;
				179	unsigned int mant;
				180
				181	exp = (x >> 23) & 0xff;
				182	mant = x & 0x7fffff;
				183	if (exp == 255 && mant != 0)
				184	return 0; /* NaN -> 0 */
				185	exp = exp - 127 + scale;
				186	if (exp < 0)
				187	return 0; /* round towards zero */
				188	if (x & 0x80000000) {
				189	/* negative => saturate to 0 */
				190	*vscrp \|= VSCR_SAT;
				191	return 0;
				192	}
				193	if (exp >= 32) {
				194	/* saturate */
				195	*vscrp \|= VSCR_SAT;
				196	return 0xffffffff;
				197	}
				198	mant \|= 0x800000;
				199	mant = (mant << 8) >> (31 - exp);
				200	return mant;
				201	}
				202
				203	/* Round to floating integer, towards 0 */
				204	static unsigned int rfiz(unsigned int x)
				205	{
				206	int exp;
				207
				208	exp = ((x >> 23) & 0xff) - 127;
				209	if (exp == 128 && (x & 0x7fffff) != 0)
				210	return x \| 0x400000; /* NaN -> make it a QNaN */
				211	if (exp >= 23)
				212	return x; /* it's an integer already (or Inf) */
				213	if (exp < 0)
				214	return x & 0x80000000; /* \|x\| < 1.0 rounds to 0 */
				215	return x & ~(0x7fffff >> exp);
				216	}
				217
				218	/* Round to floating integer, towards +/- Inf */
				219	static unsigned int rfii(unsigned int x)
				220	{
				221	int exp, mask;
				222
				223	exp = ((x >> 23) & 0xff) - 127;
				224	if (exp == 128 && (x & 0x7fffff) != 0)
				225	return x \| 0x400000; /* NaN -> make it a QNaN */
				226	if (exp >= 23)
				227	return x; /* it's an integer already (or Inf) */
				228	if ((x & 0x7fffffff) == 0)
				229	return x; /* +/-0 -> +/-0 */
				230	if (exp < 0)
				231	/* 0 < \|x\| < 1.0 rounds to +/- 1.0 */
				232	return (x & 0x80000000) \| 0x3f800000;
				233	mask = 0x7fffff >> exp;
				234	/* mantissa overflows into exponent - that's OK,
				235	it can't overflow into the sign bit */
				236	return (x + mask) & ~mask;
				237	}
				238
				239	/* Round to floating integer, to nearest */
				240	static unsigned int rfin(unsigned int x)
				241	{
				242	int exp, half;
				243
				244	exp = ((x >> 23) & 0xff) - 127;
				245	if (exp == 128 && (x & 0x7fffff) != 0)
				246	return x \| 0x400000; /* NaN -> make it a QNaN */
				247	if (exp >= 23)
				248	return x; /* it's an integer already (or Inf) */
				249	if (exp < -1)
				250	return x & 0x80000000; /* \|x\| < 0.5 -> +/-0 */
				251	if (exp == -1)
				252	/* 0.5 <= \|x\| < 1.0 rounds to +/- 1.0 */
				253	return (x & 0x80000000) \| 0x3f800000;
				254	half = 0x400000 >> exp;
				255	/* add 0.5 to the magnitude and chop off the fraction bits */
				256	return (x + half) & ~(0x7fffff >> exp);
				257	}
				258
				259	int
				260	emulate_altivec(struct pt_regs *regs)
				261	{
				262	unsigned int instr, i;
				263	unsigned int va, vb, vc, vd;
				264	vector128 *vrs;
				265
				266	if (get_user(instr, (unsigned int __user *) regs->nip))
				267	return -EFAULT;
				268	if ((instr >> 26) != 4)
				269	return -EINVAL; /* not an altivec instruction */
				270	vd = (instr >> 21) & 0x1f;
				271	va = (instr >> 16) & 0x1f;
				272	vb = (instr >> 11) & 0x1f;
				273	vc = (instr >> 6) & 0x1f;
				274
				275	vrs = current->thread.vr;
				276	switch (instr & 0x3f) {
				277	case 10:
				278	switch (vc) {
				279	case 0: /* vaddfp */
				280	vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
				281	break;
				282	case 1: /* vsubfp */
				283	vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
				284	break;
				285	case 4: /* vrefp */
				286	vrefp(&vrs[vd], &vrs[vb]);
				287	break;
				288	case 5: /* vrsqrtefp */
				289	vrsqrtefp(&vrs[vd], &vrs[vb]);
				290	break;
				291	case 6: /* vexptefp */
				292	for (i = 0; i < 4; ++i)
				293	vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
				294	break;
				295	case 7: /* vlogefp */
				296	for (i = 0; i < 4; ++i)
				297	vrs[vd].u[i] = elog2(vrs[vb].u[i]);
				298	break;
				299	case 8: /* vrfin */
				300	for (i = 0; i < 4; ++i)
				301	vrs[vd].u[i] = rfin(vrs[vb].u[i]);
				302	break;
				303	case 9: /* vrfiz */
				304	for (i = 0; i < 4; ++i)
				305	vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
				306	break;
				307	case 10: /* vrfip */
				308	for (i = 0; i < 4; ++i) {
				309	u32 x = vrs[vb].u[i];
				310	x = (x & 0x80000000)? rfiz(x): rfii(x);
				311	vrs[vd].u[i] = x;
				312	}
				313	break;
				314	case 11: /* vrfim */
				315	for (i = 0; i < 4; ++i) {
				316	u32 x = vrs[vb].u[i];
				317	x = (x & 0x80000000)? rfii(x): rfiz(x);
				318	vrs[vd].u[i] = x;
				319	}
				320	break;
				321	case 14: /* vctuxs */
				322	for (i = 0; i < 4; ++i)
				323	vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
				324	&current->thread.vscr.u[3]);
				325	break;
				326	case 15: /* vctsxs */
				327	for (i = 0; i < 4; ++i)
				328	vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
				329	&current->thread.vscr.u[3]);
				330	break;
				331	default:
				332	return -EINVAL;
				333	}
				334	break;
				335	case 46: /* vmaddfp */
				336	vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
				337	break;
				338	case 47: /* vnmsubfp */
				339	vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
				340	break;
				341	default:
				342	return -EINVAL;
				343	}
				344
				345	return 0;
				346	}