Bionic: x86: Fix libm macro definitions

http://code.google.com/p/android/issues/detail?id=19276

GCC could remove the inline asm because it had no knowledge
of the FPU register changes made by the asm.

Change-Id: I9f9e8623fa6580843b7cd8178439ace8c2db2d51
Signed-off-by: Mark D Horn <mark.d.horn@intel.com>
Signed-off-by: Bruce Beare <bruce.j.beare@intel.com>
Author: Jingwei Zhang <jingwei.zhang@intel.com>
diff --git a/libm/i387/fenv.c b/libm/i387/fenv.c
index aabe270..89ddc55 100644
--- a/libm/i387/fenv.c
+++ b/libm/i387/fenv.c
@@ -31,16 +31,46 @@
 #include "npx.h"
 #include "fenv.h"
 
+/*
+ * As compared to the x87 control word, the SSE unit's control word
+ * has the rounding control bits offset by 3 and the exception mask
+ * bits offset by 7.
+ */
+#define	_SSE_ROUND_SHIFT	3
+#define	_SSE_EMASK_SHIFT	7
+
 const fenv_t __fe_dfl_env = {
-	__INITIAL_NPXCW__,
-	0x0000,
-	0x0000,
-	0x1f80,
-	0xffffffff,
+	__INITIAL_NPXCW__, /*__control*/
+	0x0000,            /*__mxcsr_hi*/
+	0x0000,            /*__status*/
+	0x1f80,            /*__mxcsr_lo*/
+	0xffffffff,        /*__tag*/
 	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff }
+	  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
 };
 
+#define __fldcw(__cw)           __asm __volatile("fldcw %0" : : "m" (__cw))
+#define __fldenv(__env)         __asm __volatile("fldenv %0" : : "m" (__env))
+#define	__fldenvx(__env)	__asm __volatile("fldenv %0" : : "m" (__env)  \
+				: "st", "st(1)", "st(2)", "st(3)", "st(4)",   \
+				"st(5)", "st(6)", "st(7)")
+#define __fnclex()              __asm __volatile("fnclex")
+#define __fnstenv(__env)        __asm __volatile("fnstenv %0" : "=m" (*(__env)))
+#define __fnstcw(__cw)          __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
+#define __fnstsw(__sw)          __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
+#define __fwait()               __asm __volatile("fwait")
+#define __ldmxcsr(__csr)        __asm __volatile("ldmxcsr %0" : : "m" (__csr))
+#define __stmxcsr(__csr)        __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
+
+/* After testing for SSE support once, we cache the result in __has_sse. */
+enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
+#ifdef __SSE__
+#define __HAS_SSE()     1
+#else
+#define __HAS_SSE()     (__has_sse == __SSE_YES ||                      \
+                         (__has_sse == __SSE_UNK && __test_sse()))
+#endif
+
 enum __sse_support __has_sse =
 #ifdef __SSE__
 	__SSE_YES;
@@ -48,6 +78,7 @@
 	__SSE_UNK;
 #endif
 
+#ifndef __SSE__
 #define	getfl(x)	__asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
 #define	setfl(x)	__asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
 #define	cpuid_dx(x)	__asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t"  \
@@ -82,23 +113,27 @@
 	__has_sse = __SSE_NO;
 	return (0);
 }
+#endif /* !__SSE__ */
 
 int
 fesetexceptflag(const fexcept_t *flagp, int excepts)
 {
 	fenv_t env;
-	int mxcsr;
+	__uint32_t mxcsr;
 
-	__fnstenv(&env);
-	env.__status &= ~excepts;
-	env.__status |= *flagp & excepts;
-	__fldenv(env);
-
-	if (__HAS_SSE()) {
-		__stmxcsr(&mxcsr);
-		mxcsr &= ~excepts;
-		mxcsr |= *flagp & excepts;
-		__ldmxcsr(mxcsr);
+	excepts &= FE_ALL_EXCEPT;
+	if (excepts) { /* Do nothing if excepts is 0 */
+		__fnstenv(&env);
+		env.__status &= ~excepts;
+		env.__status |= *flagp & excepts;
+		__fnclex();
+		__fldenv(env);
+		if (__HAS_SSE()) {
+			__stmxcsr(&mxcsr);
+			mxcsr &= ~excepts;
+			mxcsr |= *flagp & excepts;
+			__ldmxcsr(mxcsr);
+		}
 	}
 
 	return (0);
@@ -117,32 +152,38 @@
 int
 fegetenv(fenv_t *envp)
 {
-	int control, mxcsr;
+	__uint32_t mxcsr;
 
-	/*
-	 * fnstenv masks all exceptions, so we need to save and
-	 * restore the control word to avoid this side effect.
-	 */
-	__fnstcw(&control);
 	__fnstenv(envp);
+	/*
+	 * fnstenv masks all exceptions, so we need to restore
+	 * the old control word to avoid this side effect.
+	 */
+	__fldcw(envp->__control);
 	if (__HAS_SSE()) {
 		__stmxcsr(&mxcsr);
-		__set_mxcsr(*envp, mxcsr);
+		envp->__mxcsr_hi = mxcsr >> 16;
+		envp->__mxcsr_lo = mxcsr & 0xffff;
 	}
-	__fldcw(control);
 	return (0);
 }
 
 int
 feholdexcept(fenv_t *envp)
 {
-	int mxcsr;
+	__uint32_t mxcsr;
+	fenv_t env;
 
-	__fnstenv(envp);
+	__fnstenv(&env);
+	*envp = env;
+	env.__status &= ~FE_ALL_EXCEPT;
+	env.__control |= FE_ALL_EXCEPT;
 	__fnclex();
+	__fldenv(env);
 	if (__HAS_SSE()) {
 		__stmxcsr(&mxcsr);
-		__set_mxcsr(*envp, mxcsr);
+		envp->__mxcsr_hi = mxcsr >> 16;
+		envp->__mxcsr_lo = mxcsr & 0xffff;
 		mxcsr &= ~FE_ALL_EXCEPT;
 		mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
 		__ldmxcsr(mxcsr);
@@ -153,60 +194,198 @@
 int
 feupdateenv(const fenv_t *envp)
 {
-	int mxcsr;
-	short status;
+	__uint32_t mxcsr;
+	__uint16_t status;
 
 	__fnstsw(&status);
-	if (__HAS_SSE())
+	if (__HAS_SSE()) {
 		__stmxcsr(&mxcsr);
-	else
+	} else {
 		mxcsr = 0;
+	}
 	fesetenv(envp);
 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
 	return (0);
 }
 
 int
-__feenableexcept(int mask)
+feenableexcept(int mask)
 {
-	int mxcsr, control, omask;
+	__uint32_t mxcsr;
+	__uint16_t control, omask;
 
 	mask &= FE_ALL_EXCEPT;
 	__fnstcw(&control);
-	if (__HAS_SSE())
-		__stmxcsr(&mxcsr);
-	else
-		mxcsr = 0;
-	omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
-	control &= ~mask;
-	__fldcw(control);
 	if (__HAS_SSE()) {
-		mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
-		__ldmxcsr(mxcsr);
+		__stmxcsr(&mxcsr);
+	} else {
+		mxcsr = 0;
 	}
-	return (~omask);
+	omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
+	if (mask) {
+		control &= ~mask;
+		__fldcw(control);
+		if (__HAS_SSE()) {
+			mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
+			__ldmxcsr(mxcsr);
+		}
+	}
+	return (omask);
 }
 
 int
-__fedisableexcept(int mask)
+fedisableexcept(int mask)
 {
-	int mxcsr, control, omask;
+	__uint32_t mxcsr;
+	__uint16_t control, omask;
 
 	mask &= FE_ALL_EXCEPT;
 	__fnstcw(&control);
-	if (__HAS_SSE())
-		__stmxcsr(&mxcsr);
-	else
-		mxcsr = 0;
-	omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
-	control |= mask;
-	__fldcw(control);
 	if (__HAS_SSE()) {
-		mxcsr |= mask << _SSE_EMASK_SHIFT;
-		__ldmxcsr(mxcsr);
+		__stmxcsr(&mxcsr);
+	} else {
+		mxcsr = 0;
 	}
-	return (~omask);
+	omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
+	if (mask) {
+		control |= mask;
+		__fldcw(control);
+		if (__HAS_SSE()) {
+			mxcsr |= mask << _SSE_EMASK_SHIFT;
+			__ldmxcsr(mxcsr);
+		}
+	}
+	return (omask);
 }
 
-__weak_reference(__feenableexcept, feenableexcept);
-__weak_reference(__fedisableexcept, fedisableexcept);
+int
+feclearexcept(int excepts)
+{
+	fenv_t env;
+	__uint32_t mxcsr;
+
+	excepts &= FE_ALL_EXCEPT;
+	if (excepts) { /* Do nothing if excepts is 0 */
+		__fnstenv(&env);
+		env.__status &= ~excepts;
+		__fnclex();
+		__fldenv(env);
+		if (__HAS_SSE()) {
+			__stmxcsr(&mxcsr);
+			mxcsr &= ~excepts;
+			__ldmxcsr(mxcsr);
+		}
+	}
+	return (0);
+}
+
+int
+fegetexceptflag(fexcept_t *flagp, int excepts)
+{
+	__uint32_t mxcsr;
+	__uint16_t status;
+
+	excepts &= FE_ALL_EXCEPT;
+	__fnstsw(&status);
+	if (__HAS_SSE()) {
+		__stmxcsr(&mxcsr);
+	} else {
+		mxcsr = 0;
+	}
+	*flagp = (status | mxcsr) & excepts;
+	return (0);
+}
+
+int
+fetestexcept(int excepts)
+{
+	__uint32_t mxcsr;
+	__uint16_t status;
+
+	excepts &= FE_ALL_EXCEPT;
+	if (excepts) { /* Do nothing if excepts is 0 */
+		__fnstsw(&status);
+		if (__HAS_SSE()) {
+			__stmxcsr(&mxcsr);
+		} else {
+			mxcsr = 0;
+		}
+		return ((status | mxcsr) & excepts);
+	}
+	return (0);
+}
+
+int
+fegetround(void)
+{
+	__uint16_t control;
+
+	/*
+	 * We assume that the x87 and the SSE unit agree on the
+	 * rounding mode.  Reading the control word on the x87 turns
+	 * out to be about 5 times faster than reading it on the SSE
+	 * unit on an Opteron 244.
+	 */
+	__fnstcw(&control);
+	return (control & _ROUND_MASK);
+}
+
+int
+fesetround(int round)
+{
+	__uint32_t mxcsr;
+	__uint16_t control;
+
+	if (round & ~_ROUND_MASK) {
+		return (-1);
+	} else {
+		__fnstcw(&control);
+		control &= ~_ROUND_MASK;
+		control |= round;
+		__fldcw(control);
+		if (__HAS_SSE()) {
+			__stmxcsr(&mxcsr);
+			mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
+			mxcsr |= round << _SSE_ROUND_SHIFT;
+			__ldmxcsr(mxcsr);
+		}
+		return (0);
+	}
+}
+
+int
+fesetenv(const fenv_t *envp)
+{
+	fenv_t env = *envp;
+	__uint32_t mxcsr;
+
+	mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
+	env.__mxcsr_hi = 0xffff;
+	env.__mxcsr_lo = 0xffff;
+	/*
+	 * XXX Using __fldenvx() instead of __fldenv() tells the compiler that
+	 * this instruction clobbers the i387 register stack.  This happens
+	 * because we restore the tag word from the saved environment.
+	 * Normally, this would happen anyway and we wouldn't care, because the
+	 * ABI allows function calls to clobber the i387 regs.  However, the
+	 * compiler may inline fesetenv(), so we need to be more careful.
+	 */
+	__fldenvx(env);
+	if (__HAS_SSE()) {
+		__ldmxcsr(mxcsr);
+	}
+	return (0);
+}
+
+int
+fegetexcept(void)
+{
+	__uint16_t control;
+
+	/*
+	 * We assume that the masks for the x87 and the SSE unit are
+	 * the same.
+	 */
+	__fnstcw(&control);
+	return (~control & FE_ALL_EXCEPT);
+}