libm: Add hardware sqrt, ceil, floor and trunc for x86 & x86_64

Add hardware implementations for sqrt, ceil, floor and trunc for
x86 and x86_64. These routines, and in particular sqrt are much
faster than the BSD C language versions of these functions.

Fixed whitespace errors.

Revised x86 versions with respect to alignment.
Rebased for Android 5.0

Change-Id: I86bdb520ce5e589b0cf63778f353fbd3263c8f0e
Author: James Rose <james.rose@intel.com>
Signed-off-by: James Rose <james.rose@intel.com>
diff --git a/libm/Android.mk b/libm/Android.mk
index 15c390c..418ec59 100644
--- a/libm/Android.mk
+++ b/libm/Android.mk
@@ -331,45 +331,73 @@
 # -----------------------------------------------------------------------------
 LOCAL_SRC_FILES_x86 += \
     i387/fenv.c \
-    upstream-freebsd/lib/msun/src/e_sqrt.c \
-    upstream-freebsd/lib/msun/src/e_sqrtf.c \
-    upstream-freebsd/lib/msun/src/s_ceil.c \
-    upstream-freebsd/lib/msun/src/s_ceilf.c \
     upstream-freebsd/lib/msun/src/s_fma.c \
     upstream-freebsd/lib/msun/src/s_fmaf.c \
-    upstream-freebsd/lib/msun/src/s_floor.c \
-    upstream-freebsd/lib/msun/src/s_floorf.c \
     upstream-freebsd/lib/msun/src/s_llrint.c \
     upstream-freebsd/lib/msun/src/s_llrintf.c \
     upstream-freebsd/lib/msun/src/s_lrint.c \
     upstream-freebsd/lib/msun/src/s_lrintf.c \
     upstream-freebsd/lib/msun/src/s_rint.c \
     upstream-freebsd/lib/msun/src/s_rintf.c \
+    x86/sqrt.S \
+    x86/sqrtf.S \
+
+ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
+LOCAL_SRC_FILES_x86 += \
+    x86/ceil.S \
+    x86/ceilf.S \
+    x86/floor.S \
+    x86/floorf.S \
+    x86/trunc.S \
+    x86/truncf.S \
+
+else
+LOCAL_SRC_FILES_x86 += \
+    upstream-freebsd/lib/msun/src/s_ceil.c \
+    upstream-freebsd/lib/msun/src/s_ceilf.c \
+    upstream-freebsd/lib/msun/src/s_floor.c \
+    upstream-freebsd/lib/msun/src/s_floorf.c \
     upstream-freebsd/lib/msun/src/s_trunc.c \
     upstream-freebsd/lib/msun/src/s_truncf.c \
 
+endif
+
 # -----------------------------------------------------------------------------
 # x86_64
 # -----------------------------------------------------------------------------
 LOCAL_SRC_FILES_x86_64 += \
     amd64/fenv.c \
-    upstream-freebsd/lib/msun/src/e_sqrt.c \
-    upstream-freebsd/lib/msun/src/e_sqrtf.c \
-    upstream-freebsd/lib/msun/src/s_ceil.c \
-    upstream-freebsd/lib/msun/src/s_ceilf.c \
     upstream-freebsd/lib/msun/src/s_fma.c \
     upstream-freebsd/lib/msun/src/s_fmaf.c \
-    upstream-freebsd/lib/msun/src/s_floor.c \
-    upstream-freebsd/lib/msun/src/s_floorf.c \
     upstream-freebsd/lib/msun/src/s_llrint.c \
     upstream-freebsd/lib/msun/src/s_llrintf.c \
     upstream-freebsd/lib/msun/src/s_lrint.c \
     upstream-freebsd/lib/msun/src/s_lrintf.c \
     upstream-freebsd/lib/msun/src/s_rint.c \
     upstream-freebsd/lib/msun/src/s_rintf.c \
+    x86_64/sqrt.S \
+    x86_64/sqrtf.S \
+
+ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
+LOCAL_SRC_FILES_x86_64 += \
+    x86_64/ceil.S \
+    x86_64/ceilf.S \
+    x86_64/floor.S \
+    x86_64/floorf.S \
+    x86_64/trunc.S \
+    x86_64/truncf.S \
+
+else
+LOCAL_SRC_FILES_x86_64 += \
+    upstream-freebsd/lib/msun/src/s_ceil.c \
+    upstream-freebsd/lib/msun/src/s_ceilf.c \
+    upstream-freebsd/lib/msun/src/s_floor.c \
+    upstream-freebsd/lib/msun/src/s_floorf.c \
     upstream-freebsd/lib/msun/src/s_trunc.c \
     upstream-freebsd/lib/msun/src/s_truncf.c \
 
+endif
+
 LOCAL_C_INCLUDES_x86 += $(LOCAL_PATH)/i387
 
 LOCAL_C_INCLUDES += $(LOCAL_PATH)/upstream-freebsd/lib/msun/src/