bionic: Do not use <sys/atomics.h> for platform code.

We're going to modify the __atomic_xxx implementation to provide
full memory barriers, to avoid problems for NDK machine code that
links to these functions.

The first step is to remove their usage from our platform code.
We now use inlined versions of the same functions for a slight
performance boost.

+ remove obsolete atomics_x86.c (was never compiled)

NOTE: This improvement was benchmarked on various devices.
      Timing a pthread mutex lock + atomic increment + unlock
      sequence, we get:

  - ARMv7 emulator, running on a 2.4 GHz Xeon:
       before: 396 ns    after: 288 ns

  - x86 emulator in KVM mode on same machine:
       before: 27 ns     after: 27 ns

  - Google Nexus S, in ARMv7 mode (single-core):
       before: 82 ns     after: 76 ns

  - Motorola Xoom, in ARMv7 mode (multi-core):
       before: 121 ns    after: 120 ns

The code has also been rebuilt in ARMv5TE mode for correctness.

Change-Id: Ic1dc72b173d59b2e7af901dd70d6a72fb2f64b17
diff --git a/libc/private/bionic_atomic_inline.h b/libc/private/bionic_atomic_inline.h
index 95766e1..821ad39 100644
--- a/libc/private/bionic_atomic_inline.h
+++ b/libc/private/bionic_atomic_inline.h
@@ -43,62 +43,21 @@
 extern "C" {
 #endif
 
-/*
- * Define the full memory barrier for an SMP system.  This is
- * platform-specific.
+/* Define __ATOMIC_INLINE__ to control the inlining of all atomics
+ * functions declared here. For a slight performance boost, we want
+ * all of them to be always_inline
  */
+#define  __ATOMIC_INLINE__  static __inline__ __attribute__((always_inline))
 
 #ifdef __arm__
-#include <machine/cpu-features.h>
-
-/*
- * For ARMv6K we need to issue a specific MCR instead of the DMB, since
- * that wasn't added until v7.  For anything older, SMP isn't relevant.
- * Since we don't have an ARMv6K to test with, we're not going to deal
- * with that now.
- *
- * The DMB instruction is found in the ARM and Thumb2 instruction sets.
- * This will fail on plain 16-bit Thumb.
- */
-#if defined(__ARM_HAVE_DMB)
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
+#  include <bionic_atomic_arm.h>
+#elif defined(__i386__)
+#  include <bionic_atomic_x86.h>
 #else
-# define _ANDROID_MEMBAR_FULL_SMP()  ARM_SMP_defined_but_no_DMB()
+#  include <bionic_atomic_gcc_builtin.h>
 #endif
 
-#elif defined(__i386__) || defined(__x86_64__)
-/*
- * For recent x86, we can use the SSE2 mfence instruction.
- */
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
-
-#else
-/*
- * Implementation not defined for this platform.  Hopefully we're building
- * in uniprocessor mode.
- */
-# define _ANDROID_MEMBAR_FULL_SMP()  SMP_barrier_not_defined_for_platform()
-#endif
-
-
-/*
- * Full barrier.  On uniprocessors this is just a compiler reorder barrier,
- * which ensures that the statements appearing above the barrier in the C/C++
- * code will be issued after the statements appearing below the barrier.
- *
- * For SMP this also includes a memory barrier instruction.  On an ARM
- * CPU this means that the current core will flush pending writes, wait
- * for pending reads to complete, and discard any cached reads that could
- * be stale.  Other CPUs may do less, but the end result is equivalent.
- */
-#if ANDROID_SMP != 0
-# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP()
-#else
-# define ANDROID_MEMBAR_FULL() \
-    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
-#endif
+#define ANDROID_MEMBAR_FULL  __bionic_memory_barrier
 
 #ifdef __cplusplus
 } // extern "C"