Merge "Add tgkill syscall."
diff --git a/libc/bionic/atomics_x86.c b/libc/bionic/atomics_x86.c
deleted file mode 100644
index fd60f4f..0000000
--- a/libc/bionic/atomics_x86.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include <sys/atomics.h>
-
-#define FUTEX_SYSCALL 240
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
-
-int __futex_wait(volatile void *ftx, int val)
-{
-    int ret;
-    asm volatile (
-        "int $0x80;"
-        : "=a" (ret)
-        : "0" (FUTEX_SYSCALL),
-          "b" (ftx),
-          "c" (FUTEX_WAIT),
-          "d" (val),
-          "S" (0)
-    );
-    return ret;
-}
-
-int __futex_wake(volatile void *ftx, int count)
-{
-    int ret;
-    asm volatile (
-        "int $0x80;"
-        : "=a" (ret)
-        : "0" (FUTEX_SYSCALL),
-          "b" (ftx),
-          "c" (FUTEX_WAKE),
-          "d" (count)
-    );
-    return ret;
-}
-
-int __atomic_cmpxchg(int old, int new, volatile int* addr) {
-    int xchg;
-    asm volatile (
-        "lock;"
-        "cmpxchg %%ecx, (%%edx);"
-        "setne %%al;"
-        : "=a" (xchg)
-        : "a" (old),
-          "c" (new),
-          "d" (addr)
-    );
-    return xchg;
-}
-
-int __atomic_swap(int new, volatile int* addr) {
-    int old;
-    asm volatile (
-        "lock;"
-        "xchg %%ecx, (%%edx);"
-        : "=c" (old)
-        : "c" (new),
-          "d" (addr)
-    );
-    return old;
-}
-
-int __atomic_dec(volatile int* addr) {
-    int old;
-    do {
-        old = *addr;
-    } while (atomic_cmpxchg(old, old-1, addr));
-    return old;
-}
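
Worth noting on this deletion: the file above hand-coded the futex trap with a hardcoded syscall number (240) and a raw `int $0x80`, which is both x86-only and redundant with the generic syscall wrapper; its atomic helpers are superseded by the new bionic_atomic_*.h headers later in this change. For reference, a minimal sketch (not the bionic implementation) of what the deleted futex wrappers reduce to:

    /* Sketch: portable equivalents of the deleted __futex_wait/__futex_wake,
     * using the generic syscall(2) wrapper instead of a hand-coded trap. */
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int futex_wait(volatile void *ftx, int val)
    {
        /* Sleeps only if *ftx still equals val; NULL means no timeout. */
        return syscall(SYS_futex, ftx, FUTEX_WAIT, val, NULL, NULL, 0);
    }

    static int futex_wake(volatile void *ftx, int count)
    {
        /* Wakes up to 'count' threads waiting on ftx. */
        return syscall(SYS_futex, ftx, FUTEX_WAKE, count, NULL, NULL, 0);
    }
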
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index 10fb7b5..63d885a 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -81,9 +81,6 @@
 #define PTHREAD_ATTR_FLAG_USER_STACK    0x00000002
 
 #define DEFAULT_STACKSIZE (1024 * 1024)
-#define STACKBASE 0x10000000
-
-static uint8_t * gStackBase = (uint8_t *)STACKBASE;
 
 static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -252,7 +249,7 @@
 
     pthread_mutex_lock(&mmap_lock);
 
-    stack = mmap((void *)gStackBase, size,
+    stack = mmap(NULL, size,
                  PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
                  -1, 0);
@@ -692,7 +689,7 @@
             goto Exit;
         }
     }
-    while ( __atomic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED,
+    while ( __bionic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED,
                               (volatile int*)&thread->attr.flags ) != 0 );
 Exit:
     pthread_mutex_unlock(&gThreadListLock);
@@ -926,17 +923,17 @@
     int  shared = mutex->value & MUTEX_SHARED_MASK;
     /*
      * The common case is an unlocked mutex, so we begin by trying to
-     * change the lock's state from 0 to 1.  __atomic_cmpxchg() returns 0
+     * change the lock's state from 0 to 1.  __bionic_cmpxchg() returns 0
      * if it made the swap successfully.  If the result is nonzero, this
      * lock is already held by another thread.
      */
-    if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value ) != 0) {
+    if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value ) != 0) {
         /*
          * We want to go to sleep until the mutex is available, which
          * requires promoting it to state 2.  We need to swap in the new
          * state value and then wait until somebody wakes us up.
          *
-         * __atomic_swap() returns the previous value.  We swap 2 in and
+         * __bionic_swap() returns the previous value.  We swap 2 in and
          * see if we got zero back; if so, we have acquired the lock.  If
          * not, another thread still holds the lock and we wait again.
          *
@@ -947,7 +944,7 @@
          * that the mutex is in state 2 when we go to sleep on it, which
          * guarantees a wake-up call.
          */
-        while (__atomic_swap(shared|2, &mutex->value ) != (shared|0))
+        while (__bionic_swap(shared|2, &mutex->value ) != (shared|0))
             __futex_wait_ex(&mutex->value, shared, shared|2, 0);
     }
     ANDROID_MEMBAR_FULL();
@@ -967,10 +964,10 @@
 
     /*
      * The mutex state will be 1 or (rarely) 2.  We use an atomic decrement
-     * to release the lock.  __atomic_dec() returns the previous value;
+     * to release the lock.  __bionic_atomic_dec() returns the previous value;
      * if it wasn't 1 we have to do some additional work.
      */
-    if (__atomic_dec(&mutex->value) != (shared|1)) {
+    if (__bionic_atomic_dec(&mutex->value) != (shared|1)) {
         /*
          * Start by releasing the lock.  The decrement changed it from
          * "contended lock" to "uncontended lock", which means we still
@@ -1158,7 +1155,7 @@
     /* Handle common case first */
     if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
     {
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+        if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
             ANDROID_MEMBAR_FULL();
             return 0;
         }
@@ -1256,13 +1253,13 @@
     if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
     {
         /* fast path for uncontended lock */
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+        if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
             ANDROID_MEMBAR_FULL();
             return 0;
         }
 
         /* loop while needed */
-        while (__atomic_swap(shared|2, &mutex->value) != (shared|0)) {
+        while (__bionic_swap(shared|2, &mutex->value) != (shared|0)) {
             if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
                 return EBUSY;
 
@@ -1431,7 +1428,7 @@
         long oldval = cond->value;
         long newval = ((oldval - COND_COUNTER_INCREMENT) & COND_COUNTER_MASK)
                       | flags;
-        if (__atomic_cmpxchg(oldval, newval, &cond->value) == 0)
+        if (__bionic_cmpxchg(oldval, newval, &cond->value) == 0)
             break;
     }
 
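
The renamed `__bionic_*` primitives keep the three-state protocol the comments above describe: 0 = unlocked, 1 = locked and uncontended, 2 = locked with possible waiters. A standalone sketch of that protocol, with GCC `__sync` builtins and raw futex calls standing in for the bionic internals (memory barriers and the `shared` bits are omitted for brevity):

    /* Sketch of the 0/1/2 futex mutex protocol used above. */
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int toy_swap(volatile int *p, int new_value)
    {
        int old;
        do {
            old = *p;
        } while (!__sync_bool_compare_and_swap(p, old, new_value));
        return old;
    }

    static void toy_mutex_lock(volatile int *m)
    {
        if (__sync_bool_compare_and_swap(m, 0, 1))
            return;                      /* 0 -> 1: uncontended fast path */
        /* Promote to state 2 and sleep until the value we swapped out
         * was 0, i.e. until we actually acquired the lock. */
        while (toy_swap(m, 2) != 0)
            syscall(SYS_futex, m, FUTEX_WAIT, 2, NULL, NULL, 0);
    }

    static void toy_mutex_unlock(volatile int *m)
    {
        /* An old value of 2 means at least one thread may be sleeping. */
        if (__sync_fetch_and_sub(m, 1) != 1) {
            *m = 0;
            syscall(SYS_futex, m, FUTEX_WAKE, 1, NULL, NULL, 0);
        }
    }
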
diff --git a/libc/bionic/semaphore.c b/libc/bionic/semaphore.c
index 96819ae..9bc8412 100644
--- a/libc/bionic/semaphore.c
+++ b/libc/bionic/semaphore.c
@@ -174,7 +174,7 @@
 
         new = SEMCOUNT_DECREMENT(old);
     }
-    while (__atomic_cmpxchg((int)(old|shared),
+    while (__bionic_cmpxchg((int)(old|shared),
                             (int)(new|shared),
                             (volatile int *)pvalue) != 0);
     return ret;
@@ -198,7 +198,7 @@
 
         new = SEMCOUNT_DECREMENT(old);
     }
-    while (__atomic_cmpxchg((int)(old|shared),
+    while (__bionic_cmpxchg((int)(old|shared),
                             (int)(new|shared),
                             (volatile int *)pvalue) != 0);
 
@@ -235,7 +235,7 @@
         else
             new = SEMCOUNT_INCREMENT(old);
     }
-    while ( __atomic_cmpxchg((int)(old|shared),
+    while ( __bionic_cmpxchg((int)(old|shared),
                              (int)(new|shared),
                              (volatile int*)pvalue) != 0);
 
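
All three hunks above share one idiom: read the current count, compute the new count, and retry the compare-and-swap until no other thread raced in between. The shape of that loop in isolation (a sketch; `__sync_val_compare_and_swap` stands in for `__bionic_cmpxchg`, which uses the opposite return convention):

    /* The read/compute/CAS-retry idiom used by sem_wait/sem_post above. */
    #include <stdint.h>

    static int32_t atomic_update(volatile int32_t *pvalue,
                                 int32_t (*compute)(int32_t))
    {
        int32_t old, new;
        do {
            old = *pvalue;
            new = compute(old);
        } while (__sync_val_compare_and_swap(pvalue, old, new) != old);
        return old;   /* the value that was replaced */
    }
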
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 9d05769..2015ac0 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -42,9 +42,13 @@
     int volatile value;
 } pthread_mutex_t;
 
-#define  PTHREAD_MUTEX_INITIALIZER             {0}
-#define  PTHREAD_RECURSIVE_MUTEX_INITIALIZER   {0x4000}
-#define  PTHREAD_ERRORCHECK_MUTEX_INITIALIZER  {0x8000}
+#define  __PTHREAD_MUTEX_INIT_VALUE            0
+#define  __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE  0x4000
+#define  __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE 0x8000
+
+#define  PTHREAD_MUTEX_INITIALIZER             {__PTHREAD_MUTEX_INIT_VALUE}
+#define  PTHREAD_RECURSIVE_MUTEX_INITIALIZER   {__PTHREAD_RECURSIVE_MUTEX_INIT_VALUE}
+#define  PTHREAD_ERRORCHECK_MUTEX_INITIALIZER  {__PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE}
 
 enum {
     PTHREAD_MUTEX_NORMAL = 0,
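
Splitting the raw values out of the brace initializers lets other code re-arm a mutex by plain assignment, with no pthread_mutex_init() call; that is exactly what `_FLOCK_INIT()` in fileext.h (later in this change) does. A bionic-specific sketch:

    /* Sketch (bionic only): resetting a mutex by storing the raw init
     * value, the trick _FLOCK_INIT() relies on. Valid solely because
     * bionic's pthread_mutex_t is a bare int. */
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;

    static void reset_lock(void)
    {
        lock.value = __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE;
    }
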
diff --git a/libc/kernel/common/media/soc2030.h b/libc/kernel/common/media/soc2030.h
index ad0ddfc..850ab13 100644
--- a/libc/kernel/common/media/soc2030.h
+++ b/libc/kernel/common/media/soc2030.h
@@ -12,7 +12,7 @@
 #ifndef __SOC2030_H__
 #define __SOC2030_H__
 
-#include <linux/ioctl.h>  
+#include <linux/ioctl.h>
 
 #define SOC2030_IOCTL_SET_MODE _IOWR('o', 1, struct soc2030_mode)
 #define SOC2030_IOCTL_GET_STATUS _IOC(_IOC_READ, 'o', 2, 10)
@@ -22,10 +22,11 @@
 #define SOC2030_IOCTL_SET_EFFECT _IOWR('o', 6, unsigned int)
 #define SOC2030_IOCTL_SET_WHITEBALANCE _IOWR('o', 7, unsigned int)
 #define SOC2030_IOCTL_SET_EXP_COMP _IOWR('o', 8, int)
+#define SOC2030_IOCTL_SET_LOCK _IOWR('o', 9, struct soc2030_lock)
 
 #define SOC2030_POLL_WAITMS 50
 #define SOC2030_MAX_RETRIES 3
-#define SOC2030_POLL_RETRIES 5
+#define SOC2030_POLL_RETRIES 7
 
 #define SOC2030_MAX_PRIVATE_SIZE 1024
 #define SOC2030_MAX_NUM_MODES 6
@@ -45,8 +46,21 @@
  WRITE_VAR_DATA,
  POLL_VAR_DATA,
  DELAY_MS,
+ WRITE_REG_VAR1,
+ WRITE_REG_VAR2,
+ WRITE_REG_VAR3,
+ WRITE_REG_VAR4,
+ READ_REG_VAR1,
+ READ_REG_VAR2,
+ READ_REG_VAR3,
+ READ_REG_VAR4,
 };
 
+#define REG_VAR1 (READ_REG_VAR1 - READ_REG_VAR1)
+#define REG_VAR2 (READ_REG_VAR2 - READ_REG_VAR1)
+#define REG_VAR3 (READ_REG_VAR3 - READ_REG_VAR1)
+#define REG_VAR4 (READ_REG_VAR4 - READ_REG_VAR1)
+
 enum {
  EFFECT_NONE,
  EFFECT_BW,
@@ -74,6 +88,14 @@
  __u16 val;
 };
 
+struct soc2030_lock {
+ __u8 aelock;
+ __u8 aerelock;
+ __u8 awblock;
+ __u8 awbrelock;
+ __u8 previewactive;
+};
+
 struct soc2030_mode {
  int xres;
  int yres;
@@ -82,5 +104,3 @@
 };
 
 #endif
-
-
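
For context, a sketch of how a caller might drive the new AE/AWB lock ioctl; the open file descriptor is assumed to come from elsewhere, and error handling is trimmed:

    /* Sketch: locking auto-exposure and auto-white-balance via the
     * new SOC2030_IOCTL_SET_LOCK. */
    #include <sys/ioctl.h>
    #include <media/soc2030.h>

    int soc2030_lock_3a(int fd)
    {
        struct soc2030_lock lock = {
            .aelock        = 1,   /* freeze auto-exposure */
            .aerelock      = 0,
            .awblock       = 1,   /* freeze auto-white-balance */
            .awbrelock     = 0,
            .previewactive = 1,
        };
        return ioctl(fd, SOC2030_IOCTL_SET_LOCK, &lock);
    }
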
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
new file mode 100644
index 0000000..275c1c9
--- /dev/null
+++ b/libc/private/bionic_atomic_arm.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_ARM_H
+#define BIONIC_ATOMIC_ARM_H
+
+#include <machine/cpu-features.h>
+
+/* Some of the hardware instructions used below are not available in Thumb-1
+ * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
+ * problem, we're going to use the same technique as libatomic_ops,
+ * which is to temporarily switch to ARM, do the operation, then switch
+ * back to Thumb-1.
+ *
+ * This results in two 'bx' jumps, just like a normal function call, but
+ * everything is kept inlined, avoids loading or computing the function's
+ * address, and prevents a little I-cache thrashing too.
+ *
+ * However, it is highly recommended to avoid compiling any C library source
+ * files that use these functions in Thumb-1 mode.
+ *
+ * Define three helper macros to implement this:
+ */
+#if defined(__thumb__) && !defined(__thumb2__)
+#  define  __ATOMIC_SWITCH_TO_ARM \
+            "adr r3, 5f\n" \
+            "bx  r3\n" \
+            ".align\n" \
+            ".arm\n" \
+        "5:\n"
+/* note: the leading \n below is intentional */
+#  define __ATOMIC_SWITCH_TO_THUMB \
+            "\n" \
+            "adr r3, 6f\n" \
+            "bx  r3\n" \
+            ".thumb" \
+        "6:\n"
+
+#  define __ATOMIC_CLOBBERS   "r3"  /* list of clobbered registers */
+
+/* Warn the user that ARM mode should really be preferred! */
+#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!
+
+#else
+#  define  __ATOMIC_SWITCH_TO_ARM   /* nothing */
+#  define  __ATOMIC_SWITCH_TO_THUMB /* nothing */
+#  define  __ATOMIC_CLOBBERS        /* nothing */
+#endif
+
+
+/* Define a full memory barrier, this is only needed if we build the
+ * platform for a multi-core device. For the record, using a 'dmb'
+ * instruction on a Nexus One device can take up to 180 ns even if
+ * it is completely unnecessary on this device.
+ *
+ * NOTE: This is where the platform and NDK atomic headers are
+ *        going to diverge. With the NDK, we don't know if the generated
+ *        code is going to run on a single or multi-core device, so we
+ *        need to be cautious.
+ *
+ *        Fortunately, we can use the kernel helper function that is
+ *        mapped at address 0xffff0fa0 in all user processes, and that
+ *        provides a device-specific barrier operation.
+ *
+ *        I.e. on single-core devices, the helper immediately returns,
+ *        on multi-core devices, it uses "dmb" or any other means to
+ *        perform a full memory barrier.
+ *
+ * There are three cases to consider for the platform:
+ *
+ *    - multi-core ARMv7-A       => use the 'dmb' hardware instruction
+ *    - multi-core ARMv6         => use the coprocessor
+ *    - single core ARMv5TE/6/7  => do not use any hardware barrier
+ */
+#if defined(ANDROID_SMP) && ANDROID_SMP == 1
+
+/* Sanity check, multi-core is only supported starting from ARMv6 */
+#  if __ARM_ARCH__ < 6
+#    error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
+#  endif
+
+#  ifdef __ARM_HAVE_DMB
+/* For ARMv7-A, we can use the 'dmb' instruction directly */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
+     * bother with __ATOMIC_SWITCH_TO_ARM */
+    __asm__ __volatile__ ( "dmb" : : : "memory" );
+}
+#  else /* !__ARM_HAVE_DMB */
+/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
+ * which requires the use of a general-purpose register, which is slightly
+ * less efficient.
+ */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __asm__ __volatile__ (
+        __ATOMIC_SWITCH_TO_ARM
+        "mcr p15, 0, %0, c7, c10, 5"
+        __ATOMIC_SWITCH_TO_THUMB
+        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
+}
+#  endif /* !__ARM_HAVE_DMB */
+#else /* !ANDROID_SMP */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* A simple compiler barrier */
+    __asm__ __volatile__ ( "" : : : "memory" );
+}
+#endif /* !ANDROID_SMP */
+
+/* Compare-and-swap, without any explicit barriers. Note that this function
+ * returns 0 on success, and 1 on failure. The opposite convention is typically
+ * used on other platforms.
+ *
+ * There are two cases to consider:
+ *
+ *     - ARMv6+  => use LDREX/STREX instructions
+ *     - < ARMv6 => use kernel helper function mapped at 0xffff0fc0
+ *
+ * LDREX/STREX are only available starting from ARMv6
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%3]\n"
+            "mov %1, #0\n"
+            "teq %0, %4\n"
+#ifdef __thumb2__
+            "it eq\n"
+#endif
+            "strexeq %1, %5, [%3]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr), "Ir" (old_value), "r" (new_value)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev != old_value;
+}
+#else /* !__ARM_HAVE_LDREX_STREX */
+
+/* Use the handy kernel helper function mapped at 0xffff0fc0 */
+typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);
+
+__ATOMIC_INLINE__ int
+__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    /* Note: the kernel function returns 0 on success too */
+    return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
+}
+
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    return __kernel_cmpxchg(old_value, new_value, ptr);
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
+
+/* Swap operation, without any explicit barriers.
+ * There are again two similar cases to consider:
+ *
+ *   ARMv6+ => use LDREX/STREX
+ *   < ARMv6 => use SWP instead.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%3]\n"
+            "strex %1, %4, [%3]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+            : "r" (ptr), "r" (new_value)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else /* !__ARM_HAVE_LDREX_STREX */
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    /* NOTE: SWP is available in Thumb-1 too */
+    __asm__ __volatile__ ("swp %0, %2, [%3]"
+                          : "=&r" (prev), "+m" (*ptr)
+                          : "r" (new_value), "r" (ptr)
+                          : "cc");
+    return prev;
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
+
+/* Atomic increment - without any barriers
+ * This returns the old value
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    int32_t prev, tmp, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%4]\n"
+            "add %1, %0, #1\n"
+            "strex %2, %1, [%4]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    int32_t  prev, status;
+    do {
+        prev = *ptr;
+        status = __kernel_cmpxchg(prev, prev+1, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+/* Atomic decrement - without any barriers
+ * This returns the old value.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    int32_t prev, tmp, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%4]\n"
+            "sub %1, %0, #1\n"
+            "strex %2, %1, [%4]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    int32_t  prev, status;
+    do {
+        prev = *ptr;
+        status = __kernel_cmpxchg(prev, prev-1, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+#endif /* BIONIC_ATOMIC_ARM_H */
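
Since every primitive in this header is deliberately barrier-free, callers pair them with `__bionic_memory_barrier()` to get acquire/release behaviour; this is the pattern the pthread_mutex changes above follow. A sketch, assuming the header's prerequisites (`int32_t`, `__ATOMIC_INLINE__`) are in scope:

    /* Sketch: composing the barrier-free primitives with the explicit
     * barrier to build a trivial spinlock. */
    static void spin_acquire(volatile int32_t *lock)
    {
        while (__bionic_cmpxchg(0, 1, lock) != 0)
            ;                          /* 0 means the CAS succeeded */
        __bionic_memory_barrier();     /* acquire semantics */
    }

    static void spin_release(volatile int32_t *lock)
    {
        __bionic_memory_barrier();     /* release semantics */
        *lock = 0;
    }
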
diff --git a/libc/private/bionic_atomic_gcc_builtin.h b/libc/private/bionic_atomic_gcc_builtin.h
new file mode 100644
index 0000000..e7c5761
--- /dev/null
+++ b/libc/private/bionic_atomic_gcc_builtin.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_GCC_BUILTIN_H
+#define BIONIC_ATOMIC_GCC_BUILTIN_H
+
+/* This header file is used by default if we don't have optimized atomic
+ * routines for a given platform. See bionic_atomic_arm.h and
+ * bionic_atomic_x86.h for examples.
+ */
+
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __sync_synchronize();
+}
+
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    /* We must return 0 on success */
+    return __sync_bool_compare_and_swap(ptr, old_value, new_value) == 0;
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    do {
+        prev = *ptr;
+        /* retry until our CAS replaces the value we just read */
+    } while (__sync_val_compare_and_swap(ptr, prev, new_value) != prev);
+    return prev;
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    /* We must return the old value */
+    return __sync_fetch_and_add(ptr, 1);
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    /* We must return the old value */
+    return __sync_fetch_and_add(ptr, -1);
+}
+
+#endif /* BIONIC_ATOMIC_GCC_BUILTIN_H */
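
The inverted return convention is easy to get wrong: `__sync_bool_compare_and_swap()` returns non-zero on success, while these wrappers return 0. A quick sanity sketch of the contracts (assumes `<assert.h>` and `<stdint.h>` alongside this header):

    static void check_conventions(void)
    {
        volatile int32_t v = 0;
        assert(__bionic_cmpxchg(0, 1, &v) == 0);  /* swapped: success -> 0 */
        assert(v == 1);
        assert(__bionic_cmpxchg(0, 2, &v) != 0);  /* v is 1, not 0: failure */
        assert(__bionic_swap(5, &v) == 1);        /* returns previous value */
        assert(__bionic_atomic_inc(&v) == 5);     /* returns the old value */
        assert(__bionic_atomic_dec(&v) == 6);
        assert(v == 5);
    }
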
diff --git a/libc/private/bionic_atomic_inline.h b/libc/private/bionic_atomic_inline.h
index 95766e1..821ad39 100644
--- a/libc/private/bionic_atomic_inline.h
+++ b/libc/private/bionic_atomic_inline.h
@@ -43,62 +43,21 @@
 extern "C" {
 #endif
 
-/*
- * Define the full memory barrier for an SMP system.  This is
- * platform-specific.
+/* Define __ATOMIC_INLINE__ to control the inlining of all atomics
+ * functions declared here. For a slight performance boost, we want
+ * all of them to be always_inline
  */
+#define  __ATOMIC_INLINE__  static __inline__ __attribute__((always_inline))
 
 #ifdef __arm__
-#include <machine/cpu-features.h>
-
-/*
- * For ARMv6K we need to issue a specific MCR instead of the DMB, since
- * that wasn't added until v7.  For anything older, SMP isn't relevant.
- * Since we don't have an ARMv6K to test with, we're not going to deal
- * with that now.
- *
- * The DMB instruction is found in the ARM and Thumb2 instruction sets.
- * This will fail on plain 16-bit Thumb.
- */
-#if defined(__ARM_HAVE_DMB)
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
+#  include <bionic_atomic_arm.h>
+#elif defined(__i386__)
+#  include <bionic_atomic_x86.h>
 #else
-# define _ANDROID_MEMBAR_FULL_SMP()  ARM_SMP_defined_but_no_DMB()
+#  include <bionic_atomic_gcc_builtin.h>
 #endif
 
-#elif defined(__i386__) || defined(__x86_64__)
-/*
- * For recent x86, we can use the SSE2 mfence instruction.
- */
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
-
-#else
-/*
- * Implementation not defined for this platform.  Hopefully we're building
- * in uniprocessor mode.
- */
-# define _ANDROID_MEMBAR_FULL_SMP()  SMP_barrier_not_defined_for_platform()
-#endif
-
-
-/*
- * Full barrier.  On uniprocessors this is just a compiler reorder barrier,
- * which ensures that the statements appearing above the barrier in the C/C++
- * code will be issued after the statements appearing below the barrier.
- *
- * For SMP this also includes a memory barrier instruction.  On an ARM
- * CPU this means that the current core will flush pending writes, wait
- * for pending reads to complete, and discard any cached reads that could
- * be stale.  Other CPUs may do less, but the end result is equivalent.
- */
-#if ANDROID_SMP != 0
-# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP()
-#else
-# define ANDROID_MEMBAR_FULL() \
-    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
-#endif
+#define ANDROID_MEMBAR_FULL  __bionic_memory_barrier
 
 #ifdef __cplusplus
 } // extern "C"
diff --git a/libc/private/bionic_atomic_x86.h b/libc/private/bionic_atomic_x86.h
new file mode 100644
index 0000000..aca0c4b
--- /dev/null
+++ b/libc/private/bionic_atomic_x86.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_X86_H
+#define BIONIC_ATOMIC_X86_H
+
+/* Define a full memory barrier, this is only needed if we build the
+ * platform for a multi-core device.
+ */
+#if defined(ANDROID_SMP) && ANDROID_SMP == 1
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __asm__ __volatile__ ( "mfence" : : : "memory" );
+}
+#else
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* A simple compiler barrier */
+    __asm__ __volatile__ ( "" : : : "memory" );
+}
+#endif
+
+/* Compare-and-swap, without any explicit barriers. Note that this function
+ * returns 0 on success, and 1 on failure. The opposite convention is typically
+ * used on other platforms.
+ */
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
+                          : "=a" (prev)
+                          : "q" (new_value), "m" (*ptr), "0" (old_value)
+                          : "memory");
+    return prev != old_value;
+}
+
+
+/* Swap, without any explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t *ptr)
+{
+    __asm__ __volatile__ ("xchgl %1, %0"
+                          : "=r" (new_value)
+                          : "m" (*ptr), "0" (new_value)
+                          : "memory");
+    return new_value;
+}
+
+/* Atomic increment, without explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t *ptr)
+{
+    int increment = 1;
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+/* Atomic decrement, without explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t *ptr)
+{
+    int increment = -1;
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+#endif /* BIONIC_ATOMIC_X86_H */
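
Because `xaddl` writes the old value back into its source register, `__bionic_atomic_inc()`/`__bionic_atomic_dec()` return the value before the update. A sketch of a typical consumer, a hypothetical reference count where the pre-decrement value identifies the last holder:

    /* Sketch: consuming the "returns the old value" contract. */
    #include <stdint.h>

    struct obj {
        volatile int32_t refs;          /* starts at 1 */
    };

    static void obj_put(struct obj *o, void (*destroy)(struct obj *))
    {
        /* An old value of 1 means this call dropped the last reference. */
        if (__bionic_atomic_dec(&o->refs) == 1) {
            __bionic_memory_barrier();  /* order prior writes before teardown */
            destroy(o);
        }
    }
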
diff --git a/libc/stdio/fileext.h b/libc/stdio/fileext.h
index 2d07043..b36a448 100644
--- a/libc/stdio/fileext.h
+++ b/libc/stdio/fileext.h
@@ -29,24 +29,41 @@
  * $Citrus$
  */
 
+#include <pthread.h>
+#include "wcio.h"
+
 /*
  * file extension
  */
 struct __sfileext {
 	struct	__sbuf _ub; /* ungetc buffer */
 	struct wchar_io_data _wcio;	/* wide char io status */
+	pthread_mutex_t _lock; /* file lock */
 };
 
+#define _FILEEXT_INITIALIZER  {{NULL,0},{0},PTHREAD_RECURSIVE_MUTEX_INITIALIZER}
+
 #define _EXT(fp) ((struct __sfileext *)((fp)->_ext._base))
 #define _UB(fp) _EXT(fp)->_ub
+#define _FLOCK(fp)  _EXT(fp)->_lock
 
 #define _FILEEXT_INIT(fp) \
 do { \
 	_UB(fp)._base = NULL; \
 	_UB(fp)._size = 0; \
 	WCIO_INIT(fp); \
+	_FLOCK_INIT(fp); \
 } while (0)
 
+/* Helper macros to avoid a function call when you know that fp is not NULL.
+ * Notice that we keep _FLOCK_INIT() fast by slightly breaking our pthread
+ * encapsulation.
+ */
+#define _FLOCK_INIT(fp)    _FLOCK(fp).value = __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE
+#define _FLOCK_LOCK(fp)    pthread_mutex_lock(&_FLOCK(fp))
+#define _FLOCK_TRYLOCK(fp) pthread_mutex_trylock(&_FLOCK(fp))
+#define _FLOCK_UNLOCK(fp)  pthread_mutex_unlock(&_FLOCK(fp))
+
 #define _FILEEXT_SETUP(f, fext) \
 do { \
 	(f)->_ext._base = (unsigned char *)(fext); \
diff --git a/libc/stdio/findfp.c b/libc/stdio/findfp.c
index a659c87..76ed5ee 100644
--- a/libc/stdio/findfp.c
+++ b/libc/stdio/findfp.c
@@ -58,7 +58,12 @@
 static struct glue *lastglue = &uglue;
 _THREAD_PRIVATE_MUTEX(__sfp_mutex);
 
-static struct __sfileext __sFext[3];
+static struct __sfileext __sFext[3] = {
+	_FILEEXT_INITIALIZER,
+	_FILEEXT_INITIALIZER,
+	_FILEEXT_INITIALIZER,
+};
+
 FILE __sF[3] = {
 	std(__SRD, STDIN_FILENO),		/* stdin */
 	std(__SWR, STDOUT_FILENO),		/* stdout */
diff --git a/libc/stdio/flockfile.c b/libc/stdio/flockfile.c
index e8c74c5..368fb15 100644
--- a/libc/stdio/flockfile.c
+++ b/libc/stdio/flockfile.c
@@ -31,122 +31,23 @@
  * we can't use the OpenBSD implementation which uses kernel-specific
  * APIs not available on Linux.
  *
- * Ideally, this would be trivially implemented by adding a
- * pthread_mutex_t field to struct __sFILE as defined in
- * <stdio.h>.
- *
- * However, since we don't want to bring pthread into the mix
- * as well as change the size of a public API/ABI structure,
- * we're going to store the data out-of-band.
- *
- * we use a hash-table to map FILE* pointers to recursive mutexes
- * fclose() will call __fremovelock() defined below to remove
- * a pointer from the table.
+ * Instead, we use a pthread_mutex_t within the FILE* internal state.
+ * See fileext.h for details.
  *
  * the behaviour, if fclose() is called while the corresponding
  * file is locked is totally undefined.
  */
 #include <stdio.h>
-#include <pthread.h>
 #include <string.h>
+#include <errno.h>
+#include "fileext.h"
 
-/* a node in the hash table */
-typedef struct FileLock {
-    struct FileLock*  next;
-    FILE*             file;
-    pthread_mutex_t   mutex;
-} FileLock;
-
-/* use a static hash table. We assume that we're not going to
- * lock a really large number of FILE* objects on an embedded
- * system.
- */
-#define  FILE_LOCK_BUCKETS  32
-
-typedef struct {
-    pthread_mutex_t   lock;
-    FileLock*         buckets[ FILE_LOCK_BUCKETS ];
-} LockTable;
-
-static LockTable*      _lockTable;
-static pthread_once_t  _lockTable_once = PTHREAD_ONCE_INIT;
-
-static void
-lock_table_init( void )
-{
-    _lockTable = malloc(sizeof(*_lockTable));
-    if (_lockTable != NULL) {
-        pthread_mutex_init(&_lockTable->lock, NULL);
-        memset(_lockTable->buckets, 0, sizeof(_lockTable->buckets));
-    }
-}
-
-static LockTable*
-lock_table_lock( void )
-{
-    pthread_once( &_lockTable_once, lock_table_init );
-    pthread_mutex_lock( &_lockTable->lock );
-    return _lockTable;
-}
-
-static void
-lock_table_unlock( LockTable*  t )
-{
-    pthread_mutex_unlock( &t->lock );
-}
-
-static FileLock**
-lock_table_lookup( LockTable*  t, FILE*  f )
-{
-    uint32_t    hash = (uint32_t)(void*)f;
-    FileLock**  pnode;
-
-    hash = (hash >> 2) ^ (hash << 17);
-    pnode = &t->buckets[hash % FILE_LOCK_BUCKETS];
-    for (;;) {
-        FileLock*  node = *pnode;
-        if (node == NULL || node->file == f)
-            break;
-        pnode = &node->next;
-    }
-    return pnode;
-}
 
 void
 flockfile(FILE * fp)
 {
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock == NULL) {
-            pthread_mutexattr_t  attr;
-
-            /* create a new node in the hash table */
-            lock = malloc(sizeof(*lock));
-            if (lock == NULL) {
-                lock_table_unlock(t);
-                return;
-            }
-            lock->next        = NULL;
-            lock->file        = fp;
-
-            pthread_mutexattr_init(&attr);
-            pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
-            pthread_mutex_init( &lock->mutex, &attr );
-
-            *lookup           = lock;
-        }
-        lock_table_unlock(t);
-
-        /* we assume that another thread didn't destroy 'lock'
-        * by calling fclose() on the FILE*. This can happen if
-        * the client is *really* buggy, but we don't care about
-        * such code here.
-        */
-        pthread_mutex_lock(&lock->mutex);
+    if (fp != NULL) {
+        _FLOCK_LOCK(fp);
     }
 }
 
@@ -154,21 +55,13 @@
 int
 ftrylockfile(FILE *fp)
 {
-    int         ret = -1;
-    LockTable*  t   = lock_table_lock();
+    /* The specification for ftrylockfile() says it returns 0 on success,
+     * or non-zero on error. So return an errno code directly on error.
+     */
+    int  ret = EINVAL;
 
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        lock_table_unlock(t);
-
-        /* see above comment about why we assume that 'lock' can
-        * be accessed from here
-        */
-        if (lock != NULL && !pthread_mutex_trylock(&lock->mutex)) {
-            ret = 0;  /* signal success */
-        }
+    if (fp != NULL) {
+        ret = _FLOCK_TRYLOCK(fp);
     }
     return ret;
 }
@@ -176,35 +69,7 @@
 void
 funlockfile(FILE * fp)
 {
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock != NULL)
-            pthread_mutex_unlock(&lock->mutex);
-
-        lock_table_unlock(t);
-    }
-}
-
-
-/* called from fclose() to remove the file lock */
-__LIBC_HIDDEN__ void
-__fremovelock(FILE*  fp)
-{
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock != NULL) {
-            *lookup   = lock->next;
-            lock->file = NULL;
-        }
-        lock_table_unlock(t);
-        free(lock);
+    if (fp != NULL) {
+        _FLOCK_UNLOCK(fp);
     }
 }
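
With the lock stored inline in `__sfileext`, the usual POSIX stdio-locking pattern now costs one recursive-mutex operation per call instead of a hash-table lookup. For reference, standard usage of the functions implemented above (nothing bionic-specific):

    #include <stdio.h>

    static void write_record(FILE *fp, const char *key, const char *val)
    {
        flockfile(fp);              /* recursive: nested locking is fine */
        fputs(key, fp);
        fputc('=', fp);
        fputs(val, fp);
        fputc('\n', fp);
        funlockfile(fp);
    }
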
diff --git a/libstdc++/src/one_time_construction.cpp b/libstdc++/src/one_time_construction.cpp
index 2a44c79..f3d7138 100644
--- a/libstdc++/src/one_time_construction.cpp
+++ b/libstdc++/src/one_time_construction.cpp
@@ -20,11 +20,11 @@
     // 6 untouched, wait and return 0
     // 1 untouched, return 0
 retry:
-    if (__atomic_cmpxchg(0, 0x2, gv) == 0) {
+    if (__bionic_cmpxchg(0, 0x2, gv) == 0) {
         ANDROID_MEMBAR_FULL();
         return 1;
     }
-    __atomic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter
+    __bionic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter
     __futex_wait(gv, 0x6, NULL);
 
     if(*gv != 1) // __cxa_guard_abort was called, let every thread try since there is no return code for this condition
@@ -39,7 +39,7 @@
     // 2 -> 1
     // 6 -> 1, and wake
     ANDROID_MEMBAR_FULL();
-    if (__atomic_cmpxchg(0x2, 0x1, gv) == 0) {
+    if (__bionic_cmpxchg(0x2, 0x1, gv) == 0) {
         return;
     }
 
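
The constants above encode a small state machine for C++ function-local statics: 0 = not constructed, 2 = construction in progress, 6 = in progress with waiters, 1 = constructed. A standalone sketch of that machine (GCC builtins and raw futex calls standing in for the bionic internals; the memory barriers and abort path are omitted):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int guard_acquire(volatile int *gv)
    {
    retry:
        if (__sync_bool_compare_and_swap(gv, 0, 2))
            return 1;                           /* we run the constructor */
        __sync_bool_compare_and_swap(gv, 2, 6); /* announce a waiter */
        syscall(SYS_futex, gv, FUTEX_WAIT, 6, NULL, NULL, 0);
        if (*gv != 1)                           /* construction was aborted */
            goto retry;
        return 0;
    }

    static void guard_release(volatile int *gv)
    {
        if (__sync_bool_compare_and_swap(gv, 2, 1))
            return;                             /* nobody was waiting */
        *gv = 1;                                /* state was 6: wake all */
        syscall(SYS_futex, gv, FUTEX_WAKE, 0x7fffffff, NULL, NULL, 0);
    }
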
diff --git a/linker/Android.mk b/linker/Android.mk
index 7793f8d..74c7453 100644
--- a/linker/Android.mk
+++ b/linker/Android.mk
@@ -10,27 +10,9 @@
 	dlfcn.c \
 	debugger.c
 
-ifeq ($(TARGET_ARCH),sh)
-# SH-4A series virtual address range from 0x00000000 to 0x7FFFFFFF.
-LINKER_TEXT_BASE := 0x70000100
-else
-# This is aligned to 4K page boundary so that both GNU ld and gold work.  Gold
-# actually produces a correct binary with starting address 0xB0000100 but the
-# extra objcopy step to rename symbols causes the resulting binary to be misaligned
-# and unloadable.  Increasing the alignment adds an extra 3840 bytes in padding
-# but switching to gold saves about 1M of space.
-LINKER_TEXT_BASE := 0xB0001000
-endif
+LOCAL_LDFLAGS := -shared
 
-# The maximum size set aside for the linker, from
-# LINKER_TEXT_BASE rounded down to a megabyte.
-LINKER_AREA_SIZE := 0x01000000
-
-LOCAL_LDFLAGS := -Wl,-Ttext,$(LINKER_TEXT_BASE)
-
-LOCAL_CFLAGS += -DPRELINK
-LOCAL_CFLAGS += -DLINKER_TEXT_BASE=$(LINKER_TEXT_BASE)
-LOCAL_CFLAGS += -DLINKER_AREA_SIZE=$(LINKER_AREA_SIZE)
+LOCAL_CFLAGS += -fno-stack-protector
 
 # Set LINKER_DEBUG to either 1 or 0
 #
diff --git a/linker/linker.c b/linker/linker.c
index 504fb93..c560507 100644
--- a/linker/linker.c
+++ b/linker/linker.c
@@ -313,15 +313,6 @@
     freelist = si;
 }
 
-#ifndef LINKER_TEXT_BASE
-#error "linker's makefile must define LINKER_TEXT_BASE"
-#endif
-#ifndef LINKER_AREA_SIZE
-#error "linker's makefile must define LINKER_AREA_SIZE"
-#endif
-#define LINKER_BASE ((LINKER_TEXT_BASE) & 0xfff00000)
-#define LINKER_TOP  (LINKER_BASE + (LINKER_AREA_SIZE))
-
 const char *addr_to_name(unsigned addr)
 {
     soinfo *si;
@@ -332,10 +323,6 @@
         }
     }
 
-    if((addr >= LINKER_BASE) && (addr < LINKER_TOP)){
-        return "linker";
-    }
-
     return "";
 }
 
@@ -354,12 +341,10 @@
     soinfo *si;
     unsigned addr = (unsigned)pc;
 
-    if ((addr < LINKER_BASE) || (addr >= LINKER_TOP)) {
-        for (si = solist; si != 0; si = si->next){
-            if ((addr >= si->base) && (addr < (si->base + si->size))) {
-                *pcount = si->ARM_exidx_count;
-                return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
-            }
+    for (si = solist; si != 0; si = si->next){
+        if ((addr >= si->base) && (addr < (si->base + si->size))) {
+            *pcount = si->ARM_exidx_count;
+            return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
         }
     }
    *pcount = 0;
@@ -420,6 +405,33 @@
     return NULL;
 }
 
+/*
+ * Essentially the same method as _elf_lookup() above, but only
+ * searches for LOCAL symbols
+ */
+static Elf32_Sym *_elf_lookup_local(soinfo *si, unsigned hash, const char *name)
+{
+    Elf32_Sym *symtab = si->symtab;
+    const char *strtab = si->strtab;
+    unsigned n = hash % si->nbucket;
+
+    TRACE_TYPE(LOOKUP, "%5d LOCAL SEARCH %s in %s@0x%08x %08x %d\n", pid,
+               name, si->name, si->base, hash, hash % si->nbucket);
+    for(n = si->bucket[hash % si->nbucket]; n != 0; n = si->chain[n]){
+        Elf32_Sym *s = symtab + n;
+        if (strcmp(strtab + s->st_name, name)) continue;
+        if (ELF32_ST_BIND(s->st_info) != STB_LOCAL) continue;
+        /* no section == undefined */
+        if(s->st_shndx == 0) continue;
+
+        TRACE_TYPE(LOOKUP, "%5d FOUND LOCAL %s in %s (%08x) %d\n", pid,
+                   name, si->name, s->st_value, s->st_size);
+        return s;
+    }
+
+    return NULL;
+}
+
 static unsigned elfhash(const char *_name)
 {
     const unsigned char *name = (const unsigned char *) _name;
@@ -443,7 +455,17 @@
     soinfo *lsi = si;
     int i;
 
-    /* Look for symbols in the local scope first (the object who is
+    /* If we are trying to find a symbol for the linker itself, look
+     * for LOCAL symbols first. Avoid using LOCAL symbols for other
+     * shared libraries until we have a better understanding of what
+     * might break by doing so. */
+    if (si->flags & FLAG_LINKER) {
+        s = _elf_lookup_local(si, elf_hash, name);
+        if(s != NULL)
+            goto done;
+    }
+
+    /* Look for symbols in the local scope (the object that is
      * searching). This happens with C++ templates on i386 for some
      * reason.
      *
@@ -452,6 +474,7 @@
      * dynamic linking.  Some systems return the first definition found
      * and some the first non-weak definition.   This is system dependent.
      * Here we return the first definition found for simplicity.  */
+
     s = _elf_lookup(si, elf_hash, name);
     if(s != NULL)
         goto done;
@@ -1719,10 +1742,10 @@
     DEBUG("%5d si->base = 0x%08x si->flags = 0x%08x\n", pid,
           si->base, si->flags);
 
-    if (si->flags & FLAG_EXE) {
+    if (si->flags & (FLAG_EXE | FLAG_LINKER)) {
         /* Locate the needed program segments (DYNAMIC/ARM_EXIDX) for
-         * linkage info if this is the executable. If this was a
-         * dynamic lib, that would have been done at load time.
+         * linkage info if this is the executable or the linker itself.
+         * If this was a dynamic lib, that would have been done at load time.
          *
          * TODO: It's unfortunate that small pieces of this are
          * repeated from the load_library routine. Refactor this just
@@ -2084,7 +2107,12 @@
 
 static void * __tls_area[ANDROID_TLS_SLOTS];
 
-unsigned __linker_init(unsigned **elfdata)
+/*
+ * This code is called after the linker has linked itself and
+ * fixed its own GOT. It is safe to make references to externs
+ * and other non-local data at this point.
+ */
+static unsigned __linker_init_post_relocation(unsigned **elfdata)
 {
     static soinfo linker_soinfo;
 
@@ -2192,7 +2220,18 @@
         vecs += 2;
     }
 
-    si->base = (Elf32_Addr) si->phdr - si->phdr->p_vaddr;
+    /* Compute the value of si->base. We can't rely on the fact that
+     * the first entry is the PHDR because this will not be true
+     * for certain executables (e.g. some in the NDK unit test suite)
+     */
+    int nn;
+    si->base = 0;
+    for ( nn = 0; nn < si->phnum; nn++ ) {
+        if (si->phdr[nn].p_type == PT_PHDR) {
+            si->base = (Elf32_Addr) si->phdr - si->phdr[nn].p_vaddr;
+            break;
+        }
+    }
     si->dynamic = (unsigned *)-1;
     si->wrprotect_start = 0xffffffff;
     si->wrprotect_end = 0;
@@ -2261,3 +2300,69 @@
           si->entry);
     return si->entry;
 }
+
+/*
+ * Find the value of AT_BASE passed to us by the kernel. This is the load
+ * location of the linker.
+ */
+static unsigned find_linker_base(unsigned **elfdata) {
+    int argc = (int) *elfdata;
+    char **argv = (char**) (elfdata + 1);
+    unsigned *vecs = (unsigned*) (argv + argc + 1);
+    while (vecs[0] != 0) {
+        vecs++;
+    }
+
+    /* The end of the environment block is marked by two NULL pointers */
+    vecs++;
+
+    while(vecs[0]) {
+        if (vecs[0] == AT_BASE) {
+            return vecs[1];
+        }
+        vecs += 2;
+    }
+
+    return 0; // should never happen
+}
+
+/*
+ * This is the entry point for the linker, called from begin.S. This
+ * method is responsible for fixing the linker's own relocations, and
+ * then calling __linker_init_post_relocation().
+ *
+ * Because this method is called before the linker has fixed its own
+ * relocations, any attempt to reference an extern variable, extern
+ * function, or other GOT reference will generate a segfault.
+ */
+unsigned __linker_init(unsigned **elfdata) {
+    unsigned linker_addr = find_linker_base(elfdata);
+    Elf32_Ehdr *elf_hdr = (Elf32_Ehdr *) linker_addr;
+    Elf32_Phdr *phdr =
+        (Elf32_Phdr *)((unsigned char *) linker_addr + elf_hdr->e_phoff);
+
+    soinfo linker_so;
+    memset(&linker_so, 0, sizeof(soinfo));
+
+    linker_so.base = linker_addr;
+    linker_so.dynamic = (unsigned *) -1;
+    linker_so.phdr = phdr;
+    linker_so.phnum = elf_hdr->e_phnum;
+    linker_so.flags |= FLAG_LINKER;
+    linker_so.wrprotect_start = 0xffffffff;
+    linker_so.wrprotect_end = 0;
+
+    if (link_image(&linker_so, 0)) {
+        // It would be nice to print an error message, but if the linker
+        // can't link itself, there's no guarantee that we'll be able to
+        // call write() (because it involves a GOT reference).
+        //
+        // This situation should never occur unless the linker itself
+        // is corrupt.
+        exit(-1);
+    }
+
+    // We have successfully fixed our own relocations. It's safe to run
+    // the main part of the linker now.
+    return __linker_init_post_relocation(elfdata);
+}
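
find_linker_base() must walk the auxiliary vector by hand because at that point nothing, including libc, has been relocated. In ordinary application code the same AT_BASE lookup is a single call; a sketch, assuming a libc that provides getauxval() (glibc 2.16+, later bionic):

    #include <stdio.h>
    #include <sys/auxv.h>

    int main(void)
    {
        /* AT_BASE is the load address of the dynamic linker. */
        unsigned long base = getauxval(AT_BASE);
        printf("dynamic linker loaded at %#lx\n", base);
        return 0;
    }
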
diff --git a/linker/linker.h b/linker/linker.h
index 7b1ba51..aa1e5e7 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -83,6 +83,7 @@
 #define FLAG_LINKED     0x00000001
 #define FLAG_ERROR      0x00000002
 #define FLAG_EXE        0x00000004 // The main executable
+#define FLAG_LINKER     0x00000010 // The linker itself
 
 #define SOINFO_NAME_LEN 128