Add stack unwinding directives to assembly leaf functions.

These directives emit unwind information for the functions, so that the
real culprit of a native crash can surface in the stack trace.
diff --git a/libc/arch-arm/bionic/atomics_arm.S b/libc/arch-arm/bionic/atomics_arm.S
index b2da09f..f8b23e6 100644
--- a/libc/arch-arm/bionic/atomics_arm.S
+++ b/libc/arch-arm/bionic/atomics_arm.S
@@ -41,6 +41,8 @@
    .equ     kernel_cmpxchg, 0xFFFF0FC0
    .equ     kernel_atomic_base, 0xFFFF0FFF
 __atomic_dec:
+    .fnstart
+    .save {r4, lr}
     stmdb   sp!, {r4, lr}
     mov     r2, r0
 1: @ atomic_dec
@@ -53,8 +55,11 @@
     add     r0, r1, #1
     ldmia   sp!, {r4, lr}
     bx      lr
+    .fnend
 
 __atomic_inc:
+    .fnstart
+    .save {r4, lr}
     stmdb   sp!, {r4, lr}
     mov     r2, r0
 1: @ atomic_inc
@@ -67,9 +72,12 @@
     sub     r0, r1, #1
     ldmia   sp!, {r4, lr}
     bx      lr
+    .fnend
 
 /* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
 __atomic_cmpxchg:
+    .fnstart
+    .save {r4, lr}
     stmdb   sp!, {r4, lr}
     mov     r4, r0          /* r4 = save oldvalue */
 1: @ atomic_cmpxchg
@@ -84,6 +92,7 @@
 2: @ atomic_cmpxchg
     ldmia   sp!, {r4, lr}
     bx      lr
+    .fnend
 #else
 #define KUSER_CMPXCHG 0xffffffc0