Add stack unwinding directives to assembly leaf functions.

This lets the real culprit of a native crash surface in the stack trace.
diff --git a/libc/arch-arm/bionic/memcmp16.S b/libc/arch-arm/bionic/memcmp16.S
index 38d8b62..f398588 100644
--- a/libc/arch-arm/bionic/memcmp16.S
+++ b/libc/arch-arm/bionic/memcmp16.S
@@ -44,6 +44,7 @@
  */
 
 __memcmp16:
+        .fnstart
         PLD         (r0, #0)
         PLD         (r1, #0)
 
@@ -79,6 +80,7 @@
         bx          lr
 
 
+        .save {r4, lr}
         /* save registers */
 0:      stmfd       sp!, {r4, lr}
         
@@ -93,6 +95,7 @@
         /* restore registers and return */
         ldmnefd     sp!, {r4, lr}
         bxne        lr
+        .fnend