Remove pushes from memsets (krait/cortex-a9).

On the path that only uses r0 in both the krait and cortex-a9
memset, remove the push of r0 and use r3 as the working pointer
instead, so that r0 still holds the return value on exit.

In addition, for cortex-a9, remove the artificial function; it is
no longer needed now that DWARF unwinding is supported on ARM.

Change-Id: Icd39cfb6b8350f44368e022063cd97a6b60d46da
diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S
index a4fbe17..ae05965 100644
--- a/libc/arch-arm/krait/bionic/memset.S
+++ b/libc/arch-arm/krait/bionic/memset.S
@@ -69,10 +69,7 @@
 
 /* memset() returns its first argument.  */
 ENTRY(memset)
-        stmfd       sp!, {r0}
-        .cfi_def_cfa_offset 4
-        .cfi_rel_offset r0, 0
-
+        mov         r3, r0
         vdup.8      q0, r1
 
         /* make sure we have at least 32 bytes to write */
@@ -82,7 +79,7 @@
 
 1:      /* The main loop writes 32 bytes at a time */
         subs        r2, r2, #32
-        vst1.8      {d0 - d3}, [r0]!
+        vst1.8      {d0 - d3}, [r3]!
         bhs         1b
 
 2:      /* less than 32 left */
@@ -91,18 +88,17 @@
         beq         3f
 
         // writes 16 bytes, 128-bits aligned
-        vst1.8      {d0, d1}, [r0]!
+        vst1.8      {d0, d1}, [r3]!
 3:      /* write up to 15-bytes (count in r2) */
         movs        ip, r2, lsl #29
         bcc         1f
-        vst1.8      {d0}, [r0]!
+        vst1.8      {d0}, [r3]!
 1:      bge         2f
-        vst1.32     {d0[0]}, [r0]!
+        vst1.32     {d0[0]}, [r3]!
 2:      movs        ip, r2, lsl #31
-        strbmi      r1, [r0], #1
-        strbcs      r1, [r0], #1
-        strbcs      r1, [r0], #1
-        ldmfd       sp!, {r0}
+        strbmi      r1, [r3], #1
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3], #1
         bx          lr
 END(memset)