setjmp/longjmp: avoid invalid values in the stack pointer.

arm64 was already careful about this, but on x86/x86-64 and 32-bit ARM a
signal could arrive while the stack pointer still held a mangled value.

For 32-bit ARM I've taken care with the link register too, to avoid
potential issues with unwinding.

Bug: http://b/152210274
Test: treehugger
Change-Id: I1ce285b017a633c732dbe04743368f4cae27af85
diff --git a/libc/arch-arm/bionic/setjmp.S b/libc/arch-arm/bionic/setjmp.S
index 5fbcaf3..2579143 100644
--- a/libc/arch-arm/bionic/setjmp.S
+++ b/libc/arch-arm/bionic/setjmp.S
@@ -87,28 +87,6 @@
   b sigsetjmp
 END(_setjmp)
 
-#define MANGLE_REGISTERS 1
-#define USE_CHECKSUM 1
-
-.macro m_mangle_registers reg
-#if MANGLE_REGISTERS
-  eor r4, r4, \reg
-  eor r5, r5, \reg
-  eor r6, r6, \reg
-  eor r7, r7, \reg
-  eor r8, r8, \reg
-  eor r9, r9, \reg
-  eor r10, r10, \reg
-  eor r11, r11, \reg
-  eor r13, r13, \reg
-  eor r14, r14, \reg
-#endif
-.endm
-
-.macro m_unmangle_registers reg
-  m_mangle_registers \reg
-.endm
-
 .macro m_calculate_checksum dst, src, scratch
   mov \dst, #0
   .irp i,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28
@@ -167,12 +145,30 @@
 
   // Save core registers.
   add r1, r0, #(_JB_CORE_BASE * 4)
-  m_mangle_registers r2
-
-  // ARM deprecates using sp in the register list for stmia.
-  stmia r1, {r4-r11, lr}
-  str sp, [r1, #(9 * 4)]
-  m_unmangle_registers r2
+  // Mangle the easy registers in-place, write them out in one go, and unmangle
+  // them again.
+  eor r4, r4, r2
+  eor r5, r5, r2
+  eor r6, r6, r2
+  eor r7, r7, r2
+  eor r8, r8, r2
+  eor r9, r9, r2
+  eor r10, r10, r2
+  eor r11, r11, r2
+  stmia r1, {r4-r11}
+  eor r4, r4, r2
+  eor r5, r5, r2
+  eor r6, r6, r2
+  eor r7, r7, r2
+  eor r8, r8, r2
+  eor r9, r9, r2
+  eor r10, r10, r2
+  eor r11, r11, r2
+  // We need to avoid invalid values in sp or lr (http://b/152210274).
+  eor r3, lr, r2
+  str r3, [r1, #(8 * 4)]
+  eor r3, sp, r2
+  str r3, [r1, #(9 * 4)]
 
   // Save floating-point registers.
   add r1, r0, #(_JB_FLOAT_BASE * 4)
@@ -182,11 +178,9 @@
   fmrx r1, fpscr
   str r1, [r0, #(_JB_FLOAT_STATE * 4)]
 
-#if USE_CHECKSUM
   // Calculate the checksum.
   m_calculate_checksum r12, r0, r2
   str r12, [r0, #(_JB_CHECKSUM * 4)]
-#endif
 
   mov r0, #0
   bx lr
@@ -201,14 +195,11 @@
   .cfi_rel_offset r1, 4
   .cfi_rel_offset lr, 8
 
-#if USE_CHECKSUM
   // Check the checksum before doing anything.
   m_calculate_checksum r12, r0, r3
   ldr r2, [r0, #(_JB_CHECKSUM * 4)]
-
   teq r2, r12
   bne __bionic_setjmp_checksum_mismatch
-#endif
 
   // Fetch the signal flag.
   ldr r1, [r0, #(_JB_SIGFLAG * 4)]
@@ -245,10 +236,21 @@
   // Restore core registers.
   add r2, r0, #(_JB_CORE_BASE * 4)
 
-  // ARM deprecates using sp in the register list for ldmia.
-  ldmia r2, {r4-r11, lr}
-  ldr sp, [r2, #(9 * 4)]
-  m_unmangle_registers r3
+  // Do all the easy registers in one go.
+  ldmia r2, {r4-r11}
+  eor r4, r4, r3
+  eor r5, r5, r3
+  eor r6, r6, r3
+  eor r7, r7, r3
+  eor r8, r8, r3
+  eor r9, r9, r3
+  eor r10, r10, r3
+  eor r11, r11, r3
+  // We need to avoid invalid values in sp or lr (http://b/152210274).
+  ldr r0, [r2, #(8 * 4)]
+  eor lr, r0, r3
+  ldr r0, [r2, #(9 * 4)]
+  eor sp, r0, r3
 
   // Save the return value/address and check the setjmp cookie.
   stmfd sp!, {r1, lr}