x86 ptrace: unify syscall tracing

This unifies and cleans up the syscall tracing code on i386 and x86_64.

Using a single function for entry and exit tracing on 32-bit made the
do_syscall_trace() into some terrible spaghetti.  The logic is clear and
simple using separate syscall_trace_enter() and syscall_trace_leave()
functions as on 64-bit.

The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support
on x86_64, for 32-bit ptrace() callers and for 64-bit ptrace() callers
tracing either 32-bit or 64-bit tasks.  It behaves just like 32-bit.

Changing syscall_trace_enter() to return the syscall number shortens
all the assembly paths, while adding the SYSEMU feature in a simple way.

Signed-off-by: Roland McGrath <roland@redhat.com>
diff --git a/include/asm-x86/calling.h b/include/asm-x86/calling.h
index f13e62e..2bc162e 100644
--- a/include/asm-x86/calling.h
+++ b/include/asm-x86/calling.h
@@ -104,7 +104,7 @@
 	.endif
 	.endm
 
-	.macro LOAD_ARGS offset
+	.macro LOAD_ARGS offset, skiprax=0
 	movq \offset(%rsp),    %r11
 	movq \offset+8(%rsp),  %r10
 	movq \offset+16(%rsp), %r9
@@ -113,7 +113,10 @@
 	movq \offset+48(%rsp), %rdx
 	movq \offset+56(%rsp), %rsi
 	movq \offset+64(%rsp), %rdi
+	.if \skiprax
+	.else
 	movq \offset+72(%rsp), %rax
+	.endif
 	.endm
 
 #define REST_SKIP	6*8
@@ -165,4 +168,3 @@
 	.macro icebp
 	.byte 0xf1
 	.endm
-