blob: 08aa6b10933cd74232a3f406f8e6f706a294c245 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 */
8
9/*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
Andi Kleen2e91a172006-09-26 10:52:29 +020023 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 * There are unfortunately lots of special cases where some registers
 * are not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 */
39
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/linkage.h>
41#include <asm/segment.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <asm/cache.h>
43#include <asm/errno.h>
44#include <asm/dwarf2.h>
45#include <asm/calling.h>
Sam Ravnborge2d5df92005-09-09 21:28:48 +020046#include <asm/asm-offsets.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047#include <asm/msr.h>
48#include <asm/unistd.h>
49#include <asm/thread_info.h>
50#include <asm/hw_irq.h>
Andi Kleen5f8efbb2006-01-16 01:56:39 +010051#include <asm/page.h>
Ingo Molnar2601e642006-07-03 00:24:45 -070052#include <asm/irqflags.h>
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +010053#include <asm/paravirt.h>
Abhishek Sagar395a59d2008-06-21 23:47:27 +053054#include <asm/ftrace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055
Roland McGrath86a1c342008-06-23 15:37:04 -070056/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h>
/* Audit architecture tag passed to audit_syscall_entry(): ELF machine | 64-bit | little-endian. */
#define __AUDIT_ARCH_64BIT	0x80000000
#define __AUDIT_ARCH_LE		0x40000000
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
61
Linus Torvalds1da177e2005-04-16 15:20:36 -070062 .code64
63
Steven Rostedt606576c2008-10-06 19:06:12 -040064#ifdef CONFIG_FUNCTION_TRACER
Steven Rostedtd61f82d2008-05-12 21:20:43 +020065#ifdef CONFIG_DYNAMIC_FTRACE
/* CONFIG_DYNAMIC_FTRACE variant of mcount: does no work and returns at once. */
ENTRY(mcount)
	retq
END(mcount)
69
/*
 * ftrace_caller - tracing trampoline for CONFIG_DYNAMIC_FTRACE.
 *
 * Leaves through ftrace_stub without doing anything when
 * function_trace_stop is non-zero.  Otherwise rax, rcx, rdx, rsi, rdi,
 * r8 and r9 are parked in a 0x38 byte (7 * 8) area on the stack, the
 * routine at ftrace_call is invoked with
 *	rdi = return address stored just above the save area,
 *	      minus MCOUNT_INSN_SIZE
 *	rsi = 8(%rbp)	(presumably the parent's return address; this
 *			 relies on %rbp being a frame pointer - confirm)
 * and the parked registers are reloaded before returning.
 *
 * ftrace_call and ftrace_stub are exported labels.
 * NOTE(review): the call at ftrace_call is presumably rewritten at run
 * time by the ftrace core - confirm before changing its form.
 */
ENTRY(ftrace_caller)
	cmpl	$0, function_trace_stop
	jne	ftrace_stub

	/* save sequence taken from glibc */
	subq	$0x38, %rsp
	movq	%r9, 48(%rsp)
	movq	%r8, 40(%rsp)
	movq	%rdi, 32(%rsp)
	movq	%rsi, 24(%rsp)
	movq	%rdx, 16(%rsp)
	movq	%rcx, 8(%rsp)
	movq	%rax, (%rsp)

	movq	8(%rbp), %rsi			/* arg2 */
	movq	0x38(%rsp), %rdi		/* arg1: our return address ... */
	subq	$MCOUNT_INSN_SIZE, %rdi		/* ... moved back by MCOUNT_INSN_SIZE */

.globl ftrace_call
ftrace_call:
	call	ftrace_stub

	movq	(%rsp), %rax
	movq	8(%rsp), %rcx
	movq	16(%rsp), %rdx
	movq	24(%rsp), %rsi
	movq	32(%rsp), %rdi
	movq	40(%rsp), %r8
	movq	48(%rsp), %r9
	addq	$0x38, %rsp

.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)
105
106#else /* ! CONFIG_DYNAMIC_FTRACE */
/*
 * mcount - tracing hook when CONFIG_DYNAMIC_FTRACE is off.
 *
 * Returns immediately (via ftrace_stub) when function_trace_stop is
 * non-zero or when ftrace_trace_function still points at ftrace_stub.
 * Otherwise rax, rcx, rdx, rsi, rdi, r8 and r9 are parked in a 0x38 byte
 * (7 * 8) stack area and *ftrace_trace_function is called with
 *	rdi = return address stored just above the save area,
 *	      minus MCOUNT_INSN_SIZE
 *	rsi = 8(%rbp)	(presumably the parent's return address; this
 *			 relies on %rbp being a frame pointer - confirm)
 * after which the parked registers are reloaded.
 */
ENTRY(mcount)
	cmpl	$0, function_trace_stop
	jne	ftrace_stub

	cmpq	$ftrace_stub, ftrace_trace_function
	jnz	trace
.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* save sequence taken from glibc */
	subq	$0x38, %rsp
	movq	%r9, 48(%rsp)
	movq	%r8, 40(%rsp)
	movq	%rdi, 32(%rsp)
	movq	%rsi, 24(%rsp)
	movq	%rdx, 16(%rsp)
	movq	%rcx, 8(%rsp)
	movq	%rax, (%rsp)

	movq	8(%rbp), %rsi			/* arg2 */
	movq	0x38(%rsp), %rdi		/* arg1: our return address ... */
	subq	$MCOUNT_INSN_SIZE, %rdi		/* ... moved back by MCOUNT_INSN_SIZE */

	call	*ftrace_trace_function

	movq	(%rsp), %rax
	movq	8(%rsp), %rcx
	movq	16(%rsp), %rdx
	movq	24(%rsp), %rsi
	movq	32(%rsp), %rdi
	movq	40(%rsp), %r8
	movq	48(%rsp), %r9
	addq	$0x38, %rsp

	jmp	ftrace_stub
END(mcount)
Steven Rostedtd61f82d2008-05-12 21:20:43 +0200145#endif /* CONFIG_DYNAMIC_FTRACE */
Steven Rostedt606576c2008-10-06 19:06:12 -0400146#endif /* CONFIG_FUNCTION_TRACER */
Arnaldo Carvalho de Melo16444a82008-05-12 21:20:42 +0200147
Andi Kleendc37db42005-04-16 15:25:05 -0700148#ifndef CONFIG_PREEMPT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149#define retint_kernel retint_restore_args
150#endif
Ingo Molnar2601e642006-07-03 00:24:45 -0700151
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100152#ifdef CONFIG_PARAVIRT
/*
 * native_usergs_sysret64 - bare-metal return to 64-bit user space:
 * switch %gs back with swapgs, then sysretq.  Only built when
 * CONFIG_PARAVIRT is set.
 *
 * The END() annotation gives the symbol a size, matching every other
 * ENTRY() in this file.
 */
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
END(native_usergs_sysret64)
156#endif /* CONFIG_PARAVIRT */
157
Ingo Molnar2601e642006-07-03 00:24:45 -0700158
/*
 * TRACE_IRQS_IRETQ - emit TRACE_IRQS_ON only when the EFLAGS image that
 * is about to be restored has bit 9 set.
 *
 * offset: how far %rsp sits above the start of the register frame
 *         (defaults to ARGOFFSET).
 * Expands to nothing unless CONFIG_TRACE_IRQFLAGS is enabled.
 */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)		/* interrupts off? */
	jnc	1f				/* yes: nothing to report */
	TRACE_IRQS_ON
1:
#endif
.endm
167
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168/*
 * C code is not supposed to know about the undefined top of stack. Every
 * time a C function with a pt_regs argument is called from the SYSCALL
 * based fast path, FIXUP_TOP_OF_STACK is needed.
172 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
173 * manipulation.
174 */
175
/*
 * FIXUP_TOP_OF_STACK tmp
 *
 * Fill in the SS/RSP/EFLAGS/CS/RCX slots of the frame at %rsp
 * (%rsp: at FRAMEEND):
 *	SS, CS	<- __USER_DS, __USER_CS
 *	RCX	<- -1
 *	RSP	<- %gs:pda_oldrsp
 *	EFLAGS	<- the saved R11 slot
 * \tmp is a scratch register; it holds the EFLAGS value afterwards.
 */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	R11(%rsp),\tmp			/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm
186
/*
 * RESTORE_TOP_OF_STACK tmp, offset=0
 *
 * Counterpart of FIXUP_TOP_OF_STACK: copy the frame's RSP slot back to
 * %gs:pda_oldrsp and its EFLAGS slot back to the R11 slot.
 * \offset is subtracted from every frame offset; \tmp is a scratch
 * register and holds the EFLAGS value afterwards.
 */
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm
193
/*
 * FAKE_STACK_FRAME child_rip
 *
 * Push six quadwords, in this order: ss, rsp, eflags, cs, rip, orig rax.
 *	ss       = __KERNEL_DS
 *	rsp      = 0
 *	eflags   = 1<<9 (interrupts on)
 *	cs       = __KERNEL_CS
 *	rip      = \child_rip
 *	orig rax = 0
 * Clobbers %rax (left at zero).  UNFAKE_STACK_FRAME pops the six slots.
 */
	.macro FAKE_STACK_FRAME child_rip
	xorl	%eax, %eax			/* zero source for rsp / orig rax */
	pushq	$__KERNEL_DS			/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax				/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)				/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS			/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip			/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax				/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm
215
/* UNFAKE_STACK_FRAME - drop the six quadwords pushed by FAKE_STACK_FRAME. */
	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
220
/*
 * CFI_DEFAULT_STACK start=1
 *
 * Unwind annotations for a full register frame located at %rsp.
 * With \start non-zero a new "simple" signal-frame CFI procedure is
 * opened and the CFA is defined as rsp + SS+8; otherwise only the CFA
 * offset is reset to SS+8.  Every general purpose register plus rip and
 * rsp is then tied to its frame slot.  Emits no instructions.
 */
	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET	SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * ret_from_fork - a newly forked process directly context switches
 * into this.
 *
 * In:	rdi = prev (handed on unchanged to schedule_tail)
 *
 * Flow:
 *  - reload the flags register from kernel_eflags
 *  - call schedule_tail
 *  - if _TIF_SYSCALL_TRACE or _TIF_SYSCALL_AUDIT is set, call
 *    syscall_trace_leave with the register frame as argument
 *  - frames whose saved CS has the low two bits clear (kernel_thread)
 *    and _TIF_IA32 tasks leave via int_ret_from_sys_call; everything
 *    else resyncs the top of stack and leaves via ret_from_sys_call
 */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push	kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET	8
	popf					/* reset kernel eflags */
	CFI_ADJUST_CFA_OFFSET	-8
	call	schedule_tail
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz	rff_trace
rff_action:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)		/* from kernel_thread? */
	je	int_ret_from_sys_call
	testl	$_TIF_IA32,TI_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
rff_trace:
	movq	%rsp,%rdi			/* full frame -> arg1 */
	call	syscall_trace_leave
	GET_THREAD_INFO(%rcx)			/* refetch after the call */
	jmp	rff_action
	CFI_ENDPROC
END(ret_from_fork)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279
280/*
 * System call entry. Up to 6 arguments in registers are supported.
282 *
283 * SYSCALL does not save anything on the stack and does not change the
284 * stack pointer.
285 */
286
287/*
288 * Register setup:
289 * rax system call number
290 * rdi arg0
291 * rcx return address for syscall/sysret, C arg3
292 * rsi arg1
293 * rdx arg2
294 * r10 arg3 (--> moved to rcx for C)
295 * r8 arg4
296 * r9 arg5
297 * r11 eflags for syscall/sysret, temporary for C
298 * r12-r15,rbp,rbx saved by C code, not touched.
299 *
300 * Interrupts are off on entry.
301 * Only called from user space.
302 *
303 * XXX if we had a free scratch register we could save the RSP into the stack frame
304 * and report it properly in ps. Unfortunately we haven't.
Andi Kleen7bf36bb2006-04-07 19:50:00 +0200305 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 */
310
/*
 * system_call - 64-bit SYSCALL entry point.
 *
 * Layout of this routine:
 *   system_call / system_call_after_swapgs
 *			switch to the kernel stack, save the argument
 *			registers, divert to tracesys if entry work is
 *			flagged
 *   system_call_fastpath
 *			range-check %rax, dispatch through sys_call_table,
 *			store the result in the frame's RAX slot
 *   ret_from_sys_call / sysret_check
 *			SYSRET exit; sysret_careful and sysret_signal
 *			handle reschedule and signal/audit work
 *   auditsys / sysret_audit (CONFIG_AUDITSYSCALL)
 *			audit-only entry and exit fast paths
 *   tracesys		full-frame syscall-entry tracing
 *   int_ret_from_sys_call / int_with_check
 *			IRET exit with reschedule, exit-trace and signal
 *			handling
 *
 * The syscall return value is always read back from the RAX slot of the
 * register frame on the exit paths: %rax itself is not reliable there
 * (badsys never loads it, and C calls such as schedule() clobber it).
 */
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq	%rsp,%gs:pda_oldrsp		/* stash the user stack pointer */
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl	$_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz	tracesys
system_call_fastpath:
	cmpq	$__NR_syscall_max,%rax
	ja	badsys
	movq	%r10,%rcx			/* arg3 travels in r10, C wants rcx */
	call	*sys_call_table(,%rax,8)	/* XXX: rip relative */
	movq	%rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl	TI_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq	RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp, %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq	%rdi				/* keep the work mask across the call */
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	jmp	sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt	$TIF_SYSCALL_AUDIT,%edx
	jc	sysret_audit
#endif
	/* edx:	work flags (arg3) */
	leaq	do_notify_resume(%rip),%rax
	leaq	-ARGOFFSET(%rsp),%rdi		/* &pt_regs -> arg1 */
	xorl	%esi,%esi			/* oldset -> arg2 */
	call	ptregscall_common
	movl	$_TIF_WORK_MASK,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp	int_with_check

badsys:
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq	%r10,%r9			/* 6th arg: 4th syscall arg */
	movq	%rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq	%rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq	%rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq	%rax,%rsi			/* 2nd arg: syscall number */
	movl	$AUDIT_ARCH_X86_64,%edi		/* 1st arg: audit arch */
	call	audit_syscall_entry
	LOAD_ARGS 0				/* reload call-clobbered registers */
	jmp	system_call_fastpath

	/*
	 * Return fast path for syscall audit.  Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 *
	 * The return value is taken from the frame's RAX slot rather than
	 * from %rax: badsys only writes the slot, and a trip through
	 * sysret_careful (call schedule) leaves %rax undefined.
	 */
sysret_audit:
	movq	RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
	cmpq	$0,%rsi				/* is it < 0? */
	setl	%al				/* 1 if so, 0 if not */
	movzbl	%al,%edi			/* zero-extend that into %edi */
	inc	%edi	/* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call	audit_syscall_exit
	movl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp	sysret_check
#endif	/* CONFIG_AUDITSYSCALL */

	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	/* only the audit flag set: take the cheaper auditsys path */
	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz	auditsys
#endif
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi
	call	syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	ja	int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq	%r10,%rcx			/* fixup for C */
	call	*sys_call_table(,%rax,8)
	movq	%rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
	.globl int_with_check
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl	TI_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	int_careful
	andl	$~TS_COMPAT,TI_status(%rcx)
	jmp	retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq	%rdi				/* keep the work mask across the call */
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp	int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl	$_TIF_WORK_SYSCALL_EXIT,%edx
	jz	int_signal
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET	8
	leaq	8(%rsp),%rdi			/* &ptregs -> arg1 (skip the pushed rdi) */
	call	syscall_trace_leave
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	andl	$~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp	int_restore_rest

int_signal:
	testl	$_TIF_DO_NOTIFY_MASK,%edx
	jz	1f
	movq	%rsp,%rdi			/* &ptregs -> arg1 */
	xorl	%esi,%esi			/* oldset -> arg2 */
	call	do_notify_resume
1:	movl	$_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp	int_with_check
	CFI_ENDPROC
END(system_call)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539
540/*
541 * Certain special system calls that need to save a complete full stack frame.
542 */
543
/*
 * PTREGSCALL label, func, arg
 *
 * Emit a global stub \label that loads the address of \func into %rax,
 * places the address of the register frame in \arg and continues in
 * ptregscall_common.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg		/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm
552
/*
 * Stubs for the system calls that take a pt_regs pointer, followed by
 * their shared tail.  All of them live inside one CFI procedure.
 */
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

/*
 * ptregscall_common
 *
 * In:	rax = function to call; argument registers already set up by the
 *	stub; return address on top of the stack.
 *
 * Pops the return address, completes the frame with SAVE_REST and
 * FIXUP_TOP_OF_STACK, calls *%rax, then undoes both and returns to the
 * popped address.  The return address is kept in %r15 across the call;
 * %r11 is used as scratch.
 */
ENTRY(ptregscall_common)
	popq	%r11				/* return address */
	CFI_ADJUST_CFA_OFFSET	-8
	CFI_REGISTER	rip, r11
	SAVE_REST
	movq	%r11, %r15			/* park it in r15 for the call */
	CFI_REGISTER	rip, r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	CFI_REGISTER	rip, r11
	RESTORE_REST
	pushq	%r11				/* put the return address back */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580
/*
 * stub_execve - execve entry that builds a full register frame.
 *
 * Drops the return address into %r11, completes the frame, hands its
 * address to sys_execve in %rcx, stores the result in the frame's RAX
 * slot and leaves through int_ret_from_sys_call.
 */
ENTRY(stub_execve)
	CFI_STARTPROC
	popq	%r11				/* discard return address */
	CFI_ADJUST_CFA_OFFSET	-8
	CFI_REGISTER	rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq	%rsp, %rcx			/* frame address -> 4th C argument */
	call	sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq	%rax,RAX(%rsp)
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
/*
 * stub_rt_sigreturn
 *
 * sigreturn is special because it needs to restore all registers on
 * return.  This cannot be done with SYSRET, so the IRET return path is
 * used instead.
 *
 * Skips the return address, completes the frame, calls sys_rt_sigreturn
 * with the frame address in %rdi and stores the result in the frame's
 * RAX slot.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq	$8, %rsp			/* drop return address */
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq	%rsp,%rdi			/* frame address -> arg1 */
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	/* fixme, this could be done at the higher layer */
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614
/*
 * _frame ref - initial frame state for interrupts and exceptions.
 *
 * Opens a "simple" signal-frame CFI procedure for an entry point whose
 * stack pointer sits at frame offset \ref, and records where rsp and
 * rip were saved.  Emits no instructions.
 */
	.macro _frame ref
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8-\ref
	/*CFI_REL_OFFSET	ss,SS-\ref*/
	CFI_REL_OFFSET	rsp,RSP-\ref
	/*CFI_REL_OFFSET	rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET	cs,CS-\ref*/
	CFI_REL_OFFSET	rip,RIP-\ref
	.endm

/* frame state on entry for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/*
 * frame state on entry for exceptions with error code (and interrupts
 * with vector already pushed)
 */
#define XCPT_FRAME _frame ORIG_RAX
634
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635/*
636 * Interrupt entry/exit.
637 *
638 * Interrupt entry points save only callee clobbered registers in fast path.
639 *
640 * Entry runs with interrupts off.
641 */
642
/*
 * interrupt func - common interrupt entry sequence.
 *
 * On entry 0(%rsp) holds the interrupt number.
 *
 * Saves the argument registers, passes the frame address to \func in
 * %rdi, sets up %rbp as a frame pointer, does SWAPGS when the saved CS
 * has either of its low two bits set, bumps %gs:pda_irqcount and moves
 * to the stack at %gs:pda_irqstackptr when that increment yields zero.
 *
 * NOTE: cmoveq consumes the flags produced by incl; nothing may be
 * placed between the two.
 */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq	-ARGOFFSET(%rsp),%rdi		/* arg1 for handler */
	pushq	%rbp
	/*
	 * Save rbp twice: One is for marking the stack frame, as usual, and the
	 * other, to fill pt_regs properly. This is because bx comes right
	 * before the last saved register in that structure, and not bp. If the
	 * base pointer were in the place bx is today, this would not be needed.
	 */
	movq	%rbp, -8(%rsp)
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rbp, 0
	movq	%rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl	$3,CS(%rdi)
	je	1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	    much work) */
1:	incl	%gs:pda_irqcount
	cmoveq	%gs:pda_irqstackptr,%rsp	/* count reached zero: switch stacks */
	push	%rbp				/* backlink for old unwinder */
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call	\func
	.endm
677
678ENTRY(common_interrupt)
Jan Beulich7effaa82005-09-12 18:49:24 +0200679 XCPT_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 interrupt do_IRQ
681 /* 0(%rsp): oldrsp-ARGOFFSET */
Jan Beulich7effaa82005-09-12 18:49:24 +0200682ret_from_intr:
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100683 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700684 TRACE_IRQS_OFF
Andi Kleen3829ee62005-07-28 21:15:48 -0700685 decl %gs:pda_irqcount
Jan Beulich1de9c3f2006-06-26 13:57:35 +0200686 leaveq
Jan Beulich7effaa82005-09-12 18:49:24 +0200687 CFI_DEF_CFA_REGISTER rsp
Jan Beulich1de9c3f2006-06-26 13:57:35 +0200688 CFI_ADJUST_CFA_OFFSET -8
Jan Beulich7effaa82005-09-12 18:49:24 +0200689exit_intr:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 GET_THREAD_INFO(%rcx)
691 testl $3,CS-ARGOFFSET(%rsp)
692 je retint_kernel
693
694 /* Interrupt came from user space */
695 /*
696 * Has a correct top of stack, but a partial stack frame
697 * %rcx: thread info. Interrupts off.
698 */
699retint_with_reschedule:
700 movl $_TIF_WORK_MASK,%edi
Jan Beulich7effaa82005-09-12 18:49:24 +0200701retint_check:
Peter Zijlstra10cd7062007-10-11 22:11:12 +0200702 LOCKDEP_SYS_EXIT_IRQ
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300703 movl TI_flags(%rcx),%edx
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 andl %edi,%edx
Jan Beulich7effaa82005-09-12 18:49:24 +0200705 CFI_REMEMBER_STATE
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 jnz retint_careful
Peter Zijlstra10cd7062007-10-11 22:11:12 +0200707
708retint_swapgs: /* return to user-space */
Ingo Molnar2601e642006-07-03 00:24:45 -0700709 /*
710 * The iretq could re-enable interrupts:
711 */
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100712 DISABLE_INTERRUPTS(CLBR_ANY)
Ingo Molnar2601e642006-07-03 00:24:45 -0700713 TRACE_IRQS_IRETQ
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100714 SWAPGS
Ingo Molnar2601e642006-07-03 00:24:45 -0700715 jmp restore_args
716
Peter Zijlstra10cd7062007-10-11 22:11:12 +0200717retint_restore_args: /* return to kernel space */
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100718 DISABLE_INTERRUPTS(CLBR_ANY)
Ingo Molnar2601e642006-07-03 00:24:45 -0700719 /*
720 * The iretq could re-enable interrupts:
721 */
722 TRACE_IRQS_IRETQ
723restore_args:
Ingo Molnar3701d8632008-02-09 23:24:08 +0100724 RESTORE_ARGS 0,8,0
725
Adrian Bunkf7f3d792008-02-13 23:29:53 +0200726irq_return:
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100727 INTERRUPT_RETURN
Ingo Molnar3701d8632008-02-09 23:24:08 +0100728
729 .section __ex_table, "a"
730 .quad irq_return, bad_iret
731 .previous
732
733#ifdef CONFIG_PARAVIRT
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100734ENTRY(native_iret)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 iretq
736
737 .section __ex_table,"a"
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100738 .quad native_iret, bad_iret
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 .previous
Ingo Molnar3701d8632008-02-09 23:24:08 +0100740#endif
741
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 .section .fixup,"ax"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743bad_iret:
Roland McGrath3aa4b372008-02-06 22:39:43 +0100744 /*
745 * The iret traps when the %cs or %ss being restored is bogus.
746 * We've lost the original trap vector and error code.
747 * #GPF is the most likely one to get for an invalid selector.
748 * So pretend we completed the iret and took the #GPF in user mode.
749 *
750 * We are now running with the kernel GS after exception recovery.
751 * But error_entry expects us to have user GS to match the user %cs,
752 * so swap back.
753 */
754 pushq $0
755
756 SWAPGS
757 jmp general_protection
758
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100759 .previous
760
Jan Beulich7effaa82005-09-12 18:49:24 +0200761 /* edi: workmask, edx: work */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762retint_careful:
Jan Beulich7effaa82005-09-12 18:49:24 +0200763 CFI_RESTORE_STATE
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 bt $TIF_NEED_RESCHED,%edx
765 jnc retint_signal
Ingo Molnar2601e642006-07-03 00:24:45 -0700766 TRACE_IRQS_ON
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100767 ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 pushq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200769 CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 call schedule
771 popq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200772 CFI_ADJUST_CFA_OFFSET -8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 GET_THREAD_INFO(%rcx)
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100774 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700775 TRACE_IRQS_OFF
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 jmp retint_check
777
778retint_signal:
Peter Zijlstra8f4d37e2008-01-25 21:08:29 +0100779 testl $_TIF_DO_NOTIFY_MASK,%edx
Andi Kleen10ffdbb2005-05-16 21:53:19 -0700780 jz retint_swapgs
Ingo Molnar2601e642006-07-03 00:24:45 -0700781 TRACE_IRQS_ON
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100782 ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 SAVE_REST
784 movq $-1,ORIG_RAX(%rsp)
Andi Kleen3829ee62005-07-28 21:15:48 -0700785 xorl %esi,%esi # oldset
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 movq %rsp,%rdi # &pt_regs
787 call do_notify_resume
788 RESTORE_REST
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100789 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700790 TRACE_IRQS_OFF
Andi Kleenbe9e6872005-05-01 08:58:51 -0700791 GET_THREAD_INFO(%rcx)
Roland McGratheca91e72008-07-10 14:50:39 -0700792 jmp retint_with_reschedule
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793
794#ifdef CONFIG_PREEMPT
795 /* Returning to kernel space. Check if we need preemption */
796 /* rcx: threadinfo. interrupts off. */
Andi Kleenb06baba2006-09-26 10:52:29 +0200797ENTRY(retint_kernel)
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300798 cmpl $0,TI_preempt_count(%rcx)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 jnz retint_restore_args
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300800 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 jnc retint_restore_args
802 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
803 jnc retint_restore_args
804 call preempt_schedule_irq
805 jmp exit_intr
806#endif
Jan Beulich4b787e02006-06-26 13:56:55 +0200807
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +0200809END(common_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810
811/*
812 * APIC interrupts.
813 */
/*
 * apicinterrupt num, func
 *
 * Body of an APIC vector entry stub: pushes the bitwise complement of the
 * vector number (~num) onto the stack, expands the `interrupt` macro with
 * the handler \func and then jumps to ret_from_intr.  The macro opens its
 * own CFI region (INTR_FRAME) and closes it (CFI_ENDPROC).
 */
814 .macro apicinterrupt num,func
Jan Beulich7effaa82005-09-12 18:49:24 +0200815 INTR_FRAME
Rusty Russell19eadf92006-06-27 02:53:44 -0700816 pushq $~(\num)
Jan Beulich7effaa82005-09-12 18:49:24 +0200817 CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 interrupt \func
819 jmp ret_from_intr
820 CFI_ENDPROC
821 .endm
822
/*
 * One entry symbol per APIC vector.  Each stub is just an expansion of
 * apicinterrupt with the vector number and the handler to invoke.
 */
823ENTRY(thermal_interrupt)
824 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200825END(thermal_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826
Jacob Shin89b831e2005-11-05 17:25:53 +0100827ENTRY(threshold_interrupt)
828 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200829END(threshold_interrupt)
Jacob Shin89b831e2005-11-05 17:25:53 +0100830
/* The following stubs are only built when CONFIG_SMP is defined. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831#ifdef CONFIG_SMP
832ENTRY(reschedule_interrupt)
833 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200834END(reschedule_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835
/*
 * INVALIDATE_ENTRY num: emits invalidate_interrupt<num> for vector
 * INVALIDATE_TLB_VECTOR_START+num.  All instances share the handler
 * smp_invalidate_interrupt.
 */
Andi Kleene5bc8b62005-09-12 18:49:24 +0200836 .macro INVALIDATE_ENTRY num
837ENTRY(invalidate_interrupt\num)
838 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200839END(invalidate_interrupt\num)
Andi Kleene5bc8b62005-09-12 18:49:24 +0200840 .endm
841
/* Eight entry points, invalidate_interrupt0 .. invalidate_interrupt7. */
842 INVALIDATE_ENTRY 0
843 INVALIDATE_ENTRY 1
844 INVALIDATE_ENTRY 2
845 INVALIDATE_ENTRY 3
846 INVALIDATE_ENTRY 4
847 INVALIDATE_ENTRY 5
848 INVALIDATE_ENTRY 6
849 INVALIDATE_ENTRY 7
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850
851ENTRY(call_function_interrupt)
852 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200853END(call_function_interrupt)
Jens Axboe3b16cf82008-06-26 11:21:54 +0200854ENTRY(call_function_single_interrupt)
855 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
856END(call_function_single_interrupt)
Eric W. Biederman61014292007-02-23 04:40:58 -0700857ENTRY(irq_move_cleanup_interrupt)
858 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
859END(irq_move_cleanup_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860#endif
861
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862ENTRY(apic_timer_interrupt)
863 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200864END(apic_timer_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865
/*
 * NOTE(review): unlike every other stub here, the vector is the bare
 * literal 220 rather than a named *_VECTOR constant -- confirm it matches
 * the value the UV BAU code registers.
 */
Cliff Wickman18129242008-06-02 08:56:14 -0500866ENTRY(uv_bau_message_intr1)
867 apicinterrupt 220,uv_bau_message_interrupt
868END(uv_bau_message_intr1)
869
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870ENTRY(error_interrupt)
871 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200872END(error_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873
874ENTRY(spurious_interrupt)
875 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
Jan Beulich4b787e02006-06-26 13:56:55 +0200876END(spurious_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877
878/*
879 * Exception entry points.
880 */
/*
 * zeroentry sym
 *
 * Exception entry stub that supplies its own error-code slot: pushes 0
 * into the error code/oldrax slot, saves the interrupted %rax in the slot
 * that error_entry treats as the rdi slot, loads the address of the
 * handler \sym into %rax and jumps to error_entry.
 */
881 .macro zeroentry sym
Jan Beulich7effaa82005-09-12 18:49:24 +0200882 INTR_FRAME
Jeremy Fitzhardingefab58422008-06-25 00:19:31 -0400883 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 pushq $0 /* push error code/oldrax */
Jan Beulich7effaa82005-09-12 18:49:24 +0200885 CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 pushq %rax /* push real oldrax to the rdi slot */
Jan Beulich7effaa82005-09-12 18:49:24 +0200887 CFI_ADJUST_CFA_OFFSET 8
Jan Beulich37550902007-05-02 19:27:05 +0200888 CFI_REL_OFFSET rax,0
/* %rax = handler address; error_entry calls it indirectly */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 leaq \sym(%rip),%rax
890 jmp error_entry
Jan Beulich7effaa82005-09-12 18:49:24 +0200891 CFI_ENDPROC
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892 .endm
893
/*
 * errorentry sym
 *
 * Like zeroentry but without pushing an error-code slot: only the
 * interrupted %rax is pushed before the handler address \sym is loaded
 * into %rax and control passes to error_entry.  error_entry expects an
 * error code/orig_rax slot on the stack, so this variant presumably
 * relies on the CPU having pushed the error code already -- confirm
 * against the vectors that use it.
 */
894 .macro errorentry sym
Jan Beulich7effaa82005-09-12 18:49:24 +0200895 XCPT_FRAME
Jeremy Fitzhardingefab58422008-06-25 00:19:31 -0400896 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 pushq %rax
Jan Beulich7effaa82005-09-12 18:49:24 +0200898 CFI_ADJUST_CFA_OFFSET 8
Jan Beulich37550902007-05-02 19:27:05 +0200899 CFI_REL_OFFSET rax,0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 leaq \sym(%rip),%rax
901 jmp error_entry
Jan Beulich7effaa82005-09-12 18:49:24 +0200902 CFI_ENDPROC
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903 .endm
904
905 /* error code is on the stack already */
906 /* handle NMI like exceptions that can happen everywhere */
/*
 * paranoidentry sym, ist=0, irqtrace=1
 *
 *   sym      - handler to call with %rdi = &pt_regs, %rsi = error code
 *   ist      - if non-zero, index of the TSS IST slot that is moved down by
 *              EXCEPTION_STKSZ for the duration of the call
 *   irqtrace - if non-zero, emit TRACE_IRQS_OFF before and after the call
 *
 * On exit %ebx is the "no swapgs" flag consumed by paranoidexit:
 * 1 = GS was left untouched, 0 = SWAPGS was executed here.
 */
Ingo Molnar2601e642006-07-03 00:24:45 -0700907 .macro paranoidentry sym, ist=0, irqtrace=1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 SAVE_ALL
909 cld
/* start with ebx = 1 (no swapgs needed on exit) */
910 movl $1,%ebx
/* read MSR_GS_BASE; rdmsr returns the high half in %edx */
911 movl $MSR_GS_BASE,%ecx
912 rdmsr
/* sign bit of the high half set: skip SWAPGS and keep ebx = 1 */
913 testl %edx,%edx
914 js 1f
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100915 SWAPGS
916 xorl %ebx,%ebx
Jan Beulichb556b352006-01-11 22:43:00 +01009171:
918 .if \ist
919 movq %gs:pda_data_offset, %rbp
920 .endif
Alexander van Heukelum7e61a792008-09-26 14:03:03 +0200921 .if \irqtrace
922 TRACE_IRQS_OFF
923 .endif
/* arg1 = &pt_regs, arg2 = error code; then mark ORIG_RAX as -1 */
Jan Beulichb556b352006-01-11 22:43:00 +0100924 movq %rsp,%rdi
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 movq ORIG_RAX(%rsp),%rsi
926 movq $-1,ORIG_RAX(%rsp)
/* lower this CPU's IST entry around the call and restore it afterwards */
Jan Beulichb556b352006-01-11 22:43:00 +0100927 .if \ist
Andi Kleen5f8efbb2006-01-16 01:56:39 +0100928 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
Jan Beulichb556b352006-01-11 22:43:00 +0100929 .endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 call \sym
Jan Beulichb556b352006-01-11 22:43:00 +0100931 .if \ist
Andi Kleen5f8efbb2006-01-16 01:56:39 +0100932 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
Jan Beulichb556b352006-01-11 22:43:00 +0100933 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100934 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700935 .if \irqtrace
936 TRACE_IRQS_OFF
937 .endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938 .endm
Ingo Molnar2601e642006-07-03 00:24:45 -0700939
940 /*
941 * "Paranoid" exit path from exception stack.
942 * Paranoid because this is used by NMIs and cannot take
943 * any kernel state for granted.
944 * We don't do kernel preemption checks here, because only
945 * NMI should be common and it does not enable IRQs and
946 * cannot get reschedule ticks.
947 *
948 * "trace" is 0 for the NMI handler only, because irq-tracing
949 * is fundamentally NMI-unsafe. (we cannot change the soft and
950 * hard flags at once, atomically)
 *
 * Every label below is suffixed with the value of "trace", so the
 * default expansion defines paranoid_exit1, paranoid_swapgs1, ... and
 * an expansion with trace=0 defines paranoid_exit0, ...
 * The macro ends with CFI_ENDPROC, closing the CFI region opened by
 * the entry stub that expands it.
951 */
952 .macro paranoidexit trace=1
953 /* ebx: no swapgs flag */
954paranoid_exit\trace:
955 testl %ebx,%ebx /* swapgs needed? */
956 jnz paranoid_restore\trace
/* non-zero privilege bits in the saved CS: returning to user space */
957 testl $3,CS(%rsp)
958 jnz paranoid_userspace\trace
959paranoid_swapgs\trace:
Andi Kleen7a0a2df2006-09-26 10:52:37 +0200960 .if \trace
Ingo Molnar2601e642006-07-03 00:24:45 -0700961 TRACE_IRQS_IRETQ 0
Andi Kleen7a0a2df2006-09-26 10:52:37 +0200962 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100963 SWAPGS_UNSAFE_STACK
Ingo Molnar2601e642006-07-03 00:24:45 -0700964paranoid_restore\trace:
965 RESTORE_ALL 8
Ingo Molnar3701d8632008-02-09 23:24:08 +0100966 jmp irq_return
/* user-space return: handle pending work flags before leaving */
Ingo Molnar2601e642006-07-03 00:24:45 -0700967paranoid_userspace\trace:
968 GET_THREAD_INFO(%rcx)
/* ebx = thread flags masked with _TIF_WORK_MASK; zero means nothing to do */
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300969 movl TI_flags(%rcx),%ebx
Ingo Molnar2601e642006-07-03 00:24:45 -0700970 andl $_TIF_WORK_MASK,%ebx
971 jz paranoid_swapgs\trace
972 movq %rsp,%rdi /* &pt_regs */
973 call sync_regs
974 movq %rax,%rsp /* switch stack for scheduling */
975 testl $_TIF_NEED_RESCHED,%ebx
976 jnz paranoid_schedule\trace
977 movl %ebx,%edx /* arg3: thread flags */
978 .if \trace
979 TRACE_IRQS_ON
980 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100981 ENABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700982 xorl %esi,%esi /* arg2: oldset */
983 movq %rsp,%rdi /* arg1: &pt_regs */
984 call do_notify_resume
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100985 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700986 .if \trace
987 TRACE_IRQS_OFF
988 .endif
/* loop: re-read the thread flags in case more work arrived */
989 jmp paranoid_userspace\trace
990paranoid_schedule\trace:
991 .if \trace
992 TRACE_IRQS_ON
993 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100994 ENABLE_INTERRUPTS(CLBR_ANY)
Ingo Molnar2601e642006-07-03 00:24:45 -0700995 call schedule
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100996 DISABLE_INTERRUPTS(CLBR_ANY)
Ingo Molnar2601e642006-07-03 00:24:45 -0700997 .if \trace
998 TRACE_IRQS_OFF
999 .endif
1000 jmp paranoid_userspace\trace
1001 CFI_ENDPROC
1002 .endm
1003
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004/*
1005 * Exception entry point. This expects an error code/orig_rax on the stack
1006 * and the exception handler in %rax.
 *
 * The handler is called as handler(%rdi = &pt_regs, %rsi = error code).
 * %ebx carries the "no swapgs" flag to error_exit (0: SWAPGS needed on
 * the way out, 1: not needed).
1007 */
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001008KPROBE_ENTRY(error_entry)
Jan Beulich7effaa82005-09-12 18:49:24 +02001009 _frame RDI
Jan Beulich37550902007-05-02 19:27:05 +02001010 CFI_REL_OFFSET rax,0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 /* rdi slot contains rax, oldrax contains error code */
1012 cld
/* make room for the 14 register slots below the rdi slot */
1013 subq $14*8,%rsp
1014 CFI_ADJUST_CFA_OFFSET (14*8)
1015 movq %rsi,13*8(%rsp)
1016 CFI_REL_OFFSET rsi,RSI
1017 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
Jan Beulich37550902007-05-02 19:27:05 +02001018 CFI_REGISTER rax,rsi
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 movq %rdx,12*8(%rsp)
1020 CFI_REL_OFFSET rdx,RDX
1021 movq %rcx,11*8(%rsp)
1022 CFI_REL_OFFSET rcx,RCX
1023 movq %rsi,10*8(%rsp) /* store rax */
1024 CFI_REL_OFFSET rax,RAX
1025 movq %r8, 9*8(%rsp)
1026 CFI_REL_OFFSET r8,R8
1027 movq %r9, 8*8(%rsp)
1028 CFI_REL_OFFSET r9,R9
1029 movq %r10,7*8(%rsp)
1030 CFI_REL_OFFSET r10,R10
1031 movq %r11,6*8(%rsp)
1032 CFI_REL_OFFSET r11,R11
1033 movq %rbx,5*8(%rsp)
1034 CFI_REL_OFFSET rbx,RBX
1035 movq %rbp,4*8(%rsp)
1036 CFI_REL_OFFSET rbp,RBP
1037 movq %r12,3*8(%rsp)
1038 CFI_REL_OFFSET r12,R12
1039 movq %r13,2*8(%rsp)
1040 CFI_REL_OFFSET r13,R13
1041 movq %r14,1*8(%rsp)
1042 CFI_REL_OFFSET r14,R14
1043 movq %r15,(%rsp)
1044 CFI_REL_OFFSET r15,R15
/* ebx = 0: default to "swapgs needed"; error_kernelspace bumps it to 1 */
1045 xorl %ebx,%ebx
/* privilege bits of the saved CS all zero: the fault came from kernel mode */
1046 testl $3,CS(%rsp)
1047 je error_kernelspace
1048error_swapgs:
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001049 SWAPGS
Alexander van Heukelum6b11d4e2008-09-26 14:03:02 +02001050error_sti:
1051 TRACE_IRQS_OFF
/* %rdi is still the interrupted value here; store it into its real slot */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 movq %rdi,RDI(%rsp)
Jan Beulich37550902007-05-02 19:27:05 +02001053 CFI_REL_OFFSET rdi,RDI
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054 movq %rsp,%rdi
1055 movq ORIG_RAX(%rsp),%rsi /* get error code */
1056 movq $-1,ORIG_RAX(%rsp)
/* indirect call to the handler whose address the entry stub put in %rax */
1057 call *%rax
Peter Zijlstra10cd7062007-10-11 22:11:12 +02001058 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1059error_exit:
/* copy the flag to %eax before RESTORE_REST (which presumably reloads %rbx) */
1060 movl %ebx,%eax
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 RESTORE_REST
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001062 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -07001063 TRACE_IRQS_OFF
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 GET_THREAD_INFO(%rcx)
/* flag set: take the kernel-return path */
1065 testl %eax,%eax
1066 jne retint_kernel
Peter Zijlstra10cd7062007-10-11 22:11:12 +02001067 LOCKDEP_SYS_EXIT_IRQ
/* edx = pending work bits, edi = work mask, as retint_careful expects */
Glauber Costa26ccb8a2008-06-24 11:19:35 -03001068 movl TI_flags(%rcx),%edx
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 movl $_TIF_WORK_MASK,%edi
1070 andl %edi,%edx
1071 jnz retint_careful
Peter Zijlstra10cd7062007-10-11 22:11:12 +02001072 jmp retint_swapgs
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 CFI_ENDPROC
1074
1075error_kernelspace:
/* ebx = 1: no swapgs on exit */
1076 incl %ebx
1077 /* There are two places in the kernel that can potentially fault with
1078 usergs. Handle them here. The exception handlers after
1079 iret run with kernel gs again, so don't set the user space flag.
1080 B stepping K8s sometimes report a truncated RIP for IRET
1081 exceptions returning to compat mode. Check for these here too. */
Vegard Nossum9d8ad5d2008-06-27 17:22:17 +02001082 leaq irq_return(%rip),%rcx
1083 cmpq %rcx,RIP(%rsp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 je error_swapgs
/* compare again against the address truncated to its low 32 bits */
Vegard Nossum9d8ad5d2008-06-27 17:22:17 +02001085 movl %ecx,%ecx /* zero extend */
1086 cmpq %rcx,RIP(%rsp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 je error_swapgs
1088 cmpq $gs_change,RIP(%rsp)
1089 je error_swapgs
1090 jmp error_sti
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001091KPROBE_END(error_entry)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092
/*
 * native_load_gs_index - reload the %gs selector with exception handling.
 *
 * In:  %edi = new selector
 *
 * EFLAGS is saved with pushf and restored with popf around the sequence,
 * so the caller's interrupt state is preserved.  The selector load sits
 * between two SWAPGS instructions.  If the load at gs_change faults, the
 * exception table entry below redirects execution to bad_gs, which loads
 * a null selector instead and resumes at label 2.  error_entry recognises
 * a kernel-mode fault at gs_change and performs a SWAPGS for it, which is
 * why bad_gs has to swap back before retrying.
 */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	/*
	 * %edi still holds the selector and is consumed at gs_change, so the
	 * clobber mask must cover everything *except* %rdi.  The previous
	 * expression "CLBR_ANY | ~(CLBR_RDI)" OR-ed in the complement and so
	 * evaluated to an all-ones mask that included %rdi.
	 */
	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence			/* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)

	/* A fault on the selector load at gs_change is fixed up in bad_gs. */
	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax		/* fall back to the null selector */
	movl %eax,%gs
	jmp  2b
	.previous
1123
1124/*
1125 * Create a kernel thread.
1126 *
1127 * C extern interface:
1128 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1129 *
1130 * asm input arguments:
1131 * rdi: fn, rsi: arg, rdx: flags
 *
 * A fake frame whose return address is child_rip is built and all
 * registers are saved in it; that frame is handed to do_fork as the
 * pt_regs argument.  The value returned by do_fork is written into the
 * frame's RAX slot so that RESTORE_ALL leaves it in %rax for the caller.
1132 */
1133ENTRY(kernel_thread)
1134 CFI_STARTPROC
1135 FAKE_STACK_FRAME $child_rip
1136 SAVE_ALL
1137
1138 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1139 movq %rdx,%rdi
1140 orq kernel_thread_flags(%rip),%rdi
1141 movq $-1, %rsi
1142 movq %rsp, %rdx
1143
/* fifth and sixth do_fork arguments are zero */
1144 xorl %r8d,%r8d
1145 xorl %r9d,%r9d
1146
1147 # clone now
1148 call do_fork
1149 movq %rax,RAX(%rsp)
1150 xorl %edi,%edi
1151
1152 /*
1153 * It isn't worth checking for reschedule here,
1154 * so internally to the x86_64 port you can rely on kernel_thread()
1155 * not to reschedule the child before returning, this avoids the need
1156 * of hacks for example to fork off the per-CPU idle tasks.
1157 * [Hopefully no generic code relies on the reschedule -AK]
1158 */
1159 RESTORE_ALL
1160 UNFAKE_STACK_FRAME
1161 ret
1162 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +02001163ENDPROC(kernel_thread)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
/*
 * child_rip - first code run by a thread created through kernel_thread
 * (kernel_thread installs this address via FAKE_STACK_FRAME $child_rip).
 * Calls fn(arg) and passes its 32-bit return value to do_exit.
 */
1165child_rip:
Andi Kleenc05991e2006-08-30 19:37:08 +02001166 pushq $0 # fake return address
1167 CFI_STARTPROC
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 /*
1169 * Here we are in the child and the registers are set as they were
1170 * at kernel_thread() invocation in the parent.
1171 */
/* rax = fn, rdi = arg, then call fn(arg) */
1172 movq %rdi, %rax
1173 movq %rsi, %rdi
1174 call *%rax
1175 # exit
Andrey Mirkin1c5b5cf2007-10-17 18:04:33 +02001176 mov %eax, %edi
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 call do_exit
Andi Kleenc05991e2006-08-30 19:37:08 +02001178 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +02001179ENDPROC(child_rip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
1181/*
1182 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1183 *
1184 * C extern interface:
1185 * extern long execve(char *name, char **argv, char **envp)
1186 *
1187 * asm input arguments:
1188 * rdi: name, rsi: argv, rdx: envp
1189 *
1190 * We want to fallback into:
Ingo Molnar5d119b22008-02-26 12:55:57 +01001191 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 *
1193 * sys_execve asm fallback arguments:
Ingo Molnar5d119b22008-02-26 12:55:57 +01001194 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 *
 * If sys_execve returns 0 the function leaves through
 * int_ret_from_sys_call; otherwise it unwinds the fake frame and returns
 * the value to the caller in %rax.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 */
Arnd Bergmann3db03b42006-10-02 02:18:31 -07001196ENTRY(kernel_execve)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 CFI_STARTPROC
1198 FAKE_STACK_FRAME $0
1199 SAVE_ALL
/* fourth argument: pointer to the register frame just saved */
Ingo Molnar5d119b22008-02-26 12:55:57 +01001200 movq %rsp,%rcx
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 call sys_execve
1202 movq %rax, RAX(%rsp)
1203 RESTORE_REST
/* zero return value: leave via int_ret_from_sys_call */
1204 testq %rax,%rax
1205 je int_ret_from_sys_call
/* non-zero return value: return it to the in-kernel caller */
1206 RESTORE_ARGS
1207 UNFAKE_STACK_FRAME
1208 ret
1209 CFI_ENDPROC
Arnd Bergmann3db03b42006-10-02 02:18:31 -07001210ENDPROC(kernel_execve)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211
/*
 * Exception entry stubs.  Each one expands errorentry or zeroentry with
 * the handler to call; both macros end by jumping to error_entry.
 */
Prasanna S Panchamukhi0f2fbdc2005-09-06 15:19:28 -07001212KPROBE_ENTRY(page_fault)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 errorentry do_page_fault
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001214KPROBE_END(page_fault)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215
1216ENTRY(coprocessor_error)
1217 zeroentry do_coprocessor_error
Jan Beulich4b787e02006-06-26 13:56:55 +02001218END(coprocessor_error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219
1220ENTRY(simd_coprocessor_error)
1221 zeroentry do_simd_coprocessor_error
Jan Beulich4b787e02006-06-26 13:56:55 +02001222END(simd_coprocessor_error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223
1224ENTRY(device_not_available)
Alexander van Heukelume407d6202008-09-30 18:41:36 +02001225 zeroentry do_device_not_available
Jan Beulich4b787e02006-06-26 13:56:55 +02001226END(device_not_available)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227
1228 /* runs on exception stack */
/*
 * debug pushes a zero error code, enters through paranoidentry using the
 * DEBUG_STACK IST slot, and expands paranoidexit with its default trace=1.
 * That expansion is what defines paranoid_exit1, the label the other
 * paranoid stubs in this file jump to.
 */
Prasanna S Panchamukhi0f2fbdc2005-09-06 15:19:28 -07001229KPROBE_ENTRY(debug)
Jan Beulich7effaa82005-09-12 18:49:24 +02001230 INTR_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001231 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 pushq $0
1233 CFI_ADJUST_CFA_OFFSET 8
Andi Kleen5f8efbb2006-01-16 01:56:39 +01001234 paranoidentry do_debug, DEBUG_STACK
Ingo Molnar2601e642006-07-03 00:24:45 -07001235 paranoidexit
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001236KPROBE_END(debug)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237
1238 /* runs on exception stack */
/*
 * nmi pushes -1 in the error-code slot and enters with ist=0 and
 * irqtrace=0.  With CONFIG_TRACE_IRQFLAGS it gets its own exit path
 * without irq tracing (paranoidexit 0); otherwise it reuses
 * paranoid_exit1.
 */
Andi Kleeneddb6fb2006-02-03 21:50:41 +01001239KPROBE_ENTRY(nmi)
Jan Beulich7effaa82005-09-12 18:49:24 +02001240 INTR_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001241 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 pushq $-1
Jan Beulich7effaa82005-09-12 18:49:24 +02001243 CFI_ADJUST_CFA_OFFSET 8
Ingo Molnar2601e642006-07-03 00:24:45 -07001244 paranoidentry do_nmi, 0, 0
1245#ifdef CONFIG_TRACE_IRQFLAGS
1246 paranoidexit 0
1247#else
1248 jmp paranoid_exit1
1249 CFI_ENDPROC
1250#endif
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001251KPROBE_END(nmi)
Andi Kleen6fefb0d2005-04-16 15:25:03 -07001252
/* int3 uses the same IST slot as debug (DEBUG_STACK) and exits via paranoid_exit1. */
Prasanna S Panchamukhi0f2fbdc2005-09-06 15:19:28 -07001253KPROBE_ENTRY(int3)
Jan Beulichb556b352006-01-11 22:43:00 +01001254 INTR_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001255 PARAVIRT_ADJUST_EXCEPTION_FRAME
Jan Beulichb556b352006-01-11 22:43:00 +01001256 pushq $0
1257 CFI_ADJUST_CFA_OFFSET 8
Andi Kleen5f8efbb2006-01-16 01:56:39 +01001258 paranoidentry do_int3, DEBUG_STACK
Ingo Molnar2601e642006-07-03 00:24:45 -07001259 jmp paranoid_exit1
Jan Beulichb556b352006-01-11 22:43:00 +01001260 CFI_ENDPROC
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001261KPROBE_END(int3)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262
/*
 * Remaining exception entry stubs.  Those built from zeroentry/errorentry
 * go through error_entry; those built from paranoidentry leave through
 * paranoid_exit1.
 */
1263ENTRY(overflow)
1264 zeroentry do_overflow
Jan Beulich4b787e02006-06-26 13:56:55 +02001265END(overflow)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266
1267ENTRY(bounds)
1268 zeroentry do_bounds
Jan Beulich4b787e02006-06-26 13:56:55 +02001269END(bounds)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
1271ENTRY(invalid_op)
1272 zeroentry do_invalid_op
Jan Beulich4b787e02006-06-26 13:56:55 +02001273END(invalid_op)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274
1275ENTRY(coprocessor_segment_overrun)
1276 zeroentry do_coprocessor_segment_overrun
Jan Beulich4b787e02006-06-26 13:56:55 +02001277END(coprocessor_segment_overrun)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 /* runs on exception stack */
/* no error-code push here: presumably the CPU supplies one -- confirm */
1280ENTRY(double_fault)
Jan Beulich7effaa82005-09-12 18:49:24 +02001281 XCPT_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001282 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283 paranoidentry do_double_fault
Ingo Molnar2601e642006-07-03 00:24:45 -07001284 jmp paranoid_exit1
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +02001286END(double_fault)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287
1288ENTRY(invalid_TSS)
1289 errorentry do_invalid_TSS
Jan Beulich4b787e02006-06-26 13:56:55 +02001290END(invalid_TSS)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291
1292ENTRY(segment_not_present)
1293 errorentry do_segment_not_present
Jan Beulich4b787e02006-06-26 13:56:55 +02001294END(segment_not_present)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295
1296 /* runs on exception stack */
1297ENTRY(stack_segment)
Jan Beulich7effaa82005-09-12 18:49:24 +02001298 XCPT_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001299 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 paranoidentry do_stack_segment
Ingo Molnar2601e642006-07-03 00:24:45 -07001301 jmp paranoid_exit1
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +02001303END(stack_segment)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304
Prasanna S Panchamukhi0f2fbdc2005-09-06 15:19:28 -07001305KPROBE_ENTRY(general_protection)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 errorentry do_general_protection
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001307KPROBE_END(general_protection)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308
1309ENTRY(alignment_check)
1310 errorentry do_alignment_check
Jan Beulich4b787e02006-06-26 13:56:55 +02001311END(alignment_check)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312
1313ENTRY(divide_error)
1314 zeroentry do_divide_error
Jan Beulich4b787e02006-06-26 13:56:55 +02001315END(divide_error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316
1317ENTRY(spurious_interrupt_bug)
1318 zeroentry do_spurious_interrupt_bug
Jan Beulich4b787e02006-06-26 13:56:55 +02001319END(spurious_interrupt_bug)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320
1321#ifdef CONFIG_X86_MCE
1322 /* runs on exception stack */
/* pushes a zero error code itself before entering the paranoid path */
1323ENTRY(machine_check)
Jan Beulich7effaa82005-09-12 18:49:24 +02001324 INTR_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001325 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326 pushq $0
1327 CFI_ADJUST_CFA_OFFSET 8
1328 paranoidentry do_machine_check
Ingo Molnar2601e642006-07-03 00:24:45 -07001329 jmp paranoid_exit1
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +02001331END(machine_check)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332#endif
1333
Andi Kleen26995002006-08-02 22:37:28 +02001334/* Call softirq on interrupt stack. Interrupts are off. */
/*
 * Sets up an %rbp frame, increments the per-CPU pda_irqcount and, when
 * that increment produced zero, switches %rsp to pda_irqstackptr before
 * calling __do_softirq.  leaveq then restores the original %rsp/%rbp
 * from the frame pointer, whichever stack was in use.
 */
Andi Kleened6b6762005-07-28 21:15:49 -07001335ENTRY(call_softirq)
Jan Beulich7effaa82005-09-12 18:49:24 +02001336 CFI_STARTPROC
Andi Kleen26995002006-08-02 22:37:28 +02001337 push %rbp
1338 CFI_ADJUST_CFA_OFFSET 8
1339 CFI_REL_OFFSET rbp,0
1340 mov %rsp,%rbp
1341 CFI_DEF_CFA_REGISTER rbp
/* ZF from the incl drives the cmove: switch stacks only if the count hit 0 */
Andi Kleened6b6762005-07-28 21:15:49 -07001342 incl %gs:pda_irqcount
Andi Kleen26995002006-08-02 22:37:28 +02001343 cmove %gs:pda_irqstackptr,%rsp
1344 push %rbp # backlink for old unwinder
Andi Kleened6b6762005-07-28 21:15:49 -07001345 call __do_softirq
Andi Kleen26995002006-08-02 22:37:28 +02001346 leaveq
Jan Beulich7effaa82005-09-12 18:49:24 +02001347 CFI_DEF_CFA_REGISTER rsp
Andi Kleen26995002006-08-02 22:37:28 +02001348 CFI_ADJUST_CFA_OFFSET -8
Andi Kleened6b6762005-07-28 21:15:49 -07001349 decl %gs:pda_irqcount
Andi Kleened6b6762005-07-28 21:15:49 -07001350 ret
Jan Beulich7effaa82005-09-12 18:49:24 +02001351 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +02001352ENDPROC(call_softirq)
Andi Kleen75154f42007-06-23 02:29:25 +02001353
/*
 * ignore_sysret - load -ENOSYS into %eax and return with sysret.
 *
 * Opened with KPROBE_ENTRY, so it must be closed with KPROBE_END like
 * every other KPROBE_ENTRY block in this file; closing it with ENDPROC
 * left the section switch made by KPROBE_ENTRY in effect for the code
 * that follows.
 */
KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
KPROBE_END(ignore_sysret)
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001360
1361#ifdef CONFIG_XEN
/*
 * Entry stub for the Xen hypervisor callback: uses zeroentry, so it reaches
 * xen_do_hypervisor_callback through error_entry with a zero error code.
 */
1362ENTRY(xen_hypervisor_callback)
1363 zeroentry xen_do_hypervisor_callback
1364END(xen_hypervisor_callback)
1365
1366/*
1367# A note on the "critical region" in our callback handler.
1368# We want to avoid stacking callback handlers due to events occurring
1369# during handling of the last event. To do this, we keep events disabled
1370# until we've done all processing. HOWEVER, we must enable events before
1371# popping the stack frame (can't be done atomically) and so it would still
1372# be possible to get enough handler activations to overflow the stack.
1373# Although unlikely, bugs of that kind are hard to track down, so we'd
1374# like to avoid the possibility.
1375# So, on entry to the handler we detect whether we interrupted an
1376# existing activation in its critical region -- if so, we pop the current
1377# activation and restart the handler using the previous one.
1378*/
/*
 * xen_do_hypervisor_callback(struct pt_regs *regs)
 *
 * In:  %rdi = pointer to the saved register frame
 *
 * Never returns to its caller: %rsp is reset to the frame in %rdi, the
 * upcall handler runs (on the per-CPU irq stack when the pda_irqcount
 * increment yields zero), and control leaves through error_exit.
 */
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
	CFI_STARTPROC
/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
   see the correct pointer to the pt_regs */
	movq %rdi, %rsp            # we don't return, adjust the stack frame
	CFI_ENDPROC
	CFI_DEFAULT_STACK
11:	incl %gs:pda_irqcount
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	/* ZF comes from the incl above: switch stacks only if the count hit 0 */
	cmovzq %gs:pda_irqstackptr,%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp			# back to the stack saved in the backlink
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	jmp  error_exit
	CFI_ENDPROC
END(xen_do_hypervisor_callback)
1398
1399/*
1400# Hypervisor uses this for application faults while it executes.
1401# We get here for two reasons:
1402# 1. Fault while reloading DS, ES, FS or GS
1403# 2. Fault while executing IRET
1404# Category 1 we do not need to fix up as Xen has already reloaded all segment
1405# registers that could be reloaded and zeroed the others.
1406# Category 2 we fix up by killing the current process. We cannot use the
1407# normal Linux return path in this case because if we use the IRET hypercall
1408# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1409# We distinguish between categories by comparing each saved segment register
1410# with its current contents: any discrepancy means we are in category 1.
1411*/
/*
 * On entry the stack holds, from %rsp upwards: saved %rcx, saved %r11 and
 * the saved ds/es/fs/gs selectors at 0x10/0x18/0x20/0x28; these 0x30 bytes
 * are dropped on both paths before continuing.
 */
1412ENTRY(xen_failsafe_callback)
Jeremy Fitzhardinge4a5c3e72008-07-08 15:07:09 -07001413 framesz = (RIP-0x30) /* workaround buggy gas */
1414 _frame framesz
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001415 CFI_REL_OFFSET rcx, 0
1416 CFI_REL_OFFSET r11, 8
/* compare each live selector with its saved copy; any mismatch -> 1f */
1417 movw %ds,%cx
1418 cmpw %cx,0x10(%rsp)
1419 CFI_REMEMBER_STATE
1420 jne 1f
1421 movw %es,%cx
1422 cmpw %cx,0x18(%rsp)
1423 jne 1f
1424 movw %fs,%cx
1425 cmpw %cx,0x20(%rsp)
1426 jne 1f
1427 movw %gs,%cx
1428 cmpw %cx,0x28(%rsp)
1429 jne 1f
1430 /* All segments match their saved values => Category 2 (Bad IRET). */
1431 movq (%rsp),%rcx
1432 CFI_RESTORE rcx
1433 movq 8(%rsp),%r11
1434 CFI_RESTORE r11
1435 addq $0x30,%rsp
1436 CFI_ADJUST_CFA_OFFSET -0x30
/* push a zero, then %r11 and %rcx again, and hand over to general_protection */
Jeremy Fitzhardinge4a5c3e72008-07-08 15:07:09 -07001437 pushq $0
1438 CFI_ADJUST_CFA_OFFSET 8
1439 pushq %r11
1440 CFI_ADJUST_CFA_OFFSET 8
1441 pushq %rcx
1442 CFI_ADJUST_CFA_OFFSET 8
1443 jmp general_protection
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001444 CFI_RESTORE_STATE
14451: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1446 movq (%rsp),%rcx
1447 CFI_RESTORE rcx
1448 movq 8(%rsp),%r11
1449 CFI_RESTORE r11
1450 addq $0x30,%rsp
1451 CFI_ADJUST_CFA_OFFSET -0x30
/* build a full register frame with a zero error-code slot and leave via error_exit */
1452 pushq $0
1453 CFI_ADJUST_CFA_OFFSET 8
1454 SAVE_ALL
1455 jmp error_exit
1456 CFI_ENDPROC
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001457END(xen_failsafe_callback)
1458
1459#endif /* CONFIG_XEN */