2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 * Jun Nakajima <jun.nakajima@intel.com>
8 * Asit Mallick <asit.k.mallick@intel.com>
13 * entry.S contains the system-call and fault low-level handling routines.
15 * NOTE: This code handles signal-recognition, which happens every time
16 * after an interrupt and after each system call.
18 * Normal syscalls and interrupts don't save a full stack frame, this is
19 * only done for syscall tracing, signals or fork/exec et al.
21 * A note on terminology:
22 * - top of stack: Architecture defined interrupt frame from SS to RIP
23 * at the top of the kernel process stack.
24 * - partial stack frame: partially saved registers up to R11.
25 * - full stack frame: Like partial stack frame, but all registers saved.
28 * - CFI macros are used to generate dwarf2 unwind information for better
29 * backtraces. They don't change any code.
30 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
31 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
32 * There are unfortunately lots of special cases where some registers
33 * are not touched. The macro is a big mess that should be cleaned up.
34 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
35 * Gives a full stack frame.
36 * - ENTRY/END Define functions in the symbol table.
37 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
38 * frame that is otherwise undefined after a SYSCALL
39 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
40 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
43 #include <linux/linkage.h>
44 #include <asm/segment.h>
45 #include <asm/cache.h>
46 #include <asm/errno.h>
47 #include <asm/dwarf2.h>
48 #include <asm/calling.h>
49 #include <asm/asm-offsets.h>
51 #include <asm/unistd.h>
52 #include <asm/thread_info.h>
53 #include <asm/hw_irq.h>
55 #include <asm/irqflags.h>
56 #include <asm/errno.h>
57 #include <xen/interface/arch-x86_64.h>
58 #include <xen/interface/features.h>
60 #include "xen_entry_64.S"
/* Without CONFIG_PREEMPT there is no preemption check on return to
 * kernel space, so retint_kernel is simply aliased to the plain
 * register-restore return path. */
64 #ifndef CONFIG_PREEMPT
65 #define retint_kernel retint_restore_args
/* Emit hard-IRQ-state tracing before an IRETQ (lock debugging).
 * Tests bit 9 (IF) of the EFLAGS image saved on the stack.
 * NOTE(review): excerpt is truncated -- the remainder of the macro
 * body and its .endm are not visible here. */
69 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
70 #ifdef CONFIG_TRACE_IRQFLAGS
71 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
81 * C code is not supposed to know about undefined top of stack. Every time
82 * a C function with a pt_regs argument is called from the SYSCALL based
83 * fast path FIXUP_TOP_OF_STACK is needed.
84 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
/* FIXUP_TOP_OF_STACK: reconstruct the hardware interrupt-frame fields
 * (CS etc.) that SYSCALL entry leaves undefined, so C code that takes a
 * pt_regs pointer sees a consistent frame.
 * RESTORE_TOP_OF_STACK is the inverse, applied before returning.
 * NOTE(review): both macro bodies are truncated in this excerpt. */
88 /* %rsp:at FRAMEEND */
89 .macro FIXUP_TOP_OF_STACK tmp
90 movq $__USER_CS,CS(%rsp)
94 .macro RESTORE_TOP_OF_STACK tmp,offset=0
/* FAKE_STACK_FRAME: build a synthetic iret frame (ss, rsp, eflags, cs,
 * rip) plus orig_rax on the kernel stack, used by kernel_thread/execve
 * paths; each pushq is paired with a CFI offset adjustment for the
 * dwarf2 unwinder. */
97 .macro FAKE_STACK_FRAME child_rip
98 /* push in order ss, rsp, eflags, cs, rip */
101 CFI_ADJUST_CFA_OFFSET 8
102 /*CFI_REL_OFFSET ss,0*/
104 CFI_ADJUST_CFA_OFFSET 8
106 pushq $(1<<9) /* eflags - interrupts on (IF is EFLAGS bit 9) */
107 CFI_ADJUST_CFA_OFFSET 8
108 /*CFI_REL_OFFSET rflags,0*/
109 pushq $__KERNEL_CS /* cs */
110 CFI_ADJUST_CFA_OFFSET 8
111 /*CFI_REL_OFFSET cs,0*/
112 pushq \child_rip /* rip */
113 CFI_ADJUST_CFA_OFFSET 8
115 pushq %rax /* orig rax */
116 CFI_ADJUST_CFA_OFFSET 8
/* UNFAKE_STACK_FRAME: drop the 6 quadwords pushed above. */
119 .macro UNFAKE_STACK_FRAME
121 CFI_ADJUST_CFA_OFFSET -(6*8)
/* CFI_DEFAULT_STACK: declare the dwarf2 unwind state for a full
 * pt_regs frame: CFA relative to %rsp, then the saved location of
 * every register at its pt_regs offset. \adj selects whether the frame
 * is ARGOFFSET-adjusted. Commented-out entries (cs/rflags/ss) are
 * deliberately disabled in the original. */
124 .macro CFI_DEFAULT_STACK start=1,adj=0
128 CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET)
130 CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
133 CFI_REL_OFFSET r15,R15
134 CFI_REL_OFFSET r14,R14
135 CFI_REL_OFFSET r13,R13
136 CFI_REL_OFFSET r12,R12
137 CFI_REL_OFFSET rbp,RBP
138 CFI_REL_OFFSET rbx,RBX
140 CFI_REL_OFFSET r11,R11
141 CFI_REL_OFFSET r10,R10
144 CFI_REL_OFFSET rax,RAX
145 CFI_REL_OFFSET rcx,RCX
146 CFI_REL_OFFSET rdx,RDX
147 CFI_REL_OFFSET rsi,RSI
148 CFI_REL_OFFSET rdi,RDI
149 CFI_REL_OFFSET rip,RIP
150 /*CFI_REL_OFFSET cs,CS*/
151 /*CFI_REL_OFFSET rflags,EFLAGS*/
152 CFI_REL_OFFSET rsp,RSP
153 /*CFI_REL_OFFSET ss,SS*/
/* HYPERVISOR_IRET: return-to-guest path under Xen. The stack layout
 * must match Xen's struct iret_context (see comment below). Checks the
 * NMI mask bit in the saved flags word (2*8(%rsp)) and whether Xen
 * supports supervisor-mode kernels; the slow path clears the NMI mask
 * and jumps into the __HYPERVISOR_iret hypercall stub (each stub is 32
 * bytes into hypercall_page).
 * NOTE(review): excerpt is truncated -- branch targets 1:/ label bodies
 * between these instructions are not visible. */
157 * Must be consistent with the definition in arch-x86/xen-x86_64.h:
158 * struct iret_context {
159 * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
161 * with rax, r11, and rcx being taken care of in the hypercall stub.
163 .macro HYPERVISOR_IRET flag
166 testl $NMI_MASK,2*8(%rsp)
169 cmpb $0,(xen_features+XENFEAT_supervisor_mode_kernel)(%rip)
172 /* Direct iret to kernel space. Correct CS and SS. */
177 2: /* Slow iret via hypervisor. */
178 andl $~NMI_MASK, 2*8(%rsp)
180 jmp hypercall_page + (__HYPERVISOR_iret * 32)
/* ret_from_fork path: first code a newly forked task runs after the
 * context switch. Resets kernel eflags from kernel_eflags, then decides
 * the return route: syscall-trace work goes to the tracing tail, a
 * kernel thread (CPL 0 in saved CS) or a 32-bit task takes the IRET
 * path, otherwise the fast SYSRET path after fixing the top of stack.
 * NOTE(review): labels and several lines between these instructions are
 * missing from this excerpt. */
184 * A newly forked process directly context switches into this.
189 push kernel_eflags(%rip)
190 CFI_ADJUST_CFA_OFFSET 4
191 popf # reset kernel eflags
192 CFI_ADJUST_CFA_OFFSET -4
194 GET_THREAD_INFO(%rcx)
195 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
199 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
200 je int_ret_from_sys_call
201 testl $_TIF_IA32,threadinfo_flags(%rcx)
202 jnz int_ret_from_sys_call
203 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
204 jmp ret_from_sys_call
/* Tracing tail: report syscall exit to the tracer, then re-fetch
 * thread_info (clobbered by the call). */
207 call syscall_trace_leave
208 GET_THREAD_INFO(%rcx)
/* Initial dwarf2 frame state for interrupt/exception entry: CFA and
 * register save slots expressed relative to \ref.
 * NOTE(review): the .macro header that defines \ref is on a line not
 * visible in this excerpt. */
214 * initial frame state for interrupts and exceptions
219 CFI_DEF_CFA rsp,SS+8-\ref
220 /*CFI_REL_OFFSET ss,SS-\ref*/
221 CFI_REL_OFFSET rsp,RSP-\ref
222 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
223 /*CFI_REL_OFFSET cs,CS-\ref*/
224 CFI_REL_OFFSET rip,RIP-\ref
228 * System call entry. Up to 6 arguments in registers are supported.
230 * SYSCALL does not save anything on the stack and does not change the
236 * rax system call number
238 * rcx return address for syscall/sysret, C arg3
241 * r10 arg3 (--> moved to rcx for C)
244 * r11 eflags for syscall/sysret, temporary for C
245 * r12-r15,rbp,rbx saved by C code, not touched.
247 * Interrupts are enabled on entry.
248 * Only called from user space.
250 * XXX if we had a free scratch register we could save the RSP into the stack frame
251 * and report it properly in ps. Unfortunately we haven't.
253 * When user can change the frames always force IRET. That is because
254 * it deals with uncanonical addresses better. SYSRET has trouble
255 * with them due to bugs in both AMD and Intel CPUs.
/* Fast path: stash the syscall number as orig_rax, check for trace/
 * audit/seccomp work and a valid syscall number, then dispatch through
 * sys_call_table and store the return value into the pt_regs RAX slot.
 * NOTE(review): the ENTRY label, jump targets and several guard
 * branches are on lines missing from this excerpt. */
261 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
262 GET_THREAD_INFO(%rcx)
263 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
265 cmpq $__NR_syscall_max,%rax
268 call *sys_call_table(,%rax,8) # XXX: rip relative
269 movq %rax,RAX-ARGOFFSET(%rsp)
271 * Syscall return path ending with SYSRET (fast path)
272 * Has incomplete stack frame and undefined top of stack.
/* Return: with Xen events blocked, re-check thread flags; if no work
 * is pending, unblock events and return via the hypervisor iret with
 * VGCF_IN_SYSCALL (sysret-style return). */
275 movl $_TIF_ALLWORK_MASK,%edi
279 GET_THREAD_INFO(%rcx)
280 XEN_BLOCK_EVENTS(%rsi)
282 movl threadinfo_flags(%rcx),%edx
287 * sysretq will re-enable interrupts:
290 XEN_UNBLOCK_EVENTS(%rsi)
292 HYPERVISOR_IRET VGCF_IN_SYSCALL
/* Slow-path work handling for the SYSRET return: reschedule, then
 * signal delivery (via do_notify_resume through ptregscall_common so
 * the top of stack is fixed up first), then syscall tracing.
 * NOTE(review): the sysret_careful/sysret_signal/badsys/tracesys labels
 * and several branches sit on lines missing from this excerpt. */
295 /* Handle reschedules */
296 /* edx: work, edi: workmask */
298 bt $TIF_NEED_RESCHED,%edx
301 XEN_UNBLOCK_EVENTS(%rsi)
303 CFI_ADJUST_CFA_OFFSET 8
306 CFI_ADJUST_CFA_OFFSET -8
309 /* Handle a signal */
313 XEN_UNBLOCK_EVENTS(%rsi)
314 testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
317 /* Really a signal */
318 /* edx: work flags (arg3) */
319 leaq do_notify_resume(%rip),%rax
320 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
321 xorl %esi,%esi # oldset -> arg2
322 call ptregscall_common
323 1: movl $_TIF_NEED_RESCHED,%edi
324 /* Use IRET because user could have changed frame. This
325 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
326 XEN_BLOCK_EVENTS(%rsi)
/* Bad syscall number: report -ENOSYS through the normal return path. */
331 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
332 jmp ret_from_sys_call
334 /* Do syscall tracing */
/* tracesys: pre-set -ENOSYS, fix up the frame, let the tracer run
 * (which may rewrite the registers), then reload args and re-validate
 * the (possibly changed) syscall number before dispatching. */
337 movq $-ENOSYS,RAX(%rsp)
338 FIXUP_TOP_OF_STACK %rdi
340 call syscall_trace_enter
341 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
343 cmpq $__NR_syscall_max,%rax
347 movq %r10,%rcx /* fixup for C */
348 call *sys_call_table(,%rax,8)
349 1: movq %rax,RAX-ARGOFFSET(%rsp)
350 /* Use IRET because user could have changed frame */
353 * Syscall return path ending with IRET.
354 * Has correct top of stack, but partial stack frame.
/* IRET-based syscall return. With events blocked, distinguish
 * kernel-mode returns (CPL 0 in saved CS -- give them a valid %ss and
 * take the kernel restore path) from user returns, then loop handling
 * reschedule, tracing and signal work until no flags remain.
 * NOTE(review): several labels (int_with_check, int_careful,
 * int_very_careful, int_signal) and branches are on lines missing from
 * this excerpt. */
356 .globl int_ret_from_sys_call
357 int_ret_from_sys_call:
358 XEN_BLOCK_EVENTS(%rsi)
360 testb $3,CS-ARGOFFSET(%rsp)
362 /* Need to set the proper %ss (not NULL) for ring 3 iretq */
363 movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
364 jmp retint_restore_args # return from ring3 kernel
366 movl $_TIF_ALLWORK_MASK,%edi
367 /* edi: mask to check */
370 GET_THREAD_INFO(%rcx)
371 movl threadinfo_flags(%rcx),%edx
374 andl $~TS_COMPAT,threadinfo_status(%rcx)
375 jmp retint_restore_args
377 /* Either reschedule or signal or syscall exit tracking needed. */
378 /* First do a reschedule test. */
379 /* edx: work, edi: workmask */
381 bt $TIF_NEED_RESCHED,%edx
385 XEN_UNBLOCK_EVENTS(%rsi)
387 CFI_ADJUST_CFA_OFFSET 8
390 CFI_ADJUST_CFA_OFFSET -8
391 XEN_BLOCK_EVENTS(%rsi)
395 /* handle signals and tracing -- both require a full stack frame */
399 XEN_UNBLOCK_EVENTS(%rsi)
401 /* Check for syscall exit trace */
402 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
405 CFI_ADJUST_CFA_OFFSET 8
406 leaq 8(%rsp),%rdi # &ptregs -> arg1
407 call syscall_trace_leave
409 CFI_ADJUST_CFA_OFFSET -8
410 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
/* Signal delivery on the IRET path: full frame is available here. */
414 testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
416 movq %rsp,%rdi # &ptregs -> arg1
417 xorl %esi,%esi # oldset -> arg2
418 call do_notify_resume
419 1: movl $_TIF_NEED_RESCHED,%edi
422 XEN_BLOCK_EVENTS(%rsi)
429 * Certain special system calls that need to save a complete full stack frame.
/* PTREGSCALL: generate a small stub that loads the real handler into
 * %rax and a pointer just below the return address into \arg (the
 * pt_regs argument slot), then tail-jumps to ptregscall_common which
 * builds the full frame. The register chosen per stub matches the
 * position of the struct pt_regs * parameter in each sys_* prototype. */
432 .macro PTREGSCALL label,func,arg
435 leaq \func(%rip),%rax
436 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
437 jmp ptregscall_common
/* Stub instantiations; \arg is the register holding &pt_regs. */
443 PTREGSCALL stub_clone, sys_clone, %r8
444 PTREGSCALL stub_fork, sys_fork, %rdi
445 PTREGSCALL stub_vfork, sys_vfork, %rdi
446 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
447 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
448 PTREGSCALL stub_iopl, sys_iopl, %rsi
/* Common tail for PTREGSCALL stubs: saves the full register set, fixes
 * up the undefined top of stack, calls the handler in %rax, then
 * restores. The CFI_REGISTER annotations track where the return
 * address (rip) currently lives for the unwinder.
 * NOTE(review): the SAVE_REST/call/RESTORE_REST instructions between
 * these annotations are on lines missing from this excerpt. */
450 ENTRY(ptregscall_common)
452 CFI_ADJUST_CFA_OFFSET -8
453 CFI_REGISTER rip, r11
456 CFI_REGISTER rip, r15
457 FIXUP_TOP_OF_STACK %r11
459 RESTORE_TOP_OF_STACK %r11
461 CFI_REGISTER rip, r11
464 CFI_ADJUST_CFA_OFFSET 8
465 CFI_REL_OFFSET rip, 0
468 END(ptregscall_common)
/* Fragment of a ptregs stub (presumably stub_execve -- its ENTRY line
 * is missing from this excerpt): full-frame call returning via the
 * IRET path because execve replaces the user context. */
473 CFI_ADJUST_CFA_OFFSET -8
474 CFI_REGISTER rip, r11
476 FIXUP_TOP_OF_STACK %r11
478 RESTORE_TOP_OF_STACK %r11
481 jmp int_ret_from_sys_call
486 * sigreturn is special because it needs to restore all registers on return.
487 * This cannot be done with SYSRET, so use the IRET return path instead.
/* rt_sigreturn: restore the pre-signal register state from the user
 * sigframe; must return with IRET since every register changes. */
489 ENTRY(stub_rt_sigreturn)
492 CFI_ADJUST_CFA_OFFSET -8
495 FIXUP_TOP_OF_STACK %r11
496 call sys_rt_sigreturn
497 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
499 jmp int_ret_from_sys_call
501 END(stub_rt_sigreturn)
/* CFI frame-state helpers used by the exception entry macros below;
 * the RIP offset differs because an error-code push (or vector push)
 * shifts the frame by 8 bytes.
 * NOTE(review): the continuation lines of both #defines are missing
 * from this excerpt. */
503 /* initial frame state for interrupts (and exceptions without error code) */
504 #define INTR_FRAME _frame (RIP-0x10); \
505 CFI_REL_OFFSET rcx,0; \
508 /* initial frame state for exceptions with error code (and interrupts with
509 vector already pushed) */
510 #define XCPT_FRAME _frame (RIP-0x18); \
511 CFI_REL_OFFSET rcx,0; \
/* Interrupt-return paths. retint_restore_args computes
 * (saved EFLAGS.IF & ~evtchn_upcall_mask): a nonzero result means
 * pending Xen event delivery must be enabled via
 * restore_all_enable_events before returning.
 * NOTE(review): the retint_check/retint_careful/retint_signal labels
 * and some instructions are on lines missing from this excerpt. */
520 CFI_DEFAULT_STACK adj=1
522 movl threadinfo_flags(%rcx),%edx
526 retint_restore_args: /* return to kernel space */
527 movl EFLAGS-REST_SKIP(%rsp), %eax
528 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
529 XEN_GET_VCPU_INFO(%rsi)
530 andb evtchn_upcall_mask(%rsi),%al
531 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
532 jnz restore_all_enable_events # != 0 => enable event delivery
533 XEN_PUT_VCPU_INFO(%rsi)
/* Work loop on return to user: reschedule first, ... */
538 /* edi: workmask, edx: work */
541 bt $TIF_NEED_RESCHED,%edx
544 XEN_UNBLOCK_EVENTS(%rsi)
547 CFI_ADJUST_CFA_OFFSET 8
550 CFI_ADJUST_CFA_OFFSET -8
551 GET_THREAD_INFO(%rcx)
552 XEN_BLOCK_EVENTS(%rsi)
/* ... then pending signals / singlestep / MCE notification. */
558 testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
559 jz retint_restore_args
561 XEN_UNBLOCK_EVENTS(%rsi)
563 movq $-1,ORIG_RAX(%rsp)
564 xorl %esi,%esi # oldset
565 movq %rsp,%rdi # &pt_regs
566 call do_notify_resume
568 XEN_BLOCK_EVENTS(%rsi)
570 movl $_TIF_NEED_RESCHED,%edi
571 GET_THREAD_INFO(%rcx)
574 #ifdef CONFIG_PREEMPT
575 /* Returning to kernel space. Check if we need preemption */
576 /* rcx: threadinfo. interrupts off. */
/* Preempt only when preempt_count is zero, TIF_NEED_RESCHED is set and
 * the interrupted context had interrupts enabled (EFLAGS.IF, bit 9). */
578 cmpl $0,threadinfo_preempt_count(%rcx)
579 jnz retint_restore_args
580 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
581 jnc retint_restore_args
582 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
583 jnc retint_restore_args
584 call preempt_schedule_irq
585 jmp retint_kernel /* check again */
/* apicinterrupt: common stub shape for APIC-sourced interrupts -- push
 * the vector number and enter the shared interrupt path with \func as
 * the handler.
 * NOTE(review): the macro body beyond the CFI adjustment, and its
 * .endm, are on lines missing from this excerpt. */
595 .macro apicinterrupt num,func
598 CFI_ADJUST_CFA_OFFSET 8
/* Per-vector entry points, each one expansion of apicinterrupt. */
604 ENTRY(thermal_interrupt)
605 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
606 END(thermal_interrupt)
608 ENTRY(threshold_interrupt)
609 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
610 END(threshold_interrupt)
613 ENTRY(reschedule_interrupt)
614 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
615 END(reschedule_interrupt)
/* One invalidate_interrupt entry per TLB-flush vector \num. */
617 .macro INVALIDATE_ENTRY num
618 ENTRY(invalidate_interrupt\num)
619 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
620 END(invalidate_interrupt\num)
632 ENTRY(call_function_interrupt)
633 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
634 END(call_function_interrupt)
635 ENTRY(irq_move_cleanup_interrupt)
636 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
637 END(irq_move_cleanup_interrupt)
640 ENTRY(apic_timer_interrupt)
641 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
642 END(apic_timer_interrupt)
644 ENTRY(error_interrupt)
645 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
648 ENTRY(spurious_interrupt)
649 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
650 END(spurious_interrupt)
651 #endif /* !CONFIG_XEN */
654 * Exception entry points.
/* zeroentry-style prologue (macro header not visible here): under Xen
 * the hypervisor pushes rcx/r11; skip them, push a zero error code,
 * then the real rax into the rdi slot. */
662 addq $0x10,%rsp /* skip rcx and r11 */
663 CFI_ADJUST_CFA_OFFSET -0x10
664 pushq $0 /* push error code/oldrax */
665 CFI_ADJUST_CFA_OFFSET 8
666 pushq %rax /* push real oldrax to the rdi slot */
667 CFI_ADJUST_CFA_OFFSET 8
/* errorentry: like the above but the CPU/Xen already supplied an
 * error code on the stack. */
674 .macro errorentry sym
680 addq $0x10,%rsp /* rsp points to the error code */
681 CFI_ADJUST_CFA_OFFSET -0x10
683 CFI_ADJUST_CFA_OFFSET 8
691 /* error code is on the stack already */
692 /* handle NMI like exceptions that can happen everywhere */
/* paranoidentry: entry for NMI-like exceptions that may fire in any
 * context; \ist selects an IST stack whose pointer is temporarily
 * lowered by EXCEPTION_STKSZ around the handler call so a nested
 * exception gets fresh stack.
 * NOTE(review): much of the macro body (swapgs logic, the handler
 * call, .endm) is on lines missing from this excerpt. */
693 .macro paranoidentry sym, ist=0, irqtrace=1
696 addq $0x10,%rsp /* skip rcx and r11 */
701 movl $MSR_GS_BASE,%ecx
710 movq %gs:pda_data_offset, %rbp
713 movq ORIG_RAX(%rsp),%rsi
714 movq $-1,ORIG_RAX(%rsp)
716 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
720 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
723 XEN_BLOCK_EVENTS(%rsi)
730 * "Paranoid" exit path from exception stack.
731 * Paranoid because this is used by NMIs and cannot take
732 * any kernel state for granted.
733 * We don't do kernel preemption checks here, because only
734 * NMI should be common and it does not enable IRQs and
735 * cannot get reschedule ticks.
737 * "trace" is 0 for the NMI handler only, because irq-tracing
738 * is fundamentally NMI-unsafe. (we cannot change the soft and
739 * hard flags at once, atomically)
/* paranoidexit: %ebx carries the "no swapgs needed" flag set at entry.
 * Labels are suffixed with \trace so the traced and untraced variants
 * can coexist. User-space returns loop through signal/reschedule work
 * until _TIF_WORK_MASK is clear.
 * NOTE(review): several instructions (swapgs, RESTORE_ALL, the
 * schedule call, .endm) are on lines missing from this excerpt. */
741 .macro paranoidexit trace=1
742 /* ebx: no swapgs flag */
744 testl %ebx,%ebx /* swapgs needed? */
745 jnz paranoid_restore\trace
747 jnz paranoid_userspace\trace
748 paranoid_swapgs\trace:
753 paranoid_restore\trace:
756 paranoid_userspace\trace:
757 GET_THREAD_INFO(%rcx)
758 movl threadinfo_flags(%rcx),%ebx
759 andl $_TIF_WORK_MASK,%ebx
760 jz paranoid_swapgs\trace
761 movq %rsp,%rdi /* &pt_regs */
763 movq %rax,%rsp /* switch stack for scheduling */
764 testl $_TIF_NEED_RESCHED,%ebx
765 jnz paranoid_schedule\trace
766 movl %ebx,%edx /* arg3: thread flags */
771 xorl %esi,%esi /* arg2: oldset */
772 movq %rsp,%rdi /* arg1: &pt_regs */
773 call do_notify_resume
778 jmp paranoid_userspace\trace
779 paranoid_schedule\trace:
789 jmp paranoid_userspace\trace
795 * Exception entry point. This expects an error code/orig_rax on the stack
796 * and the exception handler in %rax.
/* error_entry: build a full pt_regs frame register by register (each
 * store paired with a CFI annotation), recover rax from the rdi slot,
 * then dispatch to the handler in %rax with rdi=&pt_regs and rsi=error
 * code. Afterwards merge into the common retint work loop.
 * NOTE(review): most of the movq stores, branch targets and the
 * indirect call are on lines missing from this excerpt. */
798 KPROBE_ENTRY(error_entry)
801 /* rdi slot contains rax, oldrax contains error code */
804 CFI_ADJUST_CFA_OFFSET (14*8)
806 CFI_REL_OFFSET rsi,RSI
807 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
810 CFI_REL_OFFSET rdx,RDX
812 CFI_REL_OFFSET rcx,RCX
813 movq %rsi,10*8(%rsp) /* store rax */
814 CFI_REL_OFFSET rax,RAX
820 CFI_REL_OFFSET r10,R10
822 CFI_REL_OFFSET r11,R11
824 CFI_REL_OFFSET rbx,RBX
826 CFI_REL_OFFSET rbp,RBP
828 CFI_REL_OFFSET r12,R12
830 CFI_REL_OFFSET r13,R13
832 CFI_REL_OFFSET r14,R14
834 CFI_REL_OFFSET r15,R15
836 cmpl $__KERNEL_CS,CS(%rsp)
842 CFI_REL_OFFSET rdi,RDI
844 movq ORIG_RAX(%rsp),%rsi # get error code
845 movq $-1,ORIG_RAX(%rsp)
850 XEN_BLOCK_EVENTS(%rsi)
852 GET_THREAD_INFO(%rcx)
853 testb $3,CS-ARGOFFSET(%rsp)
856 movl threadinfo_flags(%rcx),%edx
857 movl $_TIF_WORK_MASK,%edi
860 jmp retint_restore_args
864 * We need to re-write the logic here because we don't do iretq to
865 * return to user mode. It's still possible that we get trap/fault
866 * in the kernel (when accessing buffers pointed to by system calls,
873 /* There are two places in the kernel that can potentially fault with
874 usergs. Handle them here. The exception handlers after
875 iret run with kernel gs again, so don't set the user space flag.
876 B stepping K8s sometimes report a truncated RIP for IRET
877 exceptions returning to compat mode. Check for these here too. */
878 leaq iret_label(%rip),%rbp
881 movl %ebp,%ebp /* zero extend */
884 cmpq $gs_change,RIP(%rsp)
889 KPROBE_END(error_entry)
/* Xen event-channel upcall entry: build the frame via zeroentry, then
 * hand off to do_hypervisor_callback below. */
891 ENTRY(hypervisor_callback)
892 zeroentry do_hypervisor_callback
893 END(hypervisor_callback)
896 * Copied from arch/xen/i386/kernel/entry.S
898 # A note on the "critical region" in our callback handler.
899 # We want to avoid stacking callback handlers due to events occurring
900 # during handling of the last event. To do this, we keep events disabled
901 # until we've done all processing. HOWEVER, we must enable events before
902 # popping the stack frame (can't be done atomically) and so it would still
903 # be possible to get enough handler activations to overflow the stack.
904 # Although unlikely, bugs of that kind are hard to track down, so we'd
905 # like to avoid the possibility.
906 # So, on entry to the handler we detect whether we interrupted an
907 # existing activation in its critical region -- if so, we pop the current
908 # activation and restart the handler using the previous one.
/* Switch to the per-CPU irq stack (unless already on it, tracked by
 * pda_irqcount) and run evtchn_do_upcall with %rdi still pointing at
 * the pt_regs of the interrupted context. */
909 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
911 # Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
912 # see the correct pointer to the pt_regs
913 movq %rdi, %rsp # we don't return, adjust the stack frame
916 11: incl %gs:pda_irqcount
918 CFI_DEF_CFA_REGISTER rbp
919 cmovzq %gs:pda_irqstackptr,%rsp
920 pushq %rbp # backlink for old unwinder
921 call evtchn_do_upcall
923 CFI_DEF_CFA_REGISTER rsp
924 decl %gs:pda_irqcount
927 END(do_hypervisor_callback)
/* Re-enable Xen event delivery and return. Between scrit and ecrit is
 * the "critical region" (see the note above do_hypervisor_callback):
 * if an upcall lands here, the callback handler must cope with the
 * partially-popped frame. A pending event found after unmasking is
 * handled immediately by re-blocking and re-entering the upcall path. */
930 restore_all_enable_events:
931 CFI_DEFAULT_STACK adj=1
933 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
935 scrit: /**** START OF CRITICAL REGION ****/
936 XEN_TEST_PENDING(%rsi)
938 jnz 14f # process more events if necessary...
939 XEN_PUT_VCPU_INFO(%rsi)
944 14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
945 XEN_PUT_VCPU_INFO(%rsi)
947 movq %rsp,%rdi # set the argument again
950 ecrit: /**** END OF CRITICAL REGION ****/
951 # At this point, unlike on x86-32, we don't do the fixup to simplify the
952 # code and the stack frame is more complex on x86-64.
953 # When the kernel is interrupted in the critical section, the kernel
954 # will do IRET in that case, and everything will be restored at that point,
955 # i.e. it just resumes from the next instruction interrupted with the same context.
957 # Hypervisor uses this for application faults while it executes.
958 # We get here for two reasons:
959 # 1. Fault while reloading DS, ES, FS or GS
960 # 2. Fault while executing IRET
961 # Category 1 we do not need to fix up as Xen has already reloaded all segment
962 # registers that could be reloaded and zeroed the others.
963 # Category 2 we fix up by killing the current process. We cannot use the
964 # normal Linux return path in this case because if we use the IRET hypercall
965 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
966 # We distinguish between categories by comparing each saved segment register
967 # with its current contents: any discrepancy means we are in category 1.
/* NOTE(review): the segment-comparison instructions between the CFI
 * annotations below are on lines missing from this excerpt. */
968 ENTRY(failsafe_callback)
970 CFI_REL_OFFSET rcx, 0
971 CFI_REL_OFFSET r11, 8
985 /* All segments match their saved values => Category 2 (Bad IRET). */
991 CFI_ADJUST_CFA_OFFSET -0x30
992 movq $11,%rdi /* SIGSEGV */
995 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1001 CFI_ADJUST_CFA_OFFSET -0x30
1003 CFI_ADJUST_CFA_OFFSET 8
/* Exception-table entry: a fault at gs_change resumes at bad_gs,
 * which runs with the kernel gs still loaded. */
1008 .section __ex_table,"a"
1010 .quad gs_change,bad_gs
1012 .section .fixup,"ax"
1013 /* running with kernelgs */
1015 /* swapgs */ /* switch back to user gs */
1023 * Create a kernel thread.
1025 * C extern interface:
1026 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1028 * asm input arguments:
1029 * rdi: fn, rsi: arg, rdx: flags
/* Builds a fake iret frame with child_rip as the entry point, ORs in
 * the fixed kernel_thread clone flags, and forwards to the clone
 * machinery (the do_fork call is on lines missing from this excerpt). */
1031 ENTRY(kernel_thread)
1033 FAKE_STACK_FRAME $child_rip
1036 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1038 orq kernel_thread_flags(%rip),%rdi
1051 * It isn't worth checking for a reschedule here,
1052 * so internally to the x86_64 port you can rely on kernel_thread()
1053 * not to reschedule the child before returning, this avoids the need
1054 * of hacks for example to fork off the per-CPU idle tasks.
1055 * [Hopefully no generic code relies on the reschedule -AK]
1061 ENDPROC(kernel_thread)
/* child_rip: the child starts here with fn/arg still in the registers
 * the parent loaded; the fake return address keeps the unwinder sane. */
1064 pushq $0 # fake return address
1067 * Here we are in the child and the registers are set as they were
1068 * at kernel_thread() invocation in the parent.
1080 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1082 * C extern interface:
1083 * extern long execve(char *name, char **argv, char **envp)
1085 * asm input arguments:
1086 * rdi: name, rsi: argv, rdx: envp
1088 * We want to fallback into:
1089 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
1091 * do_sys_execve asm fallback arguments:
1092 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
/* In-kernel execve: fake frame + sys_execve, returning through the
 * IRET path since the register state is replaced wholesale. */
1094 ENTRY(kernel_execve)
1099 movq %rax, RAX(%rsp)
1103 jmp int_ret_from_sys_call
1108 ENDPROC(kernel_execve)
/* Per-vector exception entry stubs: each expands zeroentry (no error
 * code), errorentry (CPU-supplied error code) or paranoidentry
 * (NMI-like, IST stack) with the matching C handler. Under Xen, stubs
 * that natively run on an exception stack are partially commented out
 * (the stray opening/closing comment markers below belong to those
 * disabled regions -- their partners are on lines missing from this
 * excerpt). */
1110 KPROBE_ENTRY(page_fault)
1111 errorentry do_page_fault
1112 KPROBE_END(page_fault)
1114 ENTRY(coprocessor_error)
1115 zeroentry do_coprocessor_error
1116 END(coprocessor_error)
1118 ENTRY(simd_coprocessor_error)
1119 zeroentry do_simd_coprocessor_error
1120 END(simd_coprocessor_error)
/* #NM: lazily restore FPU state rather than raise a signal. */
1122 ENTRY(device_not_available)
1123 zeroentry math_state_restore
1124 END(device_not_available)
1126 /* runs on exception stack */
1130 CFI_ADJUST_CFA_OFFSET 8 */
1137 zeroentry do_nmi_callback
1145 orl $NMI_MASK,EFLAGS(%rsp)
1147 XEN_BLOCK_EVENTS(%rsi)
1149 GET_THREAD_INFO(%rcx)
1150 jmp retint_restore_args
1152 END(do_nmi_callback)
1157 CFI_ADJUST_CFA_OFFSET 8 */
1159 /* jmp paranoid_exit1
1164 zeroentry do_overflow
1172 zeroentry do_invalid_op
1175 ENTRY(coprocessor_segment_overrun)
1176 zeroentry do_coprocessor_segment_overrun
1177 END(coprocessor_segment_overrun)
1180 zeroentry do_reserved
1184 /* runs on exception stack */
1187 paranoidentry do_double_fault
1194 errorentry do_invalid_TSS
1197 ENTRY(segment_not_present)
1198 errorentry do_segment_not_present
1199 END(segment_not_present)
1201 /* runs on exception stack */
1202 ENTRY(stack_segment)
1204 paranoidentry do_stack_segment */
1205 errorentry do_stack_segment
1206 /* jmp paranoid_exit1
1210 KPROBE_ENTRY(general_protection)
1211 errorentry do_general_protection
1212 KPROBE_END(general_protection)
1214 ENTRY(alignment_check)
1215 errorentry do_alignment_check
1216 END(alignment_check)
1219 zeroentry do_divide_error
1222 ENTRY(spurious_interrupt_bug)
1223 zeroentry do_spurious_interrupt_bug
1224 END(spurious_interrupt_bug)
1226 #ifdef CONFIG_X86_MCE
1227 /* runs on exception stack */
1228 ENTRY(machine_check)
1231 CFI_ADJUST_CFA_OFFSET 8
1232 paranoidentry do_machine_check
/* call_softirq: run do_softirq on the per-CPU irq stack. Saves %rbp as
 * the CFA anchor, switches to pda_irqstackptr only when pda_irqcount
 * transitions from -1 (the cmove condition is set up on a line missing
 * from this excerpt), and pushes %rbp as a backlink for the legacy
 * unwinder. */
1238 /* Call softirq on interrupt stack. Interrupts are off. */
1242 CFI_ADJUST_CFA_OFFSET 8
1243 CFI_REL_OFFSET rbp,0
1245 CFI_DEF_CFA_REGISTER rbp
1246 incl %gs:pda_irqcount
1247 cmove %gs:pda_irqstackptr,%rsp
1248 push %rbp # backlink for old unwinder
1251 CFI_DEF_CFA_REGISTER rsp
1252 CFI_ADJUST_CFA_OFFSET -8
1253 decl %gs:pda_irqcount
1256 ENDPROC(call_softirq)
/* ignore_sysret: stub target; body is on lines missing from this
 * excerpt. */
1258 KPROBE_ENTRY(ignore_sysret)
1263 ENDPROC(ignore_sysret)
/* arch_unwind_init_running: populate a pt_regs (%rdi) describing the
 * current running context for the stack unwinder. Callee-saved
 * registers are stored directly; caller-saved slots are filled with
 * %rax (presumably zeroed on a line missing from this excerpt --
 * verify), and %rcx is reused for both the RIP and RSP slots. */
1265 #ifdef CONFIG_STACK_UNWIND
1266 ENTRY(arch_unwind_init_running)
1268 movq %r15, R15(%rdi)
1269 movq %r14, R14(%rdi)
1271 movq %r13, R13(%rdi)
1272 movq %r12, R12(%rdi)
1274 movq %rbp, RBP(%rdi)
1275 movq %rbx, RBX(%rdi)
1277 movq %rax, R11(%rdi)
1278 movq %rax, R10(%rdi)
1281 movq %rax, RAX(%rdi)
1282 movq %rax, RCX(%rdi)
1283 movq %rax, RDX(%rdi)
1284 movq %rax, RSI(%rdi)
1285 movq %rax, RDI(%rdi)
1286 movq %rax, ORIG_RAX(%rdi)
1287 movq %rcx, RIP(%rdi)
1289 movq $__KERNEL_CS, CS(%rdi)
1290 movq %rax, EFLAGS(%rdi)
1291 movq %rcx, RSP(%rdi)
1292 movq $__KERNEL_DS, SS(%rdi)
1295 ENDPROC(arch_unwind_init_running)