/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  CPU hotplug support - ashok.raj@intel.com
 *  Jun Nakajima <jun.nakajima@intel.com>
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu-internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <xen/interface/physdev.h>
#include <asm/proto.h>
#include <asm/hardirq.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>

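/* Assembly entry point where a newly forked task first resumes execution. */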
asmlinkage extern void ret_from_fork(void);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
                        regs->sp, regs->flags);
        printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->dx, regs->si, regs->di);
        printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->bp, regs->r8, regs->r9);
        printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("mov %%fs,%0" : "=r" (fsindex));
        asm("mov %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        if (!all)
                return;

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
                        es, cr0);
        printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
                        cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

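/*
 * Note: a Xen PV guest cannot use swapgs to install the user GS selector
 * the way native load_gs_index() does, so the selector is handed to the
 * hypervisor via HYPERVISOR_set_segment_base() instead.
 */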
void xen_load_gs_index(unsigned gs)
{
        WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
}
EXPORT_SYMBOL(xen_load_gs_index);

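/*
 * Called when the task is being reaped.  A leftover private LDT at this
 * point indicates a bug, hence the warning and BUG() below.
 */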
void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                        dead_task->comm,
                                        dead_task->mm->context.ldt,
                                        dead_task->mm->context.size);
                        BUG();
                }
        }
}

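/*
 * Helpers for fs/gs bases that fit in 32 bits: install (or read back) the
 * base through one of the GDT TLS slots.  do_arch_prctl() below prefers
 * this path because reloading a selector is cheaper than switching the
 * full 64-bit segment base.
 */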
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;

        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

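/*
 * Set up a newly forked task: place a copy of the parent's user registers
 * at the top of the child's kernel stack, copy the segment and fs/gs base
 * state, duplicate the I/O bitmap if the parent has one, and honour
 * CLONE_SETTLS for the child's TLS.
 */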
int copy_thread(unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        if (user_mode(regs))
                childregs->sp = sp;
        else
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.io_bitmap_ptr = NULL;

        savesegment(gs, p->thread.gsindex);
        p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
        savesegment(fs, p->thread.fsindex);
        p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);

        err = -ENOMEM;
        memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
                                                  IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        p->thread.iopl = current->thread.iopl;

        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }

        return err;
}

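/*
 * Reset the segment registers and user stack/instruction pointers for a
 * freshly exec'ed image and drop the old FPU/extended state.  Shared by
 * the 64-bit, ia32 and x32 start_thread() variants below.
 */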
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
                    unsigned long new_sp,
                    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
        loadsegment(fs, 0);
        loadsegment(es, _ds);
        loadsegment(ds, _ds);
        load_gs_index(0);
        regs->ip    = new_ip;
        regs->sp    = new_sp;
        regs->cs    = _cs;
        regs->ss    = _ss;
        regs->flags = X86_EFLAGS_IF;
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        start_thread_common(regs, new_ip, new_sp,
                            __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_IA32_EMULATION
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
        start_thread_common(regs, new_ip, new_sp,
                            test_thread_flag(TIF_X32)
                            ? __USER_CS : __USER32_CS,
                            __USER_DS, __USER_DS);
}
#endif

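/*
 * Note on the Xen path in __switch_to() below: instead of writing the TSS,
 * GDT and I/O-permission state directly, the per-switch hypervisor work
 * (kernel stack switch, TLS descriptor updates, iopl and I/O bitmap
 * changes) is queued as multicall entries and issued as a single batched
 * hypercall, with the FPU hand-off prepared by xen_switch_fpu_prepare()
 * and completed by switch_fpu_finish().
 */
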
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported here either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        int cpu = smp_processor_id();
#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
        fpu_switch_t fpu;
#if CONFIG_XEN_COMPAT > 0x030002
        struct physdev_set_iopl iopl_op;
        struct physdev_set_iobitmap iobmp_op;
#else
        struct physdev_op _pdo[2], *pdo = _pdo;
#define iopl_op pdo->u.set_iopl
#define iobmp_op pdo->u.set_iobitmap
#endif
        multicall_entry_t _mcl[8], *mcl = _mcl;

        fpu = xen_switch_fpu_prepare(prev_p, next_p, cpu, &mcl);

        /*
         * Reload sp0.
         * This is load_sp0(tss, next) with a multicall.
         */
        mcl->op      = __HYPERVISOR_stack_switch;
        mcl->args[0] = __KERNEL_DS;
        mcl->args[1] = next->sp0;
        mcl++;

        /*
         * Load the per-thread Thread-Local Storage descriptor.
         * This is load_TLS(next, cpu) with multicalls.
         */
#define C(i) do {                                                       \
        if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||   \
                     next->tls_array[i].b != prev->tls_array[i].b)) {  \
                mcl->op      = __HYPERVISOR_update_descriptor;         \
                mcl->args[0] = arbitrary_virt_to_machine(              \
                        &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
                mcl->args[1] = *(u64 *)&next->tls_array[i];            \
                mcl++;                                                  \
        }                                                               \
} while (0)
        C(0); C(1); C(2);
#undef C

        if (unlikely(prev->iopl != next->iopl)) {
                iopl_op.iopl = (next->iopl == 0) ? 1 : (next->iopl >> 12) & 3;
#if CONFIG_XEN_COMPAT > 0x030002
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iopl;
                mcl->args[1] = (unsigned long)&iopl_op;
#else
                mcl->op      = __HYPERVISOR_physdev_op_compat;
                pdo->cmd     = PHYSDEVOP_set_iopl;
                mcl->args[0] = (unsigned long)pdo++;
#endif
                mcl++;
        }

        if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
                set_xen_guest_handle(iobmp_op.bitmap,
                                     (char *)next->io_bitmap_ptr);
                iobmp_op.nr_ports = next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
#if CONFIG_XEN_COMPAT > 0x030002
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iobitmap;
                mcl->args[1] = (unsigned long)&iobmp_op;
#else
                mcl->op      = __HYPERVISOR_physdev_op_compat;
                pdo->cmd     = PHYSDEVOP_set_iobitmap;
                mcl->args[0] = (unsigned long)pdo++;
#endif
                mcl++;
        }

#if CONFIG_XEN_COMPAT <= 0x030002
        BUG_ON(pdo > _pdo + ARRAY_SIZE(_pdo));
#endif
        BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
        if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
                BUG();

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        if (unlikely(next->es))
                loadsegment(es, next->es);

        if (unlikely(next->ds))
                loadsegment(ds, next->ds);

        /*
         * Leave lazy mode, flushing any hypercalls made here.
         * This must be done before restoring TLS segments so
         * the GDT and LDT are properly updated, and must be
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
        arch_end_context_switch(next_p);

        /*
         * Switch FS and GS.
         *
         * Segment register != 0 always requires a reload. Also
         * reload when it has changed. When prev process used 64bit
         * base always reload to avoid an information leak.
         */
        if (unlikely(next->fsindex))
                loadsegment(fs, next->fsindex);

        if (next->fs)
                WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_FS, next->fs));

        if (unlikely(next->gsindex))
                load_gs_index(next->gsindex);

        if (next->gs)
                WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs));

        switch_fpu_finish(next_p, fpu);

        /*
         * Switch the PDA context.
         */
        percpu_write(current_task, next_p);

        percpu_write(kernel_stack,
                     (unsigned long)task_stack_page(next_p) +
                     THREAD_SIZE - KERNEL_STACK_OFFSET);

        /*
         * Now maybe reload the debug registers
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p);

        return prev_p;
}

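/*
 * Personality setup, called from the ELF loaders at exec time to flag the
 * new image as native 64-bit, ia32 compat or x32.
 */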
void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);
        clear_thread_flag(TIF_ADDR32);
        clear_thread_flag(TIF_X32);

        /* Ensure the corresponding mm is not marked. */
        if (current->mm)
                current->mm->context.ia32_compat = 0;

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(bool x32)
{
        /* inherit personality from parent */

        /* Make sure to be in 32bit mode */
        set_thread_flag(TIF_ADDR32);

        /* Mark the associated mm as containing 32-bit tasks. */
        if (current->mm)
                current->mm->context.ia32_compat = 1;

        if (x32) {
                clear_thread_flag(TIF_IA32);
                set_thread_flag(TIF_X32);
                current->personality &= ~READ_IMPLIES_EXEC;
                /* is_compat_task() uses the presence of the x32
                   syscall bit flag to determine compat status */
                current_thread_info()->status &= ~TS_COMPAT;
        } else {
                set_thread_flag(TIF_IA32);
                clear_thread_flag(TIF_X32);
                current->personality |= force_personality32;
                /* Prepare the first "return" to user space */
                current_thread_info()->status |= TS_COMPAT;
        }
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

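/*
 * get_wchan(): walk the sleeping task's saved frame pointers and report
 * the first return address outside the scheduler.  Best effort only: the
 * walk is bounded to 16 frames and the task must not be running.
 */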
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp >= (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

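/*
 * do_arch_prctl() implements arch_prctl(2): ARCH_SET_FS/ARCH_SET_GS
 * install a new fs/gs base (small bases via a GDT TLS slot, large ones via
 * HYPERVISOR_set_segment_base()), while ARCH_GET_FS/ARCH_GET_GS copy the
 * current base back to user space.
 *
 * A minimal user-space sketch (assumes a libc exposing SYS_arch_prctl and
 * <asm/prctl.h>; "tls_page" is a hypothetical mapping owned by the caller):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);     (read current fs base)
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, tls_page);  (switch the gs base)
 */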
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = HYPERVISOR_set_segment_base(
                                        SEGBASE_GS_USER, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                loadsegment(fs, FS_TLS_SEL);
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                loadsegment(fs, 0);
                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
                                                                  addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        savesegment(gs, gsindex);
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

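/* arch_prctl(2) entry point; always operates on the calling task. */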
long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}