arch/x86/kernel/process_64-xen.c
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *      Andi Kleen.
 *
 *      CPU hotplug support - ashok.raj@intel.com
 *
 *  Jun Nakajima <jun.nakajima@intel.com>
 *     Modified for Xen
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <xen/interface/platform.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/hardirq.h>
#include <asm/ia32.h>
#include <asm/idle.h>

#include <xen/cpu_hotplug.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);

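/*
 * Mark this CPU as idle in its PDA and run the idle-start notifier
 * chain; __exit_idle()/exit_idle() below undo this on the way out.
 */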
void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
        local_irq_enable();
        cpu_relax();
}

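/*
 * Default idle routine when running on Xen: clear TS_POLLING so the
 * scheduler sends a real wakeup, then halt with interrupts re-enabled
 * via safe_halt() unless a reschedule is already pending.  (On Xen,
 * safe_halt() is expected to block the VCPU in the hypervisor rather
 * than execute a bare HLT.)
 */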
static void xen_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched())
                safe_halt();
        else
                local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

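/*
 * play_dead() parks an offlined CPU: the VCPU is handed back to the
 * hypervisor via VCPUOP_down and continues in cpu_bringup() once it is
 * brought online again.  Without CPU hotplug this should never run.
 */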
#ifdef CONFIG_HOTPLUG_CPU
static inline void play_dead(void)
{
        idle_task_exit();
        local_irq_disable();
        cpu_clear(smp_processor_id(), cpu_initialized);
        preempt_enable_no_resched();
        VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
        cpu_bringup();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        tick_nohz_stop_sched_tick();

                        rmb();
                        idle = xen_idle; /* no alternatives */
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

static void do_nothing(void *unused)
{
}

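/*
 * cpu_idle_wait(): wait until every online CPU has passed through the
 * idle loop at least once (typically used after the idle routine is
 * changed).  Each CPU's cpu_idle_state flag is set and then polled
 * once a second; CPUs that still have not cleared it are nudged with
 * an empty cross-call (do_nothing).
 */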
void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map, tmp = current->cpus_allowed;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) &&
                                        !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
                /*
                 * We waited 1 sec, if a CPU still did not call idle
                 * it may be because it is in idle and not waking up
                 * because it has nothing to do.
                 * Give all the remaining CPUS a kick.
                 */
                smp_call_function_mask(map, do_nothing, 0, 0);
        } while (!cpus_empty(map));

        set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
}

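/*
 * Handle the "idle=" boot parameter: "idle=poll" installs poll_idle as
 * pm_idle, "idle=mwait" sets force_mwait; anything else is rejected.
 */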
static int __init idle_setup (char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
                regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("mov %%ds,%0" : "=r" (ds));
        asm("mov %%cs,%0" : "=r" (cs));
        asm("mov %%es,%0" : "=r" (es));
        asm("mov %%fs,%0" : "=r" (fsindex));
        asm("mov %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS:  %04x DS: %04x ES: %04x\n", cs, ds, es);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
#ifndef CONFIG_X86_NO_TSS
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
#endif
#ifdef CONFIG_XEN
                struct physdev_set_iobitmap iobmp_op;
                memset(&iobmp_op, 0, sizeof(iobmp_op));
#endif

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
#ifndef CONFIG_X86_NO_TSS
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                put_cpu();
#endif
#ifdef CONFIG_XEN
                WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap,
                                              &iobmp_op));
#endif
                t->io_bitmap_max = 0;
        }
}

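/*
 * Load the user %gs selector.  Under Xen this is done through the
 * SEGBASE_GS_USER_SEL hypercall instead of writing the segment
 * register directly.
 */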
void load_gs_index(unsigned gs)
{
        WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
}

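/*
 * Reset per-thread state on exec: complete a pending 32/64-bit ABI
 * switch, clear the debug registers and TLS entries, and drop any
 * saved FPU state.
 */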
void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                        dead_task->comm,
                                        dead_task->mm->context.ldt,
                                        dead_task->mm->context.size);
                        BUG();
                }
        }
}

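/*
 * Helpers for 32-bit FS/GS bases kept in the GDT: set_32bit_tls()
 * encodes a flat 4GB, 32-bit segment with the given base into one of
 * the task's TLS slots, and read_32bit_tls() decodes the base back out.
 */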
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        desc->a = LDT_entry_a(&ud);
        desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        return desc->base0 |
                (((u32)desc->base1) << 16) |
                (((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

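/*
 * Set up the new task's kernel stack and register state for
 * fork/clone: copy the parent's pt_regs, zero the child's return value
 * (rax), duplicate the parent's I/O bitmap if it has one, and install
 * the requested TLS when CLONE_SETTLS is set.
 */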
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
        struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->rax = 0;
        childregs->rsp = rsp;
        if (rsp == ~0UL)
                childregs->rsp = (unsigned long)childregs;

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                                IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        p->thread.iopl = current->thread.iopl;

        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)

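/*
 * Context-switch work that is only needed for "special" tasks; in this
 * Xen flavour of the file that means reloading the hardware debug
 * registers when the incoming task uses them (TIF_DEBUG).
 */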
static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p)
{
        struct thread_struct *prev, *next;

        prev = &prev_p->thread;
        next = &next_p->thread;

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }
}

/*
 *      switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
        struct physdev_set_iopl iopl_op;
        struct physdev_set_iobitmap iobmp_op;
        multicall_entry_t _mcl[8], *mcl = _mcl;

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(&next->i387.fxsave);

        /*
         * This is basically '__unlazy_fpu', except that we queue a
         * multicall to indicate FPU task switch, rather than
         * synchronously trapping to Xen.
         * The AMD workaround requires it to be after DS reload, or
         * after DS has been cleared, which we do in __prepare_arch_switch.
         */
        if (task_thread_info(prev_p)->status & TS_USEDFPU) {
                __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
                mcl->op      = __HYPERVISOR_fpu_taskswitch;
                mcl->args[0] = 1;
                mcl++;
        } else
                prev_p->fpu_counter = 0;

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        mcl->op      = __HYPERVISOR_stack_switch;
        mcl->args[0] = __KERNEL_DS;
        mcl->args[1] = next->rsp0;
        mcl++;

        /*
         * Load the per-thread Thread-Local Storage descriptor.
         * This is load_TLS(next, cpu) with multicalls.
         */
#define C(i) do {                                                       \
        if (unlikely(next->tls_array[i] != prev->tls_array[i])) {       \
                mcl->op      = __HYPERVISOR_update_descriptor;          \
                mcl->args[0] = virt_to_machine(                         \
                        &cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]);          \
                mcl->args[1] = next->tls_array[i];                      \
                mcl++;                                                  \
        }                                                               \
} while (0)
        C(0); C(1); C(2);
#undef C

        if (unlikely(prev->iopl != next->iopl)) {
                iopl_op.iopl = (next->iopl == 0) ? 1 : next->iopl;
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iopl;
                mcl->args[1] = (unsigned long)&iopl_op;
                mcl++;
        }

        if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
                set_xen_guest_handle(iobmp_op.bitmap,
                                     (char *)next->io_bitmap_ptr);
                iobmp_op.nr_ports = next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iobitmap;
                mcl->args[1] = (unsigned long)&iobmp_op;
                mcl++;
        }

        BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
        if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
                BUG();

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        if (unlikely(next->es))
                loadsegment(es, next->es);

        if (unlikely(next->ds))
                loadsegment(ds, next->ds);

        /*
         * Switch FS and GS.
         */
        if (unlikely(next->fsindex))
                loadsegment(fs, next->fsindex);

        if (next->fs)
                WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_FS, next->fs));

        if (unlikely(next->gsindex))
                load_gs_index(next->gsindex);

        if (next->gs)
                WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs));

        /*
         * Switch the PDA context.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);
        write_pda(kernelstack,
        (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);

        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
                __switch_to_xtra(prev_p, next_p);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        if (error == 0) {
                task_lock(current);
                current->ptrace &= ~PT_DTRACE;
                task_unlock(current);
        }
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

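/*
 * The fork/clone/vfork system call entry points all funnel into
 * do_fork(); the child keeps running on the caller's user stack unless
 * clone() is given an explicit new stack pointer.
 */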
asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->rsp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                    NULL, NULL);
}

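/*
 * get_wchan(): walk a sleeping task's saved frame pointers (at most 16
 * frames, staying within its stack) and return the first return
 * address that is not inside the scheduler, i.e. where it blocked.
 */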
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, rip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                rip = *(u64 *)(fp+8);
                if (!in_sched_functions(rip))
                        return rip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

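/*
 * arch_prctl() backend: get or set a task's FS/GS base.  Bases that
 * fit in 32 bits go into a GDT TLS slot (cheaper to reload on context
 * switch); larger bases are programmed through
 * HYPERVISOR_set_segment_base() on Xen instead of a direct MSR write.
 */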
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = HYPERVISOR_set_segment_base(
                                        SEGBASE_GS_USER, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
                                                                  addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                }
                else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
        struct pt_regs *pp, ptregs;

        pp = task_pt_regs(tsk);

        ptregs = *pp;
        ptregs.cs &= 0xffff;
        ptregs.ss &= 0xffff;

        elf_core_copy_regs(regs, &ptregs);

        return 1;
}

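/*
 * Randomize the initial user stack pointer by up to 8KB (unless
 * address-space randomization is disabled) and keep 16-byte alignment.
 */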
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}

#ifndef CONFIG_SMP
void _restore_vcpu(void)
{
}
#endif