/*
 *  linux/arch/ppc64/kernel/process.c
 *
 *  Derived from "arch/i386/kernel/process.c"
 *    Copyright (C) 1995  Linus Torvalds
 *
 *  Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
 *  Paul Mackerras (paulus@cs.anu.edu.au)
 *
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/init.h>
#include <linux/init_task.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>

#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/prom.h>
#include <asm/ppcdebug.h>
#include <asm/machdep.h>
#include <asm/iSeries/HvCallHpt.h>
#include <asm/hardirq.h>
#include <asm/cputable.h>
#include <asm/sections.h>

struct task_struct *last_task_used_math = NULL;

struct mm_struct ioremap_mm = {
        .pgd             = ioremap_dir,
        .page_table_lock = SPIN_LOCK_UNLOCKED,
};

char *sysmap = NULL;
unsigned long sysmap_size = 0;

void
enable_kernel_fp(void)
{
#ifdef CONFIG_SMP
        if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
                giveup_fpu(current);
        else
                giveup_fpu(NULL);       /* just enables FP for kernel */
#else
        giveup_fpu(last_task_used_math);
#endif /* CONFIG_SMP */
}

#ifdef CONFIG_SMP
static void smp_unlazy_onefpu(void *arg)
{
        struct pt_regs *regs = current->thread.regs;

        if (!regs)
                return;
        if (regs->msr & MSR_FP)
                giveup_fpu(current);
}

void dump_smp_unlazy_fpu(void)
{
        smp_call_function(smp_unlazy_onefpu, NULL, 1, 1);
}
#endif

int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
{
        struct pt_regs *regs = tsk->thread.regs;

        if (!regs)
                return 0;
        if (tsk == current && (regs->msr & MSR_FP))
                giveup_fpu(current);

        memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs));

        return 1;
}

struct task_struct *__switch_to(struct task_struct *prev,
                                struct task_struct *new)
{
        struct thread_struct *new_thread, *old_thread;
        unsigned long flags;
        struct task_struct *last;

#ifdef CONFIG_SMP
        /* avoid complexity of lazy save/restore of fpu
         * by just saving it every time we switch out if
         * this task used the fpu during the last quantum.
         *
         * If it tries to use the fpu again, it'll trap and
         * reload its fp regs.  So we don't have to do a restore
         * every switch, just a save.
         *  -- Cort
         */
        if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
                giveup_fpu(prev);
#endif /* CONFIG_SMP */

        new_thread = &new->thread;
        old_thread = &current->thread;

        local_irq_save(flags);
        last = _switch(old_thread, new_thread);
        local_irq_restore(flags);
        return last;
}

char *ppc_find_proc_name(unsigned *p, char *buf, unsigned buflen);

void show_regs(struct pt_regs *regs)
{
        int i;
        char name_buf[256];

        printk("NIP: %016lX XER: %016lX LR: %016lX\n",
               regs->nip, regs->xer, regs->link);
        printk("REGS: %p TRAP: %04lx    %s\n",
               regs, regs->trap, print_tainted());
        printk("MSR: %016lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n",
               regs->msr, regs->msr & MSR_EE ? 1 : 0, regs->msr & MSR_PR ? 1 : 0,
               regs->msr & MSR_FP ? 1 : 0, regs->msr & MSR_ME ? 1 : 0,
               regs->msr & MSR_IR ? 1 : 0,
               regs->msr & MSR_DR ? 1 : 0);
        if (regs->trap == 0x300 || regs->trap == 0x380 || regs->trap == 0x600)
                printk("DAR: %016lx, DSISR: %016lx\n", regs->dar, regs->dsisr);
        printk("TASK = %p[%d] '%s' ",
               current, current->pid, current->comm);

#ifdef CONFIG_SMP
        printk(" CPU: %d", smp_processor_id());
#endif /* CONFIG_SMP */

        for (i = 0; i < 32; i++) {
                long r;
                if ((i % 4) == 0)
                        printk("\n" KERN_INFO "GPR%02d: ", i);
                if (__get_user(r, &(regs->gpr[i])))
                        return;
                printk("%016lX ", r);
        }
        printk("\n");
        /*
         * Look up the NIP late so we have the best chance of getting the
         * above info out without failing.
         */
        printk("NIP [%016lx] ", regs->nip);
        printk("%s\n", ppc_find_proc_name((unsigned *)regs->nip,
               name_buf, 256));
        show_stack(current, (unsigned long *)regs->gpr[1]);
}

void exit_thread(void)
{
        if (last_task_used_math == current)
                last_task_used_math = NULL;
}

void flush_thread(void)
{
        struct thread_info *t = current_thread_info();

        if (t->flags & _TIF_ABI_PENDING)
                t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);

        if (last_task_used_math == current)
                last_task_used_math = NULL;
}

void
release_thread(struct task_struct *t)
{
}

/*
 * Copy a thread.
 */
int
copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
            unsigned long unused, struct task_struct *p, struct pt_regs *regs)
{
        struct pt_regs *childregs, *kregs;
        extern void ret_from_fork(void);
        unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE;

        p->set_child_tid = p->clear_child_tid = NULL;

        /* Copy registers */
        sp -= sizeof(struct pt_regs);
        childregs = (struct pt_regs *) sp;
        *childregs = *regs;
        if ((childregs->msr & MSR_PR) == 0) {
                /* for kernel thread, set stackptr in new task */
                childregs->gpr[1] = sp + sizeof(struct pt_regs);
                p->thread.regs = NULL;  /* no user register state */
                clear_ti_thread_flag(p->thread_info, TIF_32BIT);
#ifdef CONFIG_PPC_ISERIES
                set_ti_thread_flag(p->thread_info, TIF_RUN_LIGHT);
#endif
        } else {
                childregs->gpr[1] = usp;
                p->thread.regs = childregs;
                if (clone_flags & CLONE_SETTLS) {
                        if (test_thread_flag(TIF_32BIT))
                                childregs->gpr[2] = childregs->gpr[6];
                        else
                                childregs->gpr[13] = childregs->gpr[6];
                }
        }
        childregs->gpr[3] = 0;  /* Result from fork() */
        sp -= STACK_FRAME_OVERHEAD;

        /*
         * The way this works is that at some point in the future
         * some task will call _switch to switch to the new task.
         * That will pop off the stack frame created below and start
         * the new task running at ret_from_fork.  The new task will
         * do some housekeeping and then return from the fork or clone
         * system call, using the stack frame created above.
         */
        sp -= sizeof(struct pt_regs);
        kregs = (struct pt_regs *) sp;
        sp -= STACK_FRAME_OVERHEAD;
        p->thread.ksp = sp;

        /*
         * The PPC64 ABI makes use of a TOC to contain function
         * pointers.  The symbol (ret_from_fork) actually names the
         * function descriptor, whose first entry is a pointer to the
         * actual entry point.
         */
        kregs->nip = *((unsigned long *)ret_from_fork);

        return 0;
}

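/*
 * Illustration only: the child kernel stack as copy_thread() leaves it,
 * from the top of the stack page downwards.  The offsets follow directly
 * from the arithmetic above:
 *
 *      p->thread_info + THREAD_SIZE         top of kernel stack
 *        -= sizeof(struct pt_regs)          childregs: user register image
 *        -= STACK_FRAME_OVERHEAD            frame for ret_from_fork to use
 *        -= sizeof(struct pt_regs)          kregs: kregs->nip = ret_from_fork
 *        -= STACK_FRAME_OVERHEAD            frame popped by _switch
 *      p->thread.ksp                        saved kernel stack pointer
 */
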
/*
 * Set up a thread for executing a new program
 */
void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp)
{
        unsigned long entry, toc, load_addr = regs->gpr[2];

        /* fdptr is a relocated pointer to the function descriptor for
         * the elf _start routine.  The first entry in the function
         * descriptor is the entry address of _start and the second
         * entry is the TOC value we need to use.
         */
        set_fs(USER_DS);
        __get_user(entry, (unsigned long *)fdptr);
        __get_user(toc, (unsigned long *)fdptr+1);

        /* Check whether the e_entry function descriptor entries
         * need to be relocated before we can use them.
         */
        if (load_addr != 0) {
                entry += load_addr;
                toc   += load_addr;
        }

        regs->nip = entry;
        regs->gpr[1] = sp;
        regs->gpr[2] = toc;
        regs->msr = MSR_USER64;
        if (last_task_used_math == current)
                last_task_used_math = NULL;
        current->thread.fpscr = 0;
}

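/*
 * Illustration only, kept out of the build: under the PPC64 (ELFv1) ABI
 * a function "address" such as fdptr above points at a descriptor, not
 * at code.  A minimal sketch of the layout being parsed; the struct and
 * its field names are illustrative, not from a kernel header.
 */
#if 0
struct func_desc {
        unsigned long entry;    /* address of the first instruction */
        unsigned long toc;      /* TOC (r2) value the code expects */
        unsigned long env;      /* environment pointer, unused by C */
};
/* start_thread() reads the first two doublewords, relocates both by
 * load_addr if the binary was loaded at a non-zero base, then seeds
 * regs->nip and regs->gpr[2] with the results. */
#endif
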
int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
{
        struct pt_regs *regs = tsk->thread.regs;

        if (val > PR_FP_EXC_PRECISE)
                return -EINVAL;
        tsk->thread.fpexc_mode = __pack_fe01(val);
        if (regs != NULL && (regs->msr & MSR_FP) != 0)
                regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
                        | tsk->thread.fpexc_mode;
        return 0;
}

int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
{
        unsigned int val;

        val = __unpack_fe01(tsk->thread.fpexc_mode);
        return put_user(val, (unsigned int *) adr);
}

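/*
 * Illustration only, kept out of the build: a userspace view of the two
 * helpers above, which back the PowerPC-specific PR_SET_FPEXC and
 * PR_GET_FPEXC prctl() operations.  A sketch, assuming <sys/prctl.h>
 * exposes the PR_FP_EXC_* constants.
 */
#if 0
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
        unsigned int mode;

        /* Ask for precise (synchronous) FP exception reporting; any
         * value above PR_FP_EXC_PRECISE is rejected with EINVAL. */
        if (prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE) != 0)
                perror("PR_SET_FPEXC");

        /* get_fpexc_mode() writes the unpacked mode through the
         * pointer passed as the second prctl() argument. */
        if (prctl(PR_GET_FPEXC, &mode) == 0)
                printf("fpexc mode: %u\n", mode);
        return 0;
}
#endif
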
int sys_clone(unsigned long clone_flags, unsigned long p2, unsigned long p3,
              unsigned long p4, unsigned long p5, unsigned long p6,
              struct pt_regs *regs)
{
        unsigned long parent_tidptr = 0;
        unsigned long child_tidptr = 0;

        if (p2 == 0)
                p2 = regs->gpr[1];      /* stack pointer for child */

        if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
                           CLONE_CHILD_CLEARTID)) {
                parent_tidptr = p3;
                child_tidptr = p5;
                if (test_thread_flag(TIF_32BIT)) {
                        parent_tidptr &= 0xffffffff;
                        child_tidptr &= 0xffffffff;
                }
        }

        if (regs->msr & MSR_FP)
                giveup_fpu(current);

        return do_fork(clone_flags & ~CLONE_IDLETASK, p2, regs, 0,
                    (int *)parent_tidptr, (int *)child_tidptr);
}

int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
             unsigned long p4, unsigned long p5, unsigned long p6,
             struct pt_regs *regs)
{
        if (regs->msr & MSR_FP)
                giveup_fpu(current);

        return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
}

int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
              unsigned long p4, unsigned long p5, unsigned long p6,
              struct pt_regs *regs)
{
        if (regs->msr & MSR_FP)
                giveup_fpu(current);

        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], regs, 0,
                    NULL, NULL);
}

int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
               unsigned long a3, unsigned long a4, unsigned long a5,
               struct pt_regs *regs)
{
        int error;
        char *filename;

        filename = getname((char *) a0);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                goto out;
        if (regs->msr & MSR_FP)
                giveup_fpu(current);

        error = do_execve(filename, (char **) a1, (char **) a2, regs);

        if (error == 0)
                current->ptrace &= ~PT_DTRACE;
        putname(filename);

out:
        return error;
}

void initialize_paca_hardware_interrupt_stack(void)
{
        int i;
        unsigned long stack;
        unsigned long end_of_stack = 0;

        for (i = 1; i < NR_CPUS; i++) {
                if (!cpu_possible(i))
                        continue;
                /* Carve out storage for the hardware interrupt stack */
                stack = __get_free_pages(GFP_ATOMIC, get_order(8*PAGE_SIZE));

                if (!stack) {
                        printk("ERROR, cannot find space for hardware stack.\n");
                        panic("no hardware stack");
                }

                /* Store the stack value in the PACA for the processor */
                paca[i].xHrdIntStack = stack + (8*PAGE_SIZE) - STACK_FRAME_OVERHEAD;
                paca[i].xHrdIntCount = 0;
        }

        /*
         * __get_free_pages() might give us a page > KERNBASE+256M which
         * is mapped with large ptes so we can't set up the guard page.
         */
        if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE)
                return;

        for (i = 0; i < NR_CPUS; i++) {
                if (!cpu_possible(i))
                        continue;
                /* set page at the top of stack to be protected - prevent overflow */
                end_of_stack = paca[i].xHrdIntStack - (8*PAGE_SIZE - STACK_FRAME_OVERHEAD);
                ppc_md.hpte_updateboltedpp(PP_RXRX, end_of_stack);
        }
}

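/*
 * Illustration only: the guard-page arithmetic above, worked through.
 * Each interrupt stack is 8 pages; with 4K pages, get_order(8*PAGE_SIZE)
 * is 3, so __get_free_pages() returns 32K of contiguous memory.  Since
 *
 *      xHrdIntStack = stack + 8*PAGE_SIZE - STACK_FRAME_OVERHEAD,
 *
 * the expression xHrdIntStack - (8*PAGE_SIZE - STACK_FRAME_OVERHEAD)
 * recovers the allocation base, i.e. the lowest page of the stack,
 * which is then bolted read-only (PP_RXRX) so a stack overflow faults
 * instead of silently corrupting whatever lies below it.
 */
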
char *ppc_find_proc_name(unsigned *p, char *buf, unsigned buflen)
{
        unsigned long tb_flags;
        unsigned short name_len;
        unsigned long tb_start, code_start, code_ptr, code_offset;
        unsigned int code_len;
        unsigned long end;

        strcpy(buf, "Unknown");
        code_ptr = (unsigned long)p;
        code_offset = 0;

        /* handle functions in text and init sections */
        if (((unsigned long)p >= (unsigned long)_stext) &&
            ((unsigned long)p < (unsigned long)_etext))
                end = (unsigned long)_etext;
        else if (((unsigned long)p >= (unsigned long)__init_begin) &&
                 ((unsigned long)p < (unsigned long)__init_end))
                end = (unsigned long)__init_end;
        else
                return buf;

        while ((unsigned long)p < end) {
                if (*p == 0) {
                        tb_start = (unsigned long)p;
                        ++p;    /* Point to traceback flags */
                        tb_flags = *((unsigned long *)p);
                        p += 2; /* Skip over traceback flags */
                        if (tb_flags & TB_NAME_PRESENT) {
                                if (tb_flags & TB_PARMINFO)
                                        ++p;    /* skip over parminfo data */
                                if (tb_flags & TB_HAS_TBOFF) {
                                        code_len = *p;  /* get code length */
                                        code_start = tb_start - code_len;
                                        code_offset = code_ptr - code_start + 1;
                                        if (code_offset > 0x100000)
                                                break;
                                        ++p;    /* skip over code size */
                                }
                                name_len = *((unsigned short *)p);
                                if (name_len > (buflen - 20))
                                        name_len = buflen - 20;
                                memcpy(buf, ((char *)p) + 2, name_len);
                                buf[name_len] = 0;
                                if (code_offset)
                                        sprintf(buf + name_len, "+0x%lx",
                                                code_offset - 1);
                        }
                        break;
                }
                ++p;
        }

        return buf;
}

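/*
 * Illustration only: the layout ppc_find_proc_name() walks above.
 * Compilers following the PowerPC traceback convention emit a
 * "traceback table" after each function's code; the sketch below
 * mirrors exactly what the parser consumes (the full table format has
 * further optional fields this code never touches):
 *
 *      word 0          0x00000000 (marks the end of the code)
 *      words 1-2       traceback flags (TB_NAME_PRESENT, TB_HAS_TBOFF, ...)
 *      [word]          parminfo, present only if TB_PARMINFO
 *      [word]          code length, present only if TB_HAS_TBOFF;
 *                      function start = table start - code length
 *      halfword        name length
 *      bytes           function name (not NUL-terminated)
 */
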
/*
 * These bracket the sleeping functions.
 */
extern void scheduling_functions_start_here(void);
extern void scheduling_functions_end_here(void);
#define first_sched    (*(unsigned long *)scheduling_functions_start_here)
#define last_sched     (*(unsigned long *)scheduling_functions_end_here)

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long ip, sp;
        unsigned long stack_page;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;

        stack_page = (unsigned long)p->thread_info;
        sp = p->thread.ksp;
        do {
                sp = *(unsigned long *)sp;
                if (sp < (stack_page + sizeof(struct thread_struct)) ||
                    sp >= (stack_page + THREAD_SIZE))
                        return 0;
                if (count > 0) {
                        ip = *(unsigned long *)(sp + 16);
                        /*
                         * XXX we mask the upper 32 bits until procps
                         * gets fixed.
                         */
                        if (ip < first_sched || ip >= last_sched)
                                return (ip & 0xFFFFFFFF);
                }
        } while (count++ < 16);
        return 0;
}

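/*
 * Illustration only, kept out of the build: get_wchan() above and
 * show_stack() below both walk the stack via the standard PPC64 ABI
 * frame header, which is why the saved LR is read at offset 16 from
 * each frame.  A sketch of the header; field names are illustrative.
 */
#if 0
struct stack_frame_head {
        unsigned long back_chain;       /*  0(sp): caller's frame */
        unsigned long cr_save;          /*  8(sp): saved CR */
        unsigned long lr_save;          /* 16(sp): saved LR, the return address */
        /* ... compiler/linker doublewords, parameter save area ... */
};
#endif
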
void show_stack(struct task_struct *p, unsigned long *_sp)
{
        unsigned long ip;
        unsigned long stack_page;
        int count = 0;
        char name_buf[256];
        unsigned long sp = (unsigned long)_sp;

        if (!p)
                return;

        stack_page = (unsigned long)p->thread_info;
        if (sp == 0)
                sp = p->thread.ksp;
        printk("Call Trace:\n");
        do {
                if (__get_user(sp, (unsigned long *)sp))
                        break;
                if (sp < stack_page + sizeof(struct thread_struct))
                        break;
                if (sp >= stack_page + THREAD_SIZE)
                        break;
                if (__get_user(ip, (unsigned long *)(sp + 16)))
                        break;
                printk("[%016lx] ", ip);
                printk("%s\n", ppc_find_proc_name((unsigned *)ip,
                       name_buf, 256));
        } while (count++ < 32);
}

void dump_stack(void)
{
        show_stack(current, (unsigned long *)_get_SP());
}

void show_trace_task(struct task_struct *tsk)
{
        show_stack(tsk, (unsigned long *)tsk->thread.ksp);
}