arch/x86/kernel/traps.c

   1 /*
   2  *  Copyright (C) 1991, 1992  Linus Torvalds
   3  *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
   4  *
   5  *  Pentium III FXSR, SSE support
   6  *      Gareth Hughes <gareth@valinux.com>, May 2000
   7  */
   8
   9 /*
  10  * Handle hardware traps and faults.
  11  */
  12 #include <linux/interrupt.h>
  13 #include <linux/kallsyms.h>
  14 #include <linux/spinlock.h>
  15 #include <linux/kprobes.h>
  16 #include <linux/uaccess.h>
  17 #include <linux/kdebug.h>
  18 #include <linux/kgdb.h>
  19 #include <linux/kernel.h>
  20 #include <linux/module.h>
  21 #include <linux/ptrace.h>
  22 #include <linux/string.h>
  23 #include <linux/delay.h>
  24 #include <linux/errno.h>
  25 #include <linux/kexec.h>
  26 #include <linux/sched.h>
  27 #include <linux/timer.h>
  28 #include <linux/init.h>
  29 #include <linux/bug.h>
  30 #include <linux/nmi.h>
  31 #include <linux/mm.h>
  32 #include <linux/smp.h>
  33 #include <linux/io.h>
  34
  35 #ifdef CONFIG_EISA
  36 #include <linux/ioport.h>
  37 #include <linux/eisa.h>
  38 #endif
  39
  40 #ifdef CONFIG_MCA
  41 #include <linux/mca.h>
  42 #endif
  43
  44 #if defined(CONFIG_EDAC)
  45 #include <linux/edac.h>
  46 #endif
  47
  48 #include <asm/kmemcheck.h>
  49 #include <asm/stacktrace.h>
  50 #include <asm/processor.h>
  51 #include <asm/debugreg.h>
  52 #include <asm/atomic.h>
  53 #include <asm/system.h>
  54 #include <asm/traps.h>
  55 #include <asm/desc.h>
  56 #include <asm/i387.h>
  57 #include <asm/mce.h>
  58
  59 #include <asm/mach_traps.h>
  60
  61 #ifdef CONFIG_X86_64
  62 #include <asm/x86_init.h>
  63 #include <asm/pgalloc.h>
  64 #include <asm/proto.h>
  65 #else
  66 #include <asm/processor-flags.h>
  67 #include <asm/setup.h>
  68
   69 asmlinkage int system_call(void);
   70
   71 /*
      * Do we ignore FPU interrupts?  On CONFIG_X86_32 builds
      * do_coprocessor_error() sets this to 1; nothing in this part of
      * the file reads it back.
      */
   72 char ignore_fpu_irq;
   73
   74 /*
   75  * The IDT has to be page-aligned to simplify the Pentium
   76  * F0 0F bug workaround.
      *
      * This definition sits in the #else branch of the CONFIG_X86_64
      * conditional above, so it is only compiled for non-64-bit builds.
   77  */
   78 gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
  79 #endif
  80
   81 DECLARE_BITMAP(used_vectors, NR_VECTORS);
      /* used_vectors (sized for NR_VECTORS bits) is exported GPL-only. */
   82 EXPORT_SYMBOL_GPL(used_vectors);
   83
      /*
       * While non-zero, do_nmi() counts the NMI but skips default_do_nmi().
       * Incremented by stop_nmi(), decremented by restart_nmi().
       */
   84 static int ignore_nmis;
   85
      /*
       * Set to 1 by the "unknown_nmi_panic" boot option; when set,
       * unknown_nmi_error() panics instead of trying to continue.
       */
   86 int unknown_nmi_panic;
   87 /*
   88  * Prevent NMI reason port (0x61) being accessed simultaneously, can
   89  * only be used in NMI handler.
   90  */
   91 static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
  92
  93 static inline void conditional_sti(struct pt_regs *regs)
  94 {
  95         if (regs->flags & X86_EFLAGS_IF)
  96                 local_irq_enable();
  97 }
  98
  99 static inline void preempt_conditional_sti(struct pt_regs *regs)
 100 {
 101         inc_preempt_count();
 102         if (regs->flags & X86_EFLAGS_IF)
 103                 local_irq_enable();
 104 }
 105
 106 static inline void conditional_cli(struct pt_regs *regs)
 107 {
 108         if (regs->flags & X86_EFLAGS_IF)
 109                 local_irq_disable();
 110 }
 111
 112 static inline void preempt_conditional_cli(struct pt_regs *regs)
 113 {
 114         if (regs->flags & X86_EFLAGS_IF)
 115                 local_irq_disable();
 116         dec_preempt_count();
 117 }
 118
 119 #ifdef CONFIG_X86_32
 120 static inline int
 121 __compare_user_cs_desc(const struct desc_struct *desc1,
 122         const struct desc_struct *desc2)
 123 {
 124         return ((desc1->limit0 != desc2->limit0) ||
 125                 (desc1->limit != desc2->limit) ||
 126                 (desc1->base0 != desc2->base0) ||
 127                 (desc1->base1 != desc2->base1) ||
 128                 (desc1->base2 != desc2->base2));
 129 }
 130
 131 /*
 132  * lazy-check for CS validity on exec-shield binaries:
 133  *
 134  * the original non-exec stack patch was written by
 135  * Solar Designer <solar at openwall.com>. Thanks!
 136  */
 137 static int
 138 check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
 139 {
 140         struct desc_struct *desc1, *desc2;
 141         struct vm_area_struct *vma;
 142         unsigned long limit;
 143
 144         if (current->mm == NULL)
 145                 return 0;
 146
 147         limit = -1UL;
 148         if (current->mm->context.exec_limit != -1UL) {
 149                 limit = PAGE_SIZE;
 150                 spin_lock(&current->mm->page_table_lock);
 151                 for (vma = current->mm->mmap; vma; vma = vma->vm_next)
 152                         if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
 153                                 limit = vma->vm_end;
 154                 vma = get_gate_vma(current);
 155                 if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
 156                         limit = vma->vm_end;
 157                 spin_unlock(&current->mm->page_table_lock);
 158                 if (limit >= TASK_SIZE)
 159                         limit = -1UL;
 160                 current->mm->context.exec_limit = limit;
 161         }
 162         set_user_cs(&current->mm->context.user_cs, limit);
 163
 164         desc1 = &current->mm->context.user_cs;
 165         desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
 166
 167         if (__compare_user_cs_desc(desc1, desc2)) {
 168                 /*
 169                  * The CS was not in sync - reload it and retry the
 170                  * instruction. If the instruction still faults then
 171                  * we won't hit this branch next time around.
 172                  */
 173                 if (print_fatal_signals >= 2) {
 174                         printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
 175                                 error_code, error_code/8, regs->ip,
 176                                 smp_processor_id());
 177                         printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
 178                                 current->mm->context.exec_limit,
 179                                 desc1->a, desc1->b, desc2->a, desc2->b);
 180                 }
 181
 182                 load_user_cs_desc(cpu, current->mm);
 183
 184                 return 1;
 185         }
 186
 187         return 0;
 188 }
 189 #endif
 190
 191 static void __kprobes
 192 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 193         long error_code, siginfo_t *info)
 194 {
 195         struct task_struct *tsk = current;
 196
 197 #ifdef CONFIG_X86_32
 198         if (regs->flags & X86_VM_MASK) {
 199                 /*
 200                  * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 201                  * On nmi (interrupt 2), do_trap should not be called.
 202                  */
 203                 if (trapnr < 6)
 204                         goto vm86_trap;
 205                 goto trap_signal;
 206         }
 207 #endif
 208
 209         if (!user_mode(regs))
 210                 goto kernel_trap;
 211
 212 #ifdef CONFIG_X86_32
 213 trap_signal:
 214 #endif
 215         /*
 216          * We want error_code and trap_no set for userspace faults and
 217          * kernelspace faults which result in die(), but not
 218          * kernelspace faults which are fixed up.  die() gives the
 219          * process no chance to handle the signal and notice the
 220          * kernel fault information, so that won't result in polluting
 221          * the information about previously queued, but not yet
 222          * delivered, faults.  See also do_general_protection below.
 223          */
 224         tsk->thread.error_code = error_code;
 225         tsk->thread.trap_no = trapnr;
 226
 227 #ifdef CONFIG_X86_64
 228         if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 229             printk_ratelimit()) {
 230                 printk(KERN_INFO
 231                        "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
 232                        tsk->comm, tsk->pid, str,
 233                        regs->ip, regs->sp, error_code);
 234                 print_vma_addr(" in ", regs->ip);
 235                 printk("\n");
 236         }
 237 #endif
 238
 239         if (info)
 240                 force_sig_info(signr, info, tsk);
 241         else
 242                 force_sig(signr, tsk);
 243         return;
 244
 245 kernel_trap:
 246         if (!fixup_exception(regs)) {
 247                 tsk->thread.error_code = error_code;
 248                 tsk->thread.trap_no = trapnr;
 249                 die(str, regs, error_code);
 250         }
 251         return;
 252
 253 #ifdef CONFIG_X86_32
 254 vm86_trap:
 255         if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
 256                                                 error_code, trapnr))
 257                 goto trap_signal;
 258         return;
 259 #endif
 260 }
 261
 262 #define DO_ERROR(trapnr, signr, str, name)                              \
 263 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)     \
 264 {                                                                       \
 265         if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
 266                                                         == NOTIFY_STOP) \
 267                 return;                                                 \
 268         conditional_sti(regs);                                          \
 269         do_trap(trapnr, signr, str, regs, error_code, NULL);            \
 270 }
 271
 272 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)         \
 273 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)     \
 274 {                                                                       \
 275         siginfo_t info;                                                 \
 276         info.si_signo = signr;                                          \
 277         info.si_errno = 0;                                              \
 278         info.si_code = sicode;                                          \
 279         info.si_addr = (void __user *)siaddr;                           \
 280         if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
 281                                                         == NOTIFY_STOP) \
 282                 return;                                                 \
 283         conditional_sti(regs);                                          \
 284         do_trap(trapnr, signr, str, regs, error_code, &info);           \
 285 }
 286
  /*
   * Instantiate the simple exception handlers.  Each line defines
   * do_<name>(regs, error_code) for the given trap number, signal and
   * message through DO_ERROR() or DO_ERROR_INFO().  The handler for
   * trap 12 (stack segment) is generated from the macro only on
   * CONFIG_X86_32.
   */
  287 DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
  288 DO_ERROR(4, SIGSEGV, "overflow", overflow)
  289 DO_ERROR(5, SIGSEGV, "bounds", bounds)
  290 DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
  291 DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
  292 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
  293 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
  294 #ifdef CONFIG_X86_32
  295 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
  296 #endif
      /* Alignment check reports BUS_ADRALN with a zero si_addr. */
  297 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
 298
 299 #ifdef CONFIG_X86_64
 300 /* Runs on IST stack */
 301 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 302 {
 303         if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
 304                         12, SIGBUS) == NOTIFY_STOP)
 305                 return;
 306         preempt_conditional_sti(regs);
 307         do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
 308         preempt_conditional_cli(regs);
 309 }
 310
 311 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 312 {
 313         static const char str[] = "double fault";
 314         struct task_struct *tsk = current;
 315
 316         /* Return not checked because double check cannot be ignored */
 317         notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
 318
 319         tsk->thread.error_code = error_code;
 320         tsk->thread.trap_no = 8;
 321
 322         /*
 323          * This is always a kernel trap and never fixable (and thus must
 324          * never return).
 325          */
 326         for (;;)
 327                 die(str, regs, error_code);
 328 }
 329 #endif
 330
 331 dotraplinkage void __kprobes
 332 do_general_protection(struct pt_regs *regs, long error_code)
 333 {
 334         struct task_struct *tsk;
 335
 336         conditional_sti(regs);
 337
 338 #ifdef CONFIG_X86_32
 339         if (regs->flags & X86_VM_MASK)
 340                 goto gp_in_vm86;
 341 #endif
 342
 343         tsk = current;
 344         if (!user_mode(regs))
 345                 goto gp_in_kernel;
 346
 347 #ifdef CONFIG_X86_32
 348 {
 349         int cpu;
 350         int ok;
 351
 352         cpu = get_cpu();
 353         ok = check_lazy_exec_limit(cpu, regs, error_code);
 354         put_cpu();
 355
 356         if (ok)
 357                 return;
 358
 359         if (print_fatal_signals) {
 360                 printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
 361                         error_code, error_code/8, regs->ip, smp_processor_id());
 362                 printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
 363                         current->mm->context.exec_limit,
 364                         current->mm->context.user_cs.a,
 365                         current->mm->context.user_cs.b);
 366         }
 367 }
 368 #endif /*CONFIG_X86_32*/
 369
 370         tsk->thread.error_code = error_code;
 371         tsk->thread.trap_no = 13;
 372
 373         if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 374                         printk_ratelimit()) {
 375                 printk(KERN_INFO
 376                         "%s[%d] general protection ip:%lx sp:%lx error:%lx",
 377                         tsk->comm, task_pid_nr(tsk),
 378                         regs->ip, regs->sp, error_code);
 379                 print_vma_addr(" in ", regs->ip);
 380                 printk("\n");
 381         }
 382
 383         force_sig(SIGSEGV, tsk);
 384         return;
 385
 386 #ifdef CONFIG_X86_32
 387 gp_in_vm86:
 388         local_irq_enable();
 389         handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 390         return;
 391 #endif
 392
 393 gp_in_kernel:
 394         if (fixup_exception(regs))
 395                 return;
 396
 397         tsk->thread.error_code = error_code;
 398         tsk->thread.trap_no = 13;
 399         if (notify_die(DIE_GPF, "general protection fault", regs,
 400                                 error_code, 13, SIGSEGV) == NOTIFY_STOP)
 401                 return;
 402         die("general protection fault", regs, error_code);
 403 }
 404
 405 static int __init setup_unknown_nmi_panic(char *str)
 406 {
 407         unknown_nmi_panic = 1;
 408         return 1;
 409 }
 410 __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 411
 412 static notrace __kprobes void
 413 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 414 {
 415         pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
 416                  reason, smp_processor_id());
 417
 418         /*
 419          * On some machines, PCI SERR line is used to report memory
 420          * errors. EDAC makes use of it.
 421          */
 422 #if defined(CONFIG_EDAC)
 423         if (edac_handler_set()) {
 424                 edac_atomic_assert_error();
 425                 return;
 426         }
 427 #endif
 428
 429         if (panic_on_unrecovered_nmi)
 430                 panic("NMI: Not continuing");
 431
 432         pr_emerg("Dazed and confused, but trying to continue\n");
 433
 434         /* Clear and disable the PCI SERR error line. */
 435         reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
 436         outb(reason, NMI_REASON_PORT);
 437 }
 438
 439 static notrace __kprobes void
 440 io_check_error(unsigned char reason, struct pt_regs *regs)
 441 {
 442         unsigned long i;
 443
 444         pr_emerg(
 445         "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
 446                  reason, smp_processor_id());
 447         show_registers(regs);
 448
 449         if (panic_on_io_nmi)
 450                 panic("NMI IOCK error: Not continuing");
 451
 452         /* Re-enable the IOCK line, wait for a few seconds */
 453         reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
 454         outb(reason, NMI_REASON_PORT);
 455
 456         i = 20000;
 457         while (--i) {
 458                 touch_nmi_watchdog();
 459                 udelay(100);
 460         }
 461
 462         reason &= ~NMI_REASON_CLEAR_IOCHK;
 463         outb(reason, NMI_REASON_PORT);
 464 }
 465
 466 static notrace __kprobes void
 467 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 468 {
 469         if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
 470                         NOTIFY_STOP)
 471                 return;
 472 #ifdef CONFIG_MCA
 473         /*
 474          * Might actually be able to figure out what the guilty party
 475          * is:
 476          */
 477         if (MCA_bus) {
 478                 mca_handle_nmi();
 479                 return;
 480         }
 481 #endif
 482         pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
 483                  reason, smp_processor_id());
 484
 485         pr_emerg("Do you have a strange power saving mode enabled?\n");
 486         if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 487                 panic("NMI: Not continuing");
 488
 489         pr_emerg("Dazed and confused, but trying to continue\n");
 490 }
 491
 492 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 493 {
 494         unsigned char reason = 0;
 495
 496         /*
 497          * CPU-specific NMI must be processed before non-CPU-specific
 498          * NMI, otherwise we may lose it, because the CPU-specific
 499          * NMI can not be detected/processed on other CPUs.
 500          */
 501         if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
 502                 return;
 503
 504         /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
 505         raw_spin_lock(&nmi_reason_lock);
 506         reason = get_nmi_reason();
 507
 508         if (reason & NMI_REASON_MASK) {
 509                 if (reason & NMI_REASON_SERR)
 510                         pci_serr_error(reason, regs);
 511                 else if (reason & NMI_REASON_IOCHK)
 512                         io_check_error(reason, regs);
 513 #ifdef CONFIG_X86_32
 514                 /*
 515                  * Reassert NMI in case it became active
 516                  * meanwhile as it's edge-triggered:
 517                  */
 518                 reassert_nmi();
 519 #endif
 520                 raw_spin_unlock(&nmi_reason_lock);
 521                 return;
 522         }
 523         raw_spin_unlock(&nmi_reason_lock);
 524
 525         unknown_nmi_error(reason, regs);
 526 }
 527
 528 dotraplinkage notrace __kprobes void
 529 do_nmi(struct pt_regs *regs, long error_code)
 530 {
 531         nmi_enter();
 532
 533         inc_irq_stat(__nmi_count);
 534
 535         if (!ignore_nmis)
 536                 default_do_nmi(regs);
 537
 538         nmi_exit();
 539 }
 540
 541 void stop_nmi(void)
 542 {
 543         ignore_nmis++;
 544 }
 545
 546 void restart_nmi(void)
 547 {
 548         ignore_nmis--;
 549 }
 550
 551 /* May run on IST stack. */
 552 dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 553 {
 554 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 555         if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 556                         == NOTIFY_STOP)
 557                 return;
 558 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 559 #ifdef CONFIG_KPROBES
 560         if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 561                         == NOTIFY_STOP)
 562                 return;
 563 #else
 564         if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
 565                         == NOTIFY_STOP)
 566                 return;
 567 #endif
 568
 569         preempt_conditional_sti(regs);
 570         do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 571         preempt_conditional_cli(regs);
 572 }
 573
 574 #ifdef CONFIG_X86_64
 575 /*
 576  * Help handler running on IST stack to switch back to user stack
 577  * for scheduling or signal handling. The actual stack switch is done in
 578  * entry.S
 579  */
 580 asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 581 {
 582         struct pt_regs *regs = eregs;
 583         /* Did already sync */
 584         if (eregs == (struct pt_regs *)eregs->sp)
 585                 ;
 586         /* Exception from user space */
 587         else if (user_mode(eregs))
 588                 regs = task_pt_regs(current);
 589         /*
 590          * Exception from kernel and interrupts are enabled. Move to
 591          * kernel process stack.
 592          */
 593         else if (eregs->flags & X86_EFLAGS_IF)
 594                 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 595         if (eregs != regs)
 596                 *regs = *eregs;
 597         return regs;
 598 }
 599 #endif
 600
 601 /*
 602  * Our handling of the processor debug registers is non-trivial.
 603  * We do not clear them on entry and exit from the kernel. Therefore
 604  * it is possible to get a watchpoint trap here from inside the kernel.
 605  * However, the code in ./ptrace.c has ensured that the user can
 606  * only set watchpoints on userspace addresses. Therefore the in-kernel
 607  * watchpoint trap can only occur in code which is reading/writing
 608  * from user space. Such code must not hold kernel locks (since it
 609  * can equally take a page fault), therefore it is safe to call
 610  * force_sig_info even though that claims and releases locks.
 611  *
 612  * Code in ./signal.c ensures that the debug control register
 613  * is restored before we deliver any signal, and therefore that
 614  * user code runs with the correct debug control register even though
 615  * we clear it here.
 616  *
 617  * Being careful here means that we don't have to be as careful in a
 618  * lot of more complicated places (task switching can be a bit lazy
 619  * about restoring all the debug state, and ptrace doesn't have to
 620  * find every occurrence of the TF bit that could be saved away even
 621  * by user code)
 622  *
 623  * May run on IST stack.
 624  */
 625 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 626 {
 627         struct task_struct *tsk = current;
 628         int user_icebp = 0;
 629         unsigned long dr6;
 630         int si_code;
 631
 632         get_debugreg(dr6, 6);
 633
 634         /* Filter out all the reserved bits which are preset to 1 */
 635         dr6 &= ~DR6_RESERVED;
 636
 637         /*
 638          * If dr6 has no reason to give us about the origin of this trap,
 639          * then it's very likely the result of an icebp/int01 trap.
 640          * User wants a sigtrap for that.
 641          */
 642         if (!dr6 && user_mode(regs))
 643                 user_icebp = 1;
 644
 645         /* Catch kmemcheck conditions first of all! */
 646         if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 647                 return;
 648
 649         /* DR6 may or may not be cleared by the CPU */
 650         set_debugreg(0, 6);
 651
 652         /*
 653          * The processor cleared BTF, so don't mark that we need it set.
 654          */
 655         clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
 656
 657         /* Store the virtualized DR6 value */
 658         tsk->thread.debugreg6 = dr6;
 659
 660         if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
 661                                                         SIGTRAP) == NOTIFY_STOP)
 662                 return;
 663
 664         /* It's safe to allow irq's after DR6 has been saved */
 665         preempt_conditional_sti(regs);
 666
 667         if (regs->flags & X86_VM_MASK) {
 668                 handle_vm86_trap((struct kernel_vm86_regs *) regs,
 669                                 error_code, 1);
 670                 preempt_conditional_cli(regs);
 671                 return;
 672         }
 673
 674         /*
 675          * Single-stepping through system calls: ignore any exceptions in
 676          * kernel space, but re-enable TF when returning to user mode.
 677          *
 678          * We already checked v86 mode above, so we can check for kernel mode
 679          * by just checking the CPL of CS.
 680          */
 681         if ((dr6 & DR_STEP) && !user_mode(regs)) {
 682                 tsk->thread.debugreg6 &= ~DR_STEP;
 683                 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 684                 regs->flags &= ~X86_EFLAGS_TF;
 685         }
 686         si_code = get_si_code(tsk->thread.debugreg6);
 687         if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 688                 send_sigtrap(tsk, regs, error_code, si_code);
 689         preempt_conditional_cli(regs);
 690
 691         return;
 692 }
 693
 694 /*
 695  * Note that we play around with the 'TS' bit in an attempt to get
 696  * the correct behaviour even in the presence of the asynchronous
 697  * IRQ13 behaviour
 698  */
 699 void math_error(struct pt_regs *regs, int error_code, int trapnr)
 700 {
 701         struct task_struct *task = current;
 702         siginfo_t info;
 703         unsigned short err;
 704         char *str = (trapnr == 16) ? "fpu exception" : "simd exception";
 705
 706         if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
 707                 return;
 708         conditional_sti(regs);
 709
 710         if (!user_mode_vm(regs))
 711         {
 712                 if (!fixup_exception(regs)) {
 713                         task->thread.error_code = error_code;
 714                         task->thread.trap_no = trapnr;
 715                         die(str, regs, error_code);
 716                 }
 717                 return;
 718         }
 719
 720         /*
 721          * Save the info for the exception handler and clear the error.
 722          */
 723         save_init_fpu(task);
 724         task->thread.trap_no = trapnr;
 725         task->thread.error_code = error_code;
 726         info.si_signo = SIGFPE;
 727         info.si_errno = 0;
 728         info.si_addr = (void __user *)regs->ip;
 729         if (trapnr == 16) {
 730                 unsigned short cwd, swd;
 731                 /*
 732                  * (~cwd & swd) will mask out exceptions that are not set to unmasked
 733                  * status.  0x3f is the exception bits in these regs, 0x200 is the
 734                  * C1 reg you need in case of a stack fault, 0x040 is the stack
 735                  * fault bit.  We should only be taking one exception at a time,
 736                  * so if this combination doesn't produce any single exception,
 737                  * then we have a bad program that isn't synchronizing its FPU usage
 738                  * and it will suffer the consequences since we won't be able to
 739                  * fully reproduce the context of the exception
 740                  */
 741                 cwd = get_fpu_cwd(task);
 742                 swd = get_fpu_swd(task);
 743
 744                 err = swd & ~cwd;
 745         } else {
 746                 /*
 747                  * The SIMD FPU exceptions are handled a little differently, as there
 748                  * is only a single status/control register.  Thus, to determine which
 749                  * unmasked exception was caught we must mask the exception mask bits
 750                  * at 0x1f80, and then use these to mask the exception bits at 0x3f.
 751                  */
 752                 unsigned short mxcsr = get_fpu_mxcsr(task);
 753                 err = ~(mxcsr >> 7) & mxcsr;
 754         }
 755
 756         if (err & 0x001) {      /* Invalid op */
 757                 /*
 758                  * swd & 0x240 == 0x040: Stack Underflow
 759                  * swd & 0x240 == 0x240: Stack Overflow
 760                  * User must clear the SF bit (0x40) if set
 761                  */
 762                 info.si_code = FPE_FLTINV;
 763         } else if (err & 0x004) { /* Divide by Zero */
 764                 info.si_code = FPE_FLTDIV;
 765         } else if (err & 0x008) { /* Overflow */
 766                 info.si_code = FPE_FLTOVF;
 767         } else if (err & 0x012) { /* Denormal, Underflow */
 768                 info.si_code = FPE_FLTUND;
 769         } else if (err & 0x020) { /* Precision */
 770                 info.si_code = FPE_FLTRES;
 771         } else {
 772                 /*
 773                  * If we're using IRQ 13, or supposedly even some trap 16
 774                  * implementations, it's possible we get a spurious trap...
 775                  */
 776                 return;         /* Spurious trap, no error */
 777         }
 778         force_sig_info(SIGFPE, &info, task);
 779 }
 780
 781 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 782 {
 783 #ifdef CONFIG_X86_32
 784         ignore_fpu_irq = 1;
 785 #endif
 786
 787         math_error(regs, error_code, 16);
 788 }
 789
 790 dotraplinkage void
 791 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 792 {
 793         math_error(regs, error_code, 19);
 794 }
 795
 796 dotraplinkage void
 797 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 798 {
 799         conditional_sti(regs);
 800 #if 0
 801         /* No need to warn about this any longer. */
 802         printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
 803 #endif
 804 }
 805
 806 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 807 {
 808 }
 809
 810 asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 811 {
 812 }
 813
 814 /*
 815  * __math_state_restore assumes that cr0.TS is already clear and the
 816  * fpu state is all ready for use.  Used during context switch.
 817  */
 818 void __math_state_restore(void)
 819 {
 820         struct thread_info *thread = current_thread_info();
 821         struct task_struct *tsk = thread->task;
 822
 823         /*
 824          * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 825          */
 826         if (unlikely(restore_fpu_checking(tsk))) {
 827                 stts();
 828                 force_sig(SIGSEGV, tsk);
 829                 return;
 830         }
 831
 832         thread->status |= TS_USEDFPU;   /* So we fnsave on switch_to() */
 833         tsk->fpu_counter++;
 834 }
 835
 836 /*
 837  * 'math_state_restore()' saves the current math information in the
 838  * old math state array, and gets the new ones from the current task
 839  *
 840  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 841  * Don't touch unless you *really* know how it works.
 842  *
 843  * Must be called with kernel preemption disabled (in this case,
 844  * local interrupts are disabled at the call-site in entry.S).
 845  */
 846 asmlinkage void math_state_restore(void)
 847 {
 848         struct thread_info *thread = current_thread_info();
 849         struct task_struct *tsk = thread->task;
 850
 851         if (!tsk_used_math(tsk)) {
 852                 local_irq_enable();
 853                 /*
 854                  * does a slab alloc which can sleep
 855                  */
 856                 if (init_fpu(tsk)) {
 857                         /*
 858                          * ran out of memory!
 859                          */
 860                         do_group_exit(SIGKILL);
 861                         return;
 862                 }
 863                 local_irq_disable();
 864         }
 865
 866         clts();                         /* Allow maths ops (or we recurse) */
 867
 868         __math_state_restore();
 869 }
 870 EXPORT_SYMBOL_GPL(math_state_restore);
 871
 872 dotraplinkage void __kprobes
 873 do_device_not_available(struct pt_regs *regs, long error_code)
 874 {
 875 #ifdef CONFIG_MATH_EMULATION
 876         if (read_cr0() & X86_CR0_EM) {
 877                 struct math_emu_info info = { };
 878
 879                 conditional_sti(regs);
 880
 881                 info.regs = regs;
 882                 math_emulate(&info);
 883                 return;
 884         }
 885 #endif
 886         math_state_restore(); /* interrupts still off */
 887 #ifdef CONFIG_X86_32
 888         conditional_sti(regs);
 889 #endif
 890 }
 891
 #ifdef CONFIG_X86_32
 /*
  * The fixup code for errors in iret jumps to here (iret_exc). It loses
  * the original trap number and error code. The bogus trap 32 and error
  * code 0 are what the vanilla kernel delivers via:
  * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
  *
  * NOTE: Because of the final "1" in the macro we need to enable interrupts.
  *
  * In case of a general protection fault in the iret instruction, we
  * need to check for a lazy CS update for exec-shield.  When
  * check_lazy_exec_limit() reports that it handled the fault, or a
  * die-notifier returns NOTIFY_STOP, no signal is sent.
  */
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
         siginfo_t info;
         int this_cpu;
         int fixed_up;

         local_irq_enable();

         /* get_cpu()/put_cpu() bracket the per-CPU lazy exec limit check. */
         this_cpu = get_cpu();
         fixed_up = check_lazy_exec_limit(this_cpu, regs, error_code);
         put_cpu();

         if (fixed_up)
                 return;

         if (notify_die(DIE_TRAP, "iret exception", regs,
                        error_code, 32, SIGSEGV) == NOTIFY_STOP)
                 return;

         info.si_signo = SIGSEGV;
         info.si_errno = 0;
         info.si_code = ILL_BADSTK;
         info.si_addr = NULL;
         do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
 }
 #endif
 926
 927 /* Set of traps needed for early debugging. */
 928 void __init early_trap_init(void)
 929 {
 930         set_intr_gate_ist(1, &debug, DEBUG_STACK);
 931         /* int3 can be called from all */
 932         set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
 933         set_intr_gate(14, &page_fault);
 934         load_idt(&idt_descr);
 935 }
 936
 937 void __init trap_init(void)
 938 {
 939         int i;
 940
 941 #ifdef CONFIG_EISA
 942         void __iomem *p = early_ioremap(0x0FFFD9, 4);
 943
 944         if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
 945                 EISA_bus = 1;
 946         early_iounmap(p, 4);
 947 #endif
 948
 949         set_intr_gate(0, &divide_error);
 950         set_intr_gate_ist(2, &nmi, NMI_STACK);
 951         /* int4 can be called from all */
 952         set_system_intr_gate(4, &overflow);
 953         set_intr_gate(5, &bounds);
 954         set_intr_gate(6, &invalid_op);
 955         set_intr_gate(7, &device_not_available);
 956 #ifdef CONFIG_X86_32
 957         set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
 958 #else
 959         set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
 960 #endif
 961         set_intr_gate(9, &coprocessor_segment_overrun);
 962         set_intr_gate(10, &invalid_TSS);
 963         set_intr_gate(11, &segment_not_present);
 964         set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
 965         set_intr_gate(13, &general_protection);
 966         set_intr_gate(15, &spurious_interrupt_bug);
 967         set_intr_gate(16, &coprocessor_error);
 968         set_intr_gate(17, &alignment_check);
 969 #ifdef CONFIG_X86_MCE
 970         set_intr_gate_ist(18, &machine_check, MCE_STACK);
 971 #endif
 972         set_intr_gate(19, &simd_coprocessor_error);
 973
 974         /* Reserve all the builtin and the syscall vector: */
 975         for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
 976                 set_bit(i, used_vectors);
 977
 978 #ifdef CONFIG_IA32_EMULATION
 979         set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 980         set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 981 #endif
 982
 983 #ifdef CONFIG_X86_32
 984         set_system_trap_gate(SYSCALL_VECTOR, &system_call);
 985         set_bit(SYSCALL_VECTOR, used_vectors);
 986 #endif
 987
 988         /*
 989          * Should be a barrier for any external CPU state:
 990          */
 991         cpu_init();
 992
 993         x86_init.irqs.trap_init();
 994 }