Merge branch 'tracing/core' into tracing/hw-breakpoints
authorIngo Molnar <mingo@elte.hu>
Mon, 7 Sep 2009 06:19:51 +0000 (08:19 +0200)
committerIngo Molnar <mingo@elte.hu>
Mon, 7 Sep 2009 06:19:51 +0000 (08:19 +0200)
Conflicts:
arch/Kconfig
kernel/trace/trace.h

Merge reason: resolve the conflicts, plus adapt to the new
              ring-buffer APIs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

1  2 
arch/Kconfig
arch/x86/Kconfig
arch/x86/kernel/Makefile
arch/x86/kernel/process.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/traps.c
arch/x86/power/cpu.c
kernel/Makefile
kernel/trace/Kconfig
kernel/trace/trace.h
kernel/trace/trace_selftest.c

diff --combined arch/Kconfig
@@@ -113,6 -113,4 +113,8 @@@ config HAVE_DMA_API_DEBU
  config HAVE_DEFAULT_NO_SPIN_MUTEXES
        bool
  
 +config HAVE_HW_BREAKPOINT
 +      bool
 +
++
+ source "kernel/gcov/Kconfig"
diff --combined arch/x86/Kconfig
@@@ -24,18 -24,21 +24,21 @@@ config X8
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_IDE
        select HAVE_OPROFILE
+       select HAVE_PERF_COUNTERS if (!M386 && !M486)
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select ARCH_WANT_FRAME_POINTERS
+       select HAVE_DMA_ATTRS
        select HAVE_KRETPROBES
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_DYNAMIC_FTRACE
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
+       select HAVE_FUNCTION_GRAPH_FP_TEST
        select HAVE_FUNCTION_TRACE_MCOUNT_TEST
        select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
-       select HAVE_FTRACE_SYSCALLS
+       select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_KVM
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_TRACEHOOK
@@@ -46,7 -49,6 +49,7 @@@
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_LZMA
 +      select HAVE_HW_BREAKPOINT
        select HAVE_ARCH_KMEMCHECK
  
  config OUTPUT_FORMAT
@@@ -741,7 -743,6 +744,6 @@@ config X86_UP_IOAPI
  config X86_LOCAL_APIC
        def_bool y
        depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
-       select HAVE_PERF_COUNTERS if (!M386 && !M486)
  
  config X86_IO_APIC
        def_bool y
@@@ -1912,25 -1913,26 +1914,26 @@@ config DMAR_DEFAULT_O
          recommended you say N here while the DMAR code remains
          experimental.
  
- config DMAR_GFX_WA
-       def_bool y
-       prompt "Support for Graphics workaround"
+ config DMAR_BROKEN_GFX_WA
+       def_bool n
+       prompt "Workaround broken graphics drivers (going away soon)"
        depends on DMAR
        ---help---
          Current Graphics drivers tend to use physical address
          for DMA and avoid using DMA APIs. Setting this config
          option permits the IOMMU driver to set a unity map for
          all the OS-visible memory. Hence the driver can continue
-         to use physical addresses for DMA.
+         to use physical addresses for DMA, at least until this
+         option is removed in the 2.6.32 kernel.
  
  config DMAR_FLOPPY_WA
        def_bool y
        depends on DMAR
        ---help---
-         Floppy disk drivers are know to bypass DMA API calls
+         Floppy disk drivers are known to bypass DMA API calls
          thereby failing to work when IOMMU is enabled. This
          workaround will setup a 1:1 mapping for the first
-         16M to make floppy (an ISA device) work.
+         16MiB to make floppy (an ISA device) work.
  
  config INTR_REMAP
        bool "Support for Interrupt Remapping (EXPERIMENTAL)"
diff --combined arch/x86/kernel/Makefile
@@@ -24,6 -24,10 +24,10 @@@ CFLAGS_vsyscall_64.o        := $(PROFILING) -g
  CFLAGS_hpet.o         := $(nostackp)
  CFLAGS_tsc.o          := $(nostackp)
  CFLAGS_paravirt.o     := $(nostackp)
+ GCOV_PROFILE_vsyscall_64.o    := n
+ GCOV_PROFILE_hpet.o           := n
+ GCOV_PROFILE_tsc.o            := n
+ GCOV_PROFILE_paravirt.o               := n
  
  obj-y                 := process_$(BITS).o signal.o entry_$(BITS).o
  obj-y                 += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
@@@ -36,7 -40,7 +40,7 @@@ obj-$(CONFIG_X86_64)  += sys_x86_64.o x8
  obj-$(CONFIG_X86_64)  += syscall_64.o vsyscall_64.o
  obj-y                 += bootflag.o e820.o
  obj-y                 += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 -obj-y                 += alternative.o i8253.o pci-nommu.o
 +obj-y                 += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
  obj-y                 += tsc.o io_delay.o rtc.o
  
  obj-$(CONFIG_X86_TRAMPOLINE)  += trampoline.o
@@@ -17,8 -17,6 +17,8 @@@
  #include <asm/uaccess.h>
  #include <asm/i387.h>
  #include <asm/ds.h>
 +#include <asm/debugreg.h>
 +#include <asm/hw_breakpoint.h>
  
  unsigned long idle_halt;
  EXPORT_SYMBOL(idle_halt);
@@@ -50,8 -48,6 +50,8 @@@ void free_thread_xstate(struct task_str
                kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
                tsk->thread.xstate = NULL;
        }
 +      if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
 +              flush_thread_hw_breakpoint(tsk);
  
        WARN(tsk->thread.ds_ctx, "leaking DS context\n");
  }
@@@ -112,8 -108,12 +112,8 @@@ void flush_thread(void
  
        clear_tsk_thread_flag(tsk, TIF_DEBUG);
  
 -      tsk->thread.debugreg0 = 0;
 -      tsk->thread.debugreg1 = 0;
 -      tsk->thread.debugreg2 = 0;
 -      tsk->thread.debugreg3 = 0;
 -      tsk->thread.debugreg6 = 0;
 -      tsk->thread.debugreg7 = 0;
 +      if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
 +              flush_thread_hw_breakpoint(tsk);
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
@@@ -195,6 -195,16 +195,6 @@@ void __switch_to_xtra(struct task_struc
        else if (next->debugctlmsr != prev->debugctlmsr)
                update_debugctlmsr(next->debugctlmsr);
  
 -      if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 -              set_debugreg(next->debugreg0, 0);
 -              set_debugreg(next->debugreg1, 1);
 -              set_debugreg(next->debugreg2, 2);
 -              set_debugreg(next->debugreg3, 3);
 -              /* no 4 and 5 */
 -              set_debugreg(next->debugreg6, 6);
 -              set_debugreg(next->debugreg7, 7);
 -      }
 -
        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
@@@ -509,16 -519,12 +509,12 @@@ static void c1e_idle(void
                if (!cpumask_test_cpu(cpu, c1e_mask)) {
                        cpumask_set_cpu(cpu, c1e_mask);
                        /*
-                        * Force broadcast so ACPI can not interfere. Needs
-                        * to run with interrupts enabled as it uses
-                        * smp_function_call.
+                        * Force broadcast so ACPI can not interfere.
                         */
-                       local_irq_enable();
                        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
                                           &cpu);
                        printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
                               cpu);
-                       local_irq_disable();
                }
                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
  
diff --combined arch/x86/kernel/ptrace.c
  #include <asm/prctl.h>
  #include <asm/proto.h>
  #include <asm/ds.h>
 +#include <asm/hw_breakpoint.h>
  
- #include <trace/syscall.h>
  #include "tls.h"
  
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/syscalls.h>
  enum x86_regset {
        REGSET_GENERAL,
        REGSET_FP,
@@@ -137,6 -137,11 +138,6 @@@ static int set_segment_reg(struct task_
        return 0;
  }
  
 -static unsigned long debugreg_addr_limit(struct task_struct *task)
 -{
 -      return TASK_SIZE - 3;
 -}
 -
  #else  /* CONFIG_X86_64 */
  
  #define FLAG_MASK             (FLAG_MASK_32 | X86_EFLAGS_NT)
@@@ -261,6 -266,15 +262,6 @@@ static int set_segment_reg(struct task_
        return 0;
  }
  
 -static unsigned long debugreg_addr_limit(struct task_struct *task)
 -{
 -#ifdef CONFIG_IA32_EMULATION
 -      if (test_tsk_thread_flag(task, TIF_IA32))
 -              return IA32_PAGE_OFFSET - 3;
 -#endif
 -      return TASK_SIZE_MAX - 7;
 -}
 -
  #endif        /* CONFIG_X86_32 */
  
  static unsigned long get_flags(struct task_struct *task)
@@@ -451,159 -465,95 +452,159 @@@ static int genregs_set(struct task_stru
  }
  
  /*
 - * This function is trivial and will be inlined by the compiler.
 - * Having it separates the implementation details of debug
 - * registers from the interface details of ptrace.
 + * Decode the length and type bits for a particular breakpoint as
 + * stored in debug register 7.  Return the "enabled" status.
   */
 -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
 +static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
 +              unsigned *type)
  {
 -      switch (n) {
 -      case 0:         return child->thread.debugreg0;
 -      case 1:         return child->thread.debugreg1;
 -      case 2:         return child->thread.debugreg2;
 -      case 3:         return child->thread.debugreg3;
 -      case 6:         return child->thread.debugreg6;
 -      case 7:         return child->thread.debugreg7;
 -      }
 -      return 0;
 +      int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
 +
 +      *len = (bp_info & 0xc) | 0x40;
 +      *type = (bp_info & 0x3) | 0x80;
 +      return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
  }
  
 -static int ptrace_set_debugreg(struct task_struct *child,
 -                             int n, unsigned long data)
 +static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
  {
 +      struct thread_struct *thread = &(current->thread);
        int i;
  
 -      if (unlikely(n == 4 || n == 5))
 -              return -EIO;
 +      /*
 +       * Store in the virtual DR6 register the fact that the breakpoint
 +       * was hit so the thread's debugger will see it.
 +       */
 +      for (i = 0; i < hbp_kernel_pos; i++)
 +              /*
 +               * We will check bp->info.address against the address stored in
 +               * thread's hbp structure and not debugreg[i]. This is to ensure
 +               * that the corresponding bit for 'i' in DR7 register is enabled
 +               */
 +              if (bp->info.address == thread->hbp[i]->info.address)
 +                      break;
  
 -      if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
 -              return -EIO;
 +      thread->debugreg6 |= (DR_TRAP0 << i);
 +}
  
 -      switch (n) {
 -      case 0:         child->thread.debugreg0 = data; break;
 -      case 1:         child->thread.debugreg1 = data; break;
 -      case 2:         child->thread.debugreg2 = data; break;
 -      case 3:         child->thread.debugreg3 = data; break;
 +/*
 + * Handle ptrace writes to debug register 7.
 + */
 +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 +{
 +      struct thread_struct *thread = &(tsk->thread);
 +      unsigned long old_dr7 = thread->debugreg7;
 +      int i, orig_ret = 0, rc = 0;
 +      int enabled, second_pass = 0;
 +      unsigned len, type;
 +      struct hw_breakpoint *bp;
 +
 +      data &= ~DR_CONTROL_RESERVED;
 +restore:
 +      /*
 +       * Loop through all the hardware breakpoints, making the
 +       * appropriate changes to each.
 +       */
 +      for (i = 0; i < HBP_NUM; i++) {
 +              enabled = decode_dr7(data, i, &len, &type);
 +              bp = thread->hbp[i];
 +
 +              if (!enabled) {
 +                      if (bp) {
 +                              /* Don't unregister the breakpoints right-away,
 +                               * unless all register_user_hw_breakpoint()
 +                               * requests have succeeded. This prevents
 +                               * any window of opportunity for debug
 +                               * register grabbing by other users.
 +                               */
 +                              if (!second_pass)
 +                                      continue;
 +                              unregister_user_hw_breakpoint(tsk, bp);
 +                              kfree(bp);
 +                      }
 +                      continue;
 +              }
 +              if (!bp) {
 +                      rc = -ENOMEM;
 +                      bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
 +                      if (bp) {
 +                              bp->info.address = thread->debugreg[i];
 +                              bp->triggered = ptrace_triggered;
 +                              bp->info.len = len;
 +                              bp->info.type = type;
 +                              rc = register_user_hw_breakpoint(tsk, bp);
 +                              if (rc)
 +                                      kfree(bp);
 +                      }
 +              } else
 +                      rc = modify_user_hw_breakpoint(tsk, bp);
 +              if (rc)
 +                      break;
 +      }
 +      /*
 +       * Make a second pass to free the remaining unused breakpoints
 +       * or to restore the original breakpoints if an error occurred.
 +       */
 +      if (!second_pass) {
 +              second_pass = 1;
 +              if (rc < 0) {
 +                      orig_ret = rc;
 +                      data = old_dr7;
 +              }
 +              goto restore;
 +      }
 +      return ((orig_ret < 0) ? orig_ret : rc);
 +}
  
 -      case 6:
 -              if ((data & ~0xffffffffUL) != 0)
 -                      return -EIO;
 -              child->thread.debugreg6 = data;
 -              break;
 +/*
 + * Handle PTRACE_PEEKUSR calls for the debug register area.
 + */
 +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 +{
 +      struct thread_struct *thread = &(tsk->thread);
 +      unsigned long val = 0;
 +
 +      if (n < HBP_NUM)
 +              val = thread->debugreg[n];
 +      else if (n == 6)
 +              val = thread->debugreg6;
 +      else if (n == 7)
 +              val = thread->debugreg7;
 +      return val;
 +}
  
 -      case 7:
 -              /*
 -               * Sanity-check data. Take one half-byte at once with
 -               * check = (val >> (16 + 4*i)) & 0xf. It contains the
 -               * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
 -               * 2 and 3 are LENi. Given a list of invalid values,
 -               * we do mask |= 1 << invalid_value, so that
 -               * (mask >> check) & 1 is a correct test for invalid
 -               * values.
 -               *
 -               * R/Wi contains the type of the breakpoint /
 -               * watchpoint, LENi contains the length of the watched
 -               * data in the watchpoint case.
 -               *
 -               * The invalid values are:
 -               * - LENi == 0x10 (undefined), so mask |= 0x0f00.       [32-bit]
 -               * - R/Wi == 0x10 (break on I/O reads or writes), so
 -               *   mask |= 0x4444.
 -               * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
 -               *   0x1110.
 -               *
 -               * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
 -               *
 -               * See the Intel Manual "System Programming Guide",
 -               * 15.2.4
 -               *
 -               * Note that LENi == 0x10 is defined on x86_64 in long
 -               * mode (i.e. even for 32-bit userspace software, but
 -               * 64-bit kernel), so the x86_64 mask value is 0x5454.
 -               * See the AMD manual no. 24593 (AMD64 System Programming)
 -               */
 -#ifdef CONFIG_X86_32
 -#define       DR7_MASK        0x5f54
 -#else
 -#define       DR7_MASK        0x5554
 -#endif
 -              data &= ~DR_CONTROL_RESERVED;
 -              for (i = 0; i < 4; i++)
 -                      if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
 -                              return -EIO;
 -              child->thread.debugreg7 = data;
 -              if (data)
 -                      set_tsk_thread_flag(child, TIF_DEBUG);
 -              else
 -                      clear_tsk_thread_flag(child, TIF_DEBUG);
 -              break;
 +/*
 + * Handle PTRACE_POKEUSR calls for the debug register area.
 + */
 +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
 +{
 +      struct thread_struct *thread = &(tsk->thread);
 +      int rc = 0;
 +
 +      /* There are no DR4 or DR5 registers */
 +      if (n == 4 || n == 5)
 +              return -EIO;
 +
 +      if (n == 6) {
 +              tsk->thread.debugreg6 = val;
 +              goto ret_path;
        }
 +      if (n < HBP_NUM) {
 +              if (thread->hbp[n]) {
 +                      if (arch_check_va_in_userspace(val,
 +                                      thread->hbp[n]->info.len) == 0) {
 +                              rc = -EIO;
 +                              goto ret_path;
 +                      }
 +                      thread->hbp[n]->info.address = val;
 +              }
 +              thread->debugreg[n] = val;
 +      }
 +      /* All that's left is DR7 */
 +      if (n == 7)
 +              rc = ptrace_write_dr7(tsk, val);
  
 -      return 0;
 +ret_path:
 +      return rc;
  }
  
  /*
@@@ -1548,8 -1498,8 +1549,8 @@@ asmregparm long syscall_trace_enter(str
            tracehook_report_syscall_entry(regs))
                ret = -1L;
  
-       if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
-               ftrace_syscall_enter(regs);
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_enter(regs, regs->orig_ax);
  
        if (unlikely(current->audit_context)) {
                if (IS_IA32)
@@@ -1574,8 -1524,8 +1575,8 @@@ asmregparm void syscall_trace_leave(str
        if (unlikely(current->audit_context))
                audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
  
-       if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
-               ftrace_syscall_exit(regs);
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_exit(regs, regs->ax);
  
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                tracehook_report_syscall_exit(regs, 0);
diff --combined arch/x86/kernel/traps.c
@@@ -54,6 -54,7 +54,7 @@@
  #include <asm/traps.h>
  #include <asm/desc.h>
  #include <asm/i387.h>
+ #include <asm/mce.h>
  
  #include <asm/mach_traps.h>
  
@@@ -65,8 -66,6 +66,6 @@@
  #include <asm/setup.h>
  #include <asm/traps.h>
  
- #include "cpu/mcheck/mce.h"
  asmlinkage int system_call(void);
  
  /* Do we ignore FPU interrupts ? */
@@@ -347,6 -346,9 +346,9 @@@ io_check_error(unsigned char reason, st
        printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
        show_registers(regs);
  
+       if (panic_on_io_nmi)
+               panic("NMI IOCK error: Not continuing");
        /* Re-enable the IOCK line, wait for a few seconds */
        reason = (reason & 0xf) | 8;
        outb(reason, 0x61);
@@@ -530,56 -532,77 +532,56 @@@ asmlinkage __kprobes struct pt_regs *sy
  dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
  {
        struct task_struct *tsk = current;
 -      unsigned long condition;
 +      unsigned long dr6;
        int si_code;
  
 -      get_debugreg(condition, 6);
 +      get_debugreg(dr6, 6);
  
        /* Catch kmemcheck conditions first of all! */
 -      if (condition & DR_STEP && kmemcheck_trap(regs))
 +      if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
                return;
  
 +      /* DR6 may or may not be cleared by the CPU */
 +      set_debugreg(0, 6);
        /*
         * The processor cleared BTF, so don't mark that we need it set.
         */
        clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
        tsk->thread.debugctlmsr = 0;
  
 -      if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
 -                                              SIGTRAP) == NOTIFY_STOP)
 +      /* Store the virtualized DR6 value */
 +      tsk->thread.debugreg6 = dr6;
 +
 +      if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
 +                                                      SIGTRAP) == NOTIFY_STOP)
                return;
  
        /* It's safe to allow irq's after DR6 has been saved */
        preempt_conditional_sti(regs);
  
 -      /* Mask out spurious debug traps due to lazy DR7 setting */
 -      if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
 -              if (!tsk->thread.debugreg7)
 -                      goto clear_dr7;
 +      if (regs->flags & X86_VM_MASK) {
 +              handle_vm86_trap((struct kernel_vm86_regs *) regs,
 +                              error_code, 1);
 +              return;
        }
  
 -#ifdef CONFIG_X86_32
 -      if (regs->flags & X86_VM_MASK)
 -              goto debug_vm86;
 -#endif
 -
 -      /* Save debug status register where ptrace can see it */
 -      tsk->thread.debugreg6 = condition;
 -
        /*
 -       * Single-stepping through TF: make sure we ignore any events in
 -       * kernel space (but re-enable TF when returning to user mode).
 +       * Single-stepping through system calls: ignore any exceptions in
 +       * kernel space, but re-enable TF when returning to user mode.
 +       *
 +       * We already checked v86 mode above, so we can check for kernel mode
 +       * by just checking the CPL of CS.
         */
 -      if (condition & DR_STEP) {
 -              if (!user_mode(regs))
 -                      goto clear_TF_reenable;
 +      if ((dr6 & DR_STEP) && !user_mode(regs)) {
 +              tsk->thread.debugreg6 &= ~DR_STEP;
 +              set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 +              regs->flags &= ~X86_EFLAGS_TF;
        }
 -
 -      si_code = get_si_code(condition);
 -      /* Ok, finally something we can handle */
 -      send_sigtrap(tsk, regs, error_code, si_code);
 -
 -      /*
 -       * Disable additional traps. They'll be re-enabled when
 -       * the signal is delivered.
 -       */
 -clear_dr7:
 -      set_debugreg(0, 7);
 +      si_code = get_si_code(tsk->thread.debugreg6);
 +      if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
 +              send_sigtrap(tsk, regs, error_code, si_code);
        preempt_conditional_cli(regs);
 -      return;
  
 -#ifdef CONFIG_X86_32
 -debug_vm86:
 -      /* reenable preemption: handle_vm86_trap() might sleep */
 -      dec_preempt_count();
 -      handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
 -      conditional_cli(regs);
 -      return;
 -#endif
 -
 -clear_TF_reenable:
 -      set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 -      regs->flags &= ~X86_EFLAGS_TF;
 -      preempt_conditional_cli(regs);
        return;
  }
  
diff --combined arch/x86/power/cpu.c
@@@ -18,7 -18,6 +18,7 @@@
  #include <asm/mce.h>
  #include <asm/xcr.h>
  #include <asm/suspend.h>
 +#include <asm/debugreg.h>
  
  #ifdef CONFIG_X86_32
  static struct saved_context saved_context;
@@@ -105,7 -104,6 +105,7 @@@ static void __save_processor_state(stru
        ctxt->cr4 = read_cr4();
        ctxt->cr8 = read_cr8();
  #endif
 +      hw_breakpoint_disable();
  }
  
  /* Needed by apm.c */
@@@ -148,7 -146,27 +148,7 @@@ static void fix_processor_context(void
        /*
         * Now maybe reload the debug registers
         */
 -      if (current->thread.debugreg7) {
 -#ifdef CONFIG_X86_32
 -              set_debugreg(current->thread.debugreg0, 0);
 -              set_debugreg(current->thread.debugreg1, 1);
 -              set_debugreg(current->thread.debugreg2, 2);
 -              set_debugreg(current->thread.debugreg3, 3);
 -              /* no 4 and 5 */
 -              set_debugreg(current->thread.debugreg6, 6);
 -              set_debugreg(current->thread.debugreg7, 7);
 -#else
 -              /* CONFIG_X86_64 */
 -              loaddebug(&current->thread, 0);
 -              loaddebug(&current->thread, 1);
 -              loaddebug(&current->thread, 2);
 -              loaddebug(&current->thread, 3);
 -              /* no 4 and 5 */
 -              loaddebug(&current->thread, 6);
 -              loaddebug(&current->thread, 7);
 -#endif
 -      }
 -
 +      load_debug_registers();
  }
  
  /**
@@@ -226,7 -244,7 +226,7 @@@ static void __restore_processor_state(s
        do_fpu_end();
        mtrr_ap_init();
  
- #ifdef CONFIG_X86_32
+ #ifdef CONFIG_X86_OLD_MCE
        mcheck_init(&boot_cpu_data);
  #endif
  }
diff --combined kernel/Makefile
@@@ -69,8 -69,9 +69,9 @@@ obj-$(CONFIG_IKCONFIG) += configs.
  obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
  obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
  obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
- obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
+ obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
  obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+ obj-$(CONFIG_GCOV_KERNEL) += gcov/
  obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
  obj-$(CONFIG_KPROBES) += kprobes.o
  obj-$(CONFIG_KGDB) += kgdb.o
@@@ -95,9 -96,9 +96,10 @@@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT
  obj-$(CONFIG_FUNCTION_TRACER) += trace/
  obj-$(CONFIG_TRACING) += trace/
  obj-$(CONFIG_X86_DS) += trace/
+ obj-$(CONFIG_RING_BUFFER) += trace/
  obj-$(CONFIG_SMP) += sched_cpupri.o
  obj-$(CONFIG_SLOW_WORK) += slow-work.o
 +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
  obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
  
  ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --combined kernel/trace/Kconfig
@@@ -18,6 -18,13 +18,13 @@@ config HAVE_FUNCTION_TRACE
  config HAVE_FUNCTION_GRAPH_TRACER
        bool
  
+ config HAVE_FUNCTION_GRAPH_FP_TEST
+       bool
+       help
+        An arch may pass in a unique value (frame pointer) to both the
+        entering and exiting of a function. On exit, the value is compared
+        and if it does not match, then it will panic the kernel.
  config HAVE_FUNCTION_TRACE_MCOUNT_TEST
        bool
        help
@@@ -34,7 -41,7 +41,7 @@@ config HAVE_FTRACE_MCOUNT_RECOR
  config HAVE_HW_BRANCH_TRACER
        bool
  
- config HAVE_FTRACE_SYSCALLS
+ config HAVE_SYSCALL_TRACEPOINTS
        bool
  
  config TRACER_MAX_TRACE
@@@ -53,9 -60,14 +60,14 @@@ config EVENT_TRACIN
        bool
  
  config CONTEXT_SWITCH_TRACER
-       select MARKERS
        bool
  
+ config RING_BUFFER_ALLOW_SWAP
+       bool
+       help
+        Allow the use of ring_buffer_swap_cpu.
+        Adds a very slight overhead to tracing when enabled.
  # All tracer options should select GENERIC_TRACER. For those options that are
  # enabled by all tracers (context switch and event tracer) they select TRACING.
  # This allows those options to appear when no other tracer is selected. But the
@@@ -121,6 -133,7 +133,7 @@@ config FUNCTION_GRAPH_TRACE
        bool "Kernel Function Graph Tracer"
        depends on HAVE_FUNCTION_GRAPH_TRACER
        depends on FUNCTION_TRACER
+       depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
        default y
        help
          Enable the kernel to trace a function at both its return
@@@ -139,6 -152,7 +152,7 @@@ config IRQSOFF_TRACE
        select TRACE_IRQFLAGS
        select GENERIC_TRACER
        select TRACER_MAX_TRACE
+       select RING_BUFFER_ALLOW_SWAP
        help
          This option measures the time spent in irqs-off critical
          sections, with microsecond accuracy.
@@@ -160,6 -174,7 +174,7 @@@ config PREEMPT_TRACE
        depends on PREEMPT
        select GENERIC_TRACER
        select TRACER_MAX_TRACE
+       select RING_BUFFER_ALLOW_SWAP
        help
          This option measures the time spent in preemption off critical
          sections, with microsecond accuracy.
@@@ -203,7 -218,7 +218,7 @@@ config ENABLE_DEFAULT_TRACER
  
  config FTRACE_SYSCALLS
        bool "Trace syscalls"
-       depends on HAVE_FTRACE_SYSCALLS
+       depends on HAVE_SYSCALL_TRACEPOINTS
        select GENERIC_TRACER
        select KALLSYMS
        help
@@@ -218,13 -233,13 +233,13 @@@ config BOOT_TRACE
          the timings of the initcalls and traces key events and the identity
          of tasks that can cause boot delays, such as context-switches.
  
-         Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+         Its aim is to be parsed by the scripts/bootgraph.pl tool to
          produce pretty graphics about boot inefficiencies, giving a visual
          representation of the delays during initcalls - but the raw
          /debug/tracing/trace text output is readable too.
  
-         You must pass in ftrace=initcall to the kernel command line
-         to enable this on bootup.
+         You must pass in initcall_debug and ftrace=initcall to the kernel
+         command line to enable this on bootup.
  
  config TRACE_BRANCH_PROFILING
        bool
@@@ -314,27 -329,6 +329,27 @@@ config POWER_TRACE
          power management decisions, specifically the C-state and P-state
          behavior.
  
 +config KSYM_TRACER
 +      bool "Trace read and write access on kernel memory locations"
 +      depends on HAVE_HW_BREAKPOINT
 +      select TRACING
 +      help
 +        This tracer helps find read and write operations on any given kernel
 +        symbol i.e. /proc/kallsyms.
 +
 +config PROFILE_KSYM_TRACER
 +      bool "Profile all kernel memory accesses on 'watched' variables"
 +      depends on KSYM_TRACER
 +      help
 +        This tracer profiles kernel accesses on variables watched through the
 +        ksym tracer ftrace plugin. Depending upon the hardware, all read
 +        and write operations on kernel variables can be monitored for
 +        accesses.
 +
 +        The results will be displayed in:
 +        /debugfs/tracing/profile_ksym
 +
 +        Say N if unsure.
  
  config STACK_TRACER
        bool "Trace max stack"
diff --combined kernel/trace/trace.h
  #include <linux/trace_seq.h>
  #include <linux/ftrace_event.h>
  
 +#ifdef CONFIG_KSYM_TRACER
 +#include <asm/hw_breakpoint.h>
 +#endif
 +
  enum trace_type {
        __TRACE_FIRST_TYPE = 0,
  
        TRACE_GRAPH_ENT,
        TRACE_USER_STACK,
        TRACE_HW_BRANCHES,
-       TRACE_SYSCALL_ENTER,
-       TRACE_SYSCALL_EXIT,
        TRACE_KMEM_ALLOC,
        TRACE_KMEM_FREE,
        TRACE_POWER,
        TRACE_BLK,
 +      TRACE_KSYM,
  
        __TRACE_LAST_TYPE,
  };
@@@ -212,16 -205,6 +210,16 @@@ struct syscall_trace_exit 
        unsigned long           ret;
  };
  
 +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
 +extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
 +
 +struct ksym_trace_entry {
 +      struct trace_entry      ent;
 +      unsigned long           ip;
 +      unsigned char           type;
 +      char                    ksym_name[KSYM_NAME_LEN];
 +      char                    cmd[TASK_COMM_LEN];
 +};
  
  /*
   * trace_flag_type is an enumeration that holds different
@@@ -251,9 -234,6 +249,6 @@@ struct trace_array_cpu 
        atomic_t                disabled;
        void                    *buffer_page;   /* ring buffer spare */
  
-       /* these fields get copied into max-trace: */
-       unsigned long           trace_idx;
-       unsigned long           overrun;
        unsigned long           saved_latency;
        unsigned long           critical_start;
        unsigned long           critical_end;
        unsigned long           nice;
        unsigned long           policy;
        unsigned long           rt_priority;
+       unsigned long           skipped_entries;
        cycle_t                 preempt_timestamp;
        pid_t                   pid;
        uid_t                   uid;
@@@ -334,11 -315,6 +330,7 @@@ extern void __ftrace_bad_type(void)
                          TRACE_KMEM_ALLOC);    \
                IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
                          TRACE_KMEM_FREE);     \
-               IF_ASSIGN(var, ent, struct syscall_trace_enter,         \
-                         TRACE_SYSCALL_ENTER);                         \
-               IF_ASSIGN(var, ent, struct syscall_trace_exit,          \
-                         TRACE_SYSCALL_EXIT);                          \
 +              IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
                __ftrace_bad_type();                                    \
        } while (0)
  
@@@ -439,12 -415,13 +431,13 @@@ void init_tracer_sysprof_debugfs(struc
  
  struct ring_buffer_event;
  
- struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-                                                   int type,
-                                                   unsigned long len,
-                                                   unsigned long flags,
-                                                   int pc);
- void trace_buffer_unlock_commit(struct trace_array *tr,
+ struct ring_buffer_event *
+ trace_buffer_lock_reserve(struct ring_buffer *buffer,
+                         int type,
+                         unsigned long len,
+                         unsigned long flags,
+                         int pc);
+ void trace_buffer_unlock_commit(struct ring_buffer *buffer,
                                struct ring_buffer_event *event,
                                unsigned long flags, int pc);
  
@@@ -454,10 -431,6 +447,6 @@@ struct trace_entry *tracing_get_trace_e
  struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
                                          int *ent_cpu, u64 *ent_ts);
  
- void tracing_generic_entry_update(struct trace_entry *entry,
-                                 unsigned long flags,
-                                 int pc);
  void default_wait_pipe(struct trace_iterator *iter);
  void poll_wait_pipe(struct trace_iterator *iter);
  
@@@ -487,6 -460,7 +476,7 @@@ void trace_function(struct trace_array 
  
  void trace_graph_return(struct ftrace_graph_ret *trace);
  int trace_graph_entry(struct ftrace_graph_ent *trace);
+ void set_graph_array(struct trace_array *tr);
  
  void tracing_start_cmdline_record(void);
  void tracing_stop_cmdline_record(void);
@@@ -498,16 -472,40 +488,40 @@@ void unregister_tracer(struct tracer *t
  
  extern unsigned long nsecs_to_usecs(unsigned long nsecs);
  
+ #ifdef CONFIG_TRACER_MAX_TRACE
  extern unsigned long tracing_max_latency;
  extern unsigned long tracing_thresh;
  
  void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
  void update_max_tr_single(struct trace_array *tr,
                          struct task_struct *tsk, int cpu);
+ #endif /* CONFIG_TRACER_MAX_TRACE */
+ #ifdef CONFIG_STACKTRACE
+ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
+                       int skip, int pc);
+ void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
+                           int pc);
  
- void __trace_stack(struct trace_array *tr,
-                  unsigned long flags,
-                  int skip, int pc);
+ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
+                  int pc);
+ #else
+ static inline void ftrace_trace_stack(struct trace_array *tr,
+                                     unsigned long flags, int skip, int pc)
+ {
+ }
+ static inline void ftrace_trace_userstack(struct trace_array *tr,
+                                         unsigned long flags, int pc)
+ {
+ }
+ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
+                                int skip, int pc)
+ {
+ }
+ #endif /* CONFIG_STACKTRACE */
  
  extern cycle_t ftrace_now(int cpu);
  
@@@ -533,6 -531,10 +547,10 @@@ extern unsigned long ftrace_update_tot_
  extern int DYN_FTRACE_TEST_NAME(void);
  #endif
  
+ extern int ring_buffer_expanded;
+ extern bool tracing_selftest_disabled;
+ DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
  #ifdef CONFIG_FTRACE_STARTUP_TEST
  extern int trace_selftest_startup_function(struct tracer *trace,
                                           struct trace_array *tr);
@@@ -556,8 -558,6 +574,8 @@@ extern int trace_selftest_startup_branc
                                         struct trace_array *tr);
  extern int trace_selftest_startup_hw_branches(struct tracer *trace,
                                              struct trace_array *tr);
 +extern int trace_selftest_startup_ksym(struct tracer *trace,
 +                                       struct trace_array *tr);
  #endif /* CONFIG_FTRACE_STARTUP_TEST */
  
  extern void *head_page(struct trace_array_cpu *data);
@@@ -566,9 -566,16 +584,16 @@@ extern in
  trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
  extern int
  trace_vprintk(unsigned long ip, const char *fmt, va_list args);
+ extern int
+ trace_array_vprintk(struct trace_array *tr,
+                   unsigned long ip, const char *fmt, va_list args);
+ int trace_array_printk(struct trace_array *tr,
+                      unsigned long ip, const char *fmt, ...);
  
  extern unsigned long trace_flags;
  
+ extern int trace_clock_id;
  /* Standard output formatting function used for function return traces */
  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  extern enum print_line_t print_graph_function(struct trace_iterator *iter);
@@@ -615,6 -622,7 +640,7 @@@ print_graph_function(struct trace_itera
  
  extern struct pid *ftrace_pid_trace;
  
+ #ifdef CONFIG_FUNCTION_TRACER
  static inline int ftrace_trace_task(struct task_struct *task)
  {
        if (!ftrace_pid_trace)
  
        return test_tsk_trace_trace(task);
  }
+ #else
+ static inline int ftrace_trace_task(struct task_struct *task)
+ {
+       return 1;
+ }
+ #endif
  
  /*
   * trace_iterator_flags is an enumeration that defines bit
@@@ -650,9 -664,8 +682,8 @@@ enum trace_iterator_flags 
        TRACE_ITER_PRINTK_MSGONLY       = 0x10000,
        TRACE_ITER_CONTEXT_INFO         = 0x20000, /* Print pid/cpu/time */
        TRACE_ITER_LATENCY_FMT          = 0x40000,
-       TRACE_ITER_GLOBAL_CLK           = 0x80000,
-       TRACE_ITER_SLEEP_TIME           = 0x100000,
-       TRACE_ITER_GRAPH_TIME           = 0x200000,
+       TRACE_ITER_SLEEP_TIME           = 0x80000,
+       TRACE_ITER_GRAPH_TIME           = 0x100000,
  };
  
  /*
@@@ -749,6 -762,7 +780,7 @@@ struct ftrace_event_field 
        struct list_head        link;
        char                    *name;
        char                    *type;
+       int                     filter_type;
        int                     offset;
        int                     size;
        int                     is_signed;
@@@ -758,13 -772,15 +790,15 @@@ struct event_filter 
        int                     n_preds;
        struct filter_pred      **preds;
        char                    *filter_string;
+       bool                    no_reset;
  };
  
  struct event_subsystem {
        struct list_head        list;
        const char              *name;
        struct dentry           *entry;
-       void                    *filter;
+       struct event_filter     *filter;
+       int                     nr_events;
  };
  
  struct filter_pred;
@@@ -792,6 -808,7 +826,7 @@@ extern int apply_subsystem_event_filter
                                        char *filter_string);
  extern void print_subsystem_event_filter(struct event_subsystem *system,
                                         struct trace_seq *s);
+ extern int filter_assign_type(const char *type);
  
  static inline int
  filter_check_discard(struct ftrace_event_call *call, void *rec,
@@@ -17,7 -17,6 +17,7 @@@ static inline int trace_valid_entry(str
        case TRACE_GRAPH_ENT:
        case TRACE_GRAPH_RET:
        case TRACE_HW_BRANCHES:
 +      case TRACE_KSYM:
                return 1;
        }
        return 0;
@@@ -289,6 -288,7 +289,7 @@@ trace_selftest_startup_function_graph(s
         * to detect and recover from possible hangs
         */
        tracing_reset_online_cpus(tr);
+       set_graph_array(tr);
        ret = register_ftrace_graph(&trace_graph_return,
                                    &trace_graph_entry_watchdog);
        if (ret) {
@@@ -808,55 -808,3 +809,55 @@@ trace_selftest_startup_hw_branches(stru
        return ret;
  }
  #endif /* CONFIG_HW_BRANCH_TRACER */
 +
 +#ifdef CONFIG_KSYM_TRACER
 +static int ksym_selftest_dummy;
 +
 +int
 +trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
 +{
 +      unsigned long count;
 +      int ret;
 +
 +      /* start the tracing */
 +      ret = tracer_init(trace, tr);
 +      if (ret) {
 +              warn_failed_init_tracer(trace, ret);
 +              return ret;
 +      }
 +
 +      ksym_selftest_dummy = 0;
 +      /* Register the read-write tracing request */
 +      ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
 +                                      (unsigned long)(&ksym_selftest_dummy));
 +
 +      if (ret < 0) {
 +              printk(KERN_CONT "ksym_trace read-write startup test failed\n");
 +              goto ret_path;
 +      }
 +      /* Perform a read and a write operation over the dummy variable to
 +       * trigger the tracer
 +       */
 +      if (ksym_selftest_dummy == 0)
 +              ksym_selftest_dummy++;
 +
 +      /* stop the tracing. */
 +      tracing_stop();
 +      /* check the trace buffer */
 +      ret = trace_test_buffer(tr, &count);
 +      trace->reset(tr);
 +      tracing_start();
 +
 +      /* read & write operations - one each is performed on the dummy variable
 +       * triggering two entries in the trace buffer
 +       */
 +      if (!ret && count != 2) {
 +              printk(KERN_CONT "Ksym tracer startup test failed");
 +              ret = -1;
 +      }
 +
 +ret_path:
 +      return ret;
 +}
 +#endif /* CONFIG_KSYM_TRACER */
 +