From: Ingo Molnar Date: Wed, 17 Jun 2009 10:52:15 +0000 (+0200) Subject: Merge branch 'linus' into tracing/hw-breakpoints X-Git-Url: http://git.alex.org.uk Merge branch 'linus' into tracing/hw-breakpoints Conflicts: arch/x86/Kconfig arch/x86/kernel/traps.c arch/x86/power/cpu.c arch/x86/power/cpu_32.c kernel/Makefile Semantic conflict: arch/x86/kernel/hw_breakpoint.c Merge reason: Resolve the conflicts, move from put_cpu_no_sched() to put_cpu() in arch/x86/kernel/hw_breakpoint.c. Signed-off-by: Ingo Molnar --- eadb8a091b27a840de7450f84ecff5ef13476424 diff --cc arch/x86/Kconfig index 3033375,cf42fc3..52421d5 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@@ -46,7 -46,12 +46,13 @@@ config X8 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_HW_BREAKPOINT + select HAVE_ARCH_KMEMCHECK + + config OUTPUT_FORMAT + string + default "elf32-i386" if X86_32 + default "elf64-x86-64" if X86_64 config ARCH_DEFCONFIG string diff --cc arch/x86/include/asm/processor.h index 448b34a,c776826..2b03f70 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@@ -428,15 -425,20 +426,19 @@@ struct thread_struct unsigned short fsindex; unsigned short gsindex; #endif + #ifdef CONFIG_X86_32 unsigned long ip; + #endif + #ifdef CONFIG_X86_64 unsigned long fs; + #endif unsigned long gs; /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; + unsigned long debugreg[HBP_NUM]; unsigned long debugreg6; unsigned long debugreg7; + /* Hardware breakpoint info */ + struct hw_breakpoint *hbp[HBP_NUM]; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --cc arch/x86/kernel/hw_breakpoint.c index 6945147,0000000..51d9595 mode 100644,000000..100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@@ -1,391 -1,0 +1,391 @@@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms 
of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Unmasked kernel DR7 value */ +static unsigned long kdr7; + +/* + * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. + * Used to clear and verify the status of bits corresponding to DR0 - DR3 + */ +static const unsigned long dr7_masks[HBP_NUM] = { + 0x000f0003, /* LEN0, R/W0, G0, L0 */ + 0x00f0000c, /* LEN1, R/W1, G1, L1 */ + 0x0f000030, /* LEN2, R/W2, G2, L2 */ + 0xf00000c0 /* LEN3, R/W3, G3, L3 */ +}; + + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. 
+ */ +static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | + DR_GLOBAL_SLOWDOWN; + return bp_info; +} + +void arch_update_kernel_hw_breakpoint(void *unused) +{ + struct hw_breakpoint *bp; + int i, cpu = get_cpu(); + unsigned long temp_kdr7 = 0; + + /* Don't allow debug exceptions while we update the registers */ + set_debugreg(0UL, 7); + + for (i = hbp_kernel_pos; i < HBP_NUM; i++) { + per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; + if (bp) { + temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); + set_debugreg(bp->info.address, i); + } + } + + /* No need to set DR6. Update the debug registers with kernel-space + * breakpoint values from kdr7 and user-space requests from the + * current process + */ + kdr7 = temp_kdr7; + set_debugreg(kdr7 | current->thread.debugreg7, 7); - put_cpu_no_resched(); ++ put_cpu(); +} + +/* + * Install the thread breakpoints in their debug registers. + */ +void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + + switch (hbp_kernel_pos) { + case 4: + set_debugreg(thread->debugreg[3], 3); + case 3: + set_debugreg(thread->debugreg[2], 2); + case 2: + set_debugreg(thread->debugreg[1], 1); + case 1: + set_debugreg(thread->debugreg[0], 0); + default: + break; + } + + /* No need to set DR6 */ + set_debugreg((kdr7 | thread->debugreg7), 7); +} + +/* + * Install the debug register values for just the kernel, no thread. 
+ */ +void arch_uninstall_thread_hw_breakpoint() +{ + /* Clear the user-space portion of debugreg7 by setting only kdr7 */ + set_debugreg(kdr7, 7); + +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case HW_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case HW_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case HW_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +{ + /* + * User-space requests will always have the address field populated + * Symbol names from user-space are rejected + */ + if (tsk && bp->info.name) + return -EINVAL; + /* + * For kernel-addresses, either the address or symbol name can be + * specified. 
+ */ + if (bp->info.name) + bp->info.address = (unsigned long) + kallsyms_lookup_name(bp->info.name); + if (bp->info.address) + return 0; + return -EINVAL; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk) +{ + unsigned int align; + int ret = -EINVAL; + + switch (bp->info.type) { + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + case HW_BREAKPOINT_EXECUTE: + if ((!arch_check_va_in_userspace(bp->info.address, + bp->info.len)) && + bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) + return ret; + break; + case HW_BREAKPOINT_WRITE: + break; + case HW_BREAKPOINT_RW: + break; + default: + return ret; + } + + switch (bp->info.len) { + case HW_BREAKPOINT_LEN_1: + align = 0; + break; + case HW_BREAKPOINT_LEN_2: + align = 1; + break; + case HW_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->triggered) + ret = arch_store_info(bp, tsk); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. 
+ */ + if (bp->info.address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(bp->info.address, + bp->info.len)) + return -EFAULT; + } + return 0; +} + +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + struct hw_breakpoint *bp = thread->hbp[pos]; + + thread->debugreg7 &= ~dr7_masks[pos]; + if (bp) { + thread->debugreg[pos] = bp->info.address; + thread->debugreg7 |= encode_dr7(pos, bp->info.len, + bp->info.type); + } else + thread->debugreg[pos] = 0; +} + +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *thread = &(tsk->thread); + + thread->debugreg7 = 0; + for (i = 0; i < HBP_NUM; i++) + thread->debugreg[i] = 0; +} + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). 
+ * + * NOTIFY_STOP returned for all other cases + * + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct hw_breakpoint *bp; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + /* Lazy debug register switching */ + if (!test_tsk_thread_flag(current, TIF_DEBUG)) + arch_uninstall_thread_hw_breakpoint(); + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + /* + * Find the corresponding hw_breakpoint structure and + * invoke its triggered callback. + */ + if (i >= hbp_kernel_pos) + bp = per_cpu(this_hbp_kernel[i], cpu); + else { + bp = current->thread.hbp[i]; + if (bp) + rc = NOTIFY_DONE; + } + /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); + /* + * bp can be NULL due to lazy debug register switching + * or due to the delay between updates of hbp_kernel_pos + * and this_hbp_kernel. + */ + if (!bp) + continue; + + (bp->triggered)(bp, args->regs); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); - put_cpu_no_resched(); ++ put_cpu(); + return rc; +} + +/* + * Handle debug exception notifications. 
+ */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} diff --cc arch/x86/kernel/traps.c index 124a4d5,5f935f0..286d64e --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@@ -529,13 -530,15 +530,17 @@@ asmlinkage __kprobes struct pt_regs *sy dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); + /* Catch kmemcheck conditions first of all! */ - if (condition & DR_STEP && kmemcheck_trap(regs)) ++ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) + return; + + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ diff --cc arch/x86/power/cpu.c index 0000000,d277ef1..394cbb8 mode 000000,100644..100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@@ -1,0 -1,259 +1,241 @@@ + /* + * Suspend support specific for i386/x86-64. + * + * Distribute under GPLv2 + * + * Copyright (c) 2007 Rafael J. 
Wysocki + * Copyright (c) 2002 Pavel Machek + * Copyright (c) 2001 Patrick Mochel + */ + + #include + #include + + #include + #include + #include + #include + #include + #include + #include ++#include + + #ifdef CONFIG_X86_32 + static struct saved_context saved_context; + + unsigned long saved_context_ebx; + unsigned long saved_context_esp, saved_context_ebp; + unsigned long saved_context_esi, saved_context_edi; + unsigned long saved_context_eflags; + #else + /* CONFIG_X86_64 */ + struct saved_context saved_context; + #endif + + /** + * __save_processor_state - save CPU registers before creating a + * hibernation image and before restoring the memory state from it + * @ctxt - structure to store the registers contents in + * + * NOTE: If there is a CPU register the modification of which by the + * boot kernel (ie. the kernel used for loading the hibernation image) + * might affect the operations of the restored target kernel (ie. the one + * saved in the hibernation image), then its contents must be saved by this + * function. In other words, if kernel A is hibernated and different + * kernel B is used for loading the hibernation image into memory, the + * kernel A's __save_processor_state() function must save all registers + * needed by kernel A, so that it can operate correctly after the resume + * regardless of what kernel B does in the meantime. + */ + static void __save_processor_state(struct saved_context *ctxt) + { + #ifdef CONFIG_X86_32 + mtrr_save_fixed_ranges(NULL); + #endif + kernel_fpu_begin(); + + /* + * descriptor tables + */ + #ifdef CONFIG_X86_32 + store_gdt(&ctxt->gdt); + store_idt(&ctxt->idt); + #else + /* CONFIG_X86_64 */ + store_gdt((struct desc_ptr *)&ctxt->gdt_limit); + store_idt((struct desc_ptr *)&ctxt->idt_limit); + #endif + store_tr(ctxt->tr); + + /* XMM0..XMM15 should be handled by kernel_fpu_begin(). 
*/ + /* + * segment registers + */ + #ifdef CONFIG_X86_32 + savesegment(es, ctxt->es); + savesegment(fs, ctxt->fs); + savesegment(gs, ctxt->gs); + savesegment(ss, ctxt->ss); + #else + /* CONFIG_X86_64 */ + asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); + asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); + asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); + asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); + asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); + + rdmsrl(MSR_FS_BASE, ctxt->fs_base); + rdmsrl(MSR_GS_BASE, ctxt->gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + mtrr_save_fixed_ranges(NULL); + + rdmsrl(MSR_EFER, ctxt->efer); + #endif + + /* + * control registers + */ + ctxt->cr0 = read_cr0(); + ctxt->cr2 = read_cr2(); + ctxt->cr3 = read_cr3(); + #ifdef CONFIG_X86_32 + ctxt->cr4 = read_cr4_safe(); + #else + /* CONFIG_X86_64 */ + ctxt->cr4 = read_cr4(); + ctxt->cr8 = read_cr8(); + #endif ++ hw_breakpoint_disable(); + } + + /* Needed by apm.c */ + void save_processor_state(void) + { + __save_processor_state(&saved_context); + } + #ifdef CONFIG_X86_32 + EXPORT_SYMBOL(save_processor_state); + #endif + + static void do_fpu_end(void) + { + /* + * Restore FPU regs if necessary. + */ + kernel_fpu_end(); + } + + static void fix_processor_context(void) + { + int cpu = smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + + set_tss_desc(cpu, t); /* + * This just modifies memory; should not be + * necessary. But... This is necessary, because + * 386 hardware has concept of busy TSS or some + * similar stupidity. 
+ */ + + #ifdef CONFIG_X86_64 + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; + + syscall_init(); /* This sets MSR_*STAR and related */ + #endif + load_TR_desc(); /* This does ltr */ + load_LDT(&current->active_mm->context); /* This does lldt */ + + /* + * Now maybe reload the debug registers + */ - if (current->thread.debugreg7) { -#ifdef CONFIG_X86_32 - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); -#else - /* CONFIG_X86_64 */ - loaddebug(&current->thread, 0); - loaddebug(&current->thread, 1); - loaddebug(&current->thread, 2); - loaddebug(&current->thread, 3); - /* no 4 and 5 */ - loaddebug(&current->thread, 6); - loaddebug(&current->thread, 7); -#endif - } - ++ load_debug_registers(); + } + + /** + * __restore_processor_state - restore the contents of CPU registers saved + * by __save_processor_state() + * @ctxt - structure to load the registers contents from + */ + static void __restore_processor_state(struct saved_context *ctxt) + { + /* + * control registers + */ + /* cr4 was introduced in the Pentium CPU */ + #ifdef CONFIG_X86_32 + if (ctxt->cr4) + write_cr4(ctxt->cr4); + #else + /* CONFIG X86_64 */ + wrmsrl(MSR_EFER, ctxt->efer); + write_cr8(ctxt->cr8); + write_cr4(ctxt->cr4); + #endif + write_cr3(ctxt->cr3); + write_cr2(ctxt->cr2); + write_cr0(ctxt->cr0); + + /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context().
+ */ + #ifdef CONFIG_X86_32 + load_gdt(&ctxt->gdt); + load_idt(&ctxt->idt); + #else + /* CONFIG_X86_64 */ + load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); + load_idt((const struct desc_ptr *)&ctxt->idt_limit); + #endif + + /* + * segment registers + */ + #ifdef CONFIG_X86_32 + loadsegment(es, ctxt->es); + loadsegment(fs, ctxt->fs); + loadsegment(gs, ctxt->gs); + loadsegment(ss, ctxt->ss); + + /* + * sysenter MSRs + */ + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); + #else + /* CONFIG_X86_64 */ + asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); + asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); + asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); + load_gs_index(ctxt->gs); + asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); + + wrmsrl(MSR_FS_BASE, ctxt->fs_base); + wrmsrl(MSR_GS_BASE, ctxt->gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + #endif + + /* + * restore XCR0 for xsave capable cpu's. + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + + fix_processor_context(); + + do_fpu_end(); + mtrr_ap_init(); + + #ifdef CONFIG_X86_32 + mcheck_init(&boot_cpu_data); + #endif + } + + /* Needed by apm.c */ + void restore_processor_state(void) + { + __restore_processor_state(&saved_context); + } + #ifdef CONFIG_X86_32 + EXPORT_SYMBOL(restore_processor_state); + #endif diff --cc kernel/Makefile index 18ad111,9df4501..f88decb --- a/kernel/Makefile +++ b/kernel/Makefile @@@ -96,7 -97,7 +97,8 @@@ obj-$(CONFIG_TRACING) += trace obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o + obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is