pernode one pool for each NUMA node (equivalent
to global on non-NUMA machines)
+ sunrpc.tcp_slot_table_entries=
+ sunrpc.udp_slot_table_entries=
+ [NFS,SUNRPC]
+ Sets the upper limit on the number of simultaneous
+ RPC calls that can be sent from the client to a
+ server. Increasing these values may allow you to
+ improve throughput, but will also increase the
+ amount of memory reserved for use by the client.
+
+ unsupported Allow loading of unsupported kernel modules:
+ 0 = only allow supported modules,
+ 1 = warn when loading unsupported modules,
+ 2 = don't warn.
+
+ CONFIG_ENTERPRISE_SUPPORT must be enabled for this
+ to have any effect.
+
swiotlb= [IA-64] Number of I/O TLB slabs
switches= [HW,M68k]
core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/
core-$(CONFIG_KVM) += arch/ia64/kvm/
- core-$(CONFIG_PARAVIRT_XEN) += arch/ia64/xen/
+ core-$(CONFIG_XEN) += arch/ia64/xen/
+drivers-$(CONFIG_KDB) += arch/$(ARCH)/kdb/
drivers-$(CONFIG_PCI) += arch/ia64/pci/
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
extern void ia64_mca_cmc_vector_setup(void);
extern int ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *));
extern void ia64_unreg_MCA_extension(void);
+extern int ia64_reg_CE_extension(int (*fn)(void *));
+extern void ia64_unreg_CE_extension(void);
extern unsigned long ia64_get_rnat(unsigned long *);
+ extern void ia64_set_psr_mc(void);
extern void ia64_mca_printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2)));
if (!sos->monarch) {
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+
+ #ifdef CONFIG_KEXEC
+ while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress))
+ udelay(1000);
+ #else
while (monarch_cpu == -1)
- cpu_relax(); /* spin until monarch enters */
+ cpu_relax(); /* spin until monarch enters */
+ #endif
+#ifdef CONFIG_KDB
+ KDB_ENTER_SLAVE();
+ if (kdba_recalcitrant)
+ monarch_cpu = -1;
+#endif /* CONFIG_KDB */
NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
printf("%s", after);
}
+extern void debugger_syslog_data(char *syslog_data[4]);
+#define SYSLOG_WRAP(p) if (p < syslog_data[0]) p = syslog_data[1]-1; \
+ else if (p >= syslog_data[1]) p = syslog_data[0];
+
+static void xmon_show_dmesg(void)
+{
+ char *syslog_data[4], *start, *end, c;
+ int logsize;
+
+ /* syslog_data[0,1] physical start, end+1.
+ * syslog_data[2,3] logical start, end+1.
+ */
+ debugger_syslog_data(syslog_data);
+ if (syslog_data[2] == syslog_data[3])
+ return;
+ logsize = syslog_data[1] - syslog_data[0];
+ start = syslog_data[0] + (syslog_data[2] - syslog_data[0]) % logsize;
+ end = syslog_data[0] + (syslog_data[3] - syslog_data[0]) % logsize;
+
+ /* Do a line at a time (max 200 chars) to reduce overhead */
+ c = '\0';
+ while(1) {
+ char *p;
+ int chars = 0;
+ if (!*start) {
+ while (!*start) {
+ ++start;
+ SYSLOG_WRAP(start);
+ if (start == end)
+ break;
+ }
+ if (start == end)
+ break;
+ }
+ p = start;
+ while (*start && chars < 200) {
+ c = *start;
+ ++chars;
+ ++start;
+ SYSLOG_WRAP(start);
+ if (start == end || c == '\n')
+ break;
+ }
+ if (chars)
+ printf("%.*s", chars, p);
+ if (start == end)
+ break;
+ }
+ if (c != '\n')
+ printf("\n");
+}
+
- #ifdef CONFIG_PPC64
+ #ifdef CONFIG_PPC_BOOK3S_64
static void dump_slb(void)
{
int i;
If unsure, say N.
+config KDB
+ bool "Built-in Kernel Debugger support"
- depends on DEBUG_KERNEL && !XEN
++ depends on DEBUG_KERNEL
+ select KALLSYMS
+ select KALLSYMS_ALL
+ help
+ This option provides a built-in kernel debugger. The built-in
+ kernel debugger contains commands which allow memory to be examined,
+ instructions to be disassembled and breakpoints to be set. For details,
+ see Documentation/kdb/kdb.mm and the manual pages kdb_bt, kdb_ss, etc.
+ Kdb can also be used via the serial port. Set up the system to
+ have a serial console (see Documentation/serial-console.txt).
+ The key sequence <escape>KDB on the serial port will cause the
+ kernel debugger to be entered with input from the serial port and
+ output to the serial console. If unsure, say N.
+
+config KDB_MODULES
+ tristate "KDB modules"
+ depends on KDB
+ help
+ KDB can be extended by adding your own modules, in directory
+ kdb/modules. This option selects the way that these modules should
+ be compiled, as free standing modules (select M) or built into the
+ kernel (select Y). If unsure say M.
+
+config KDB_OFF
+ bool "KDB off by default"
+ depends on KDB
+ help
+ Normally kdb is activated by default, as long as CONFIG_KDB is set.
+ If you want to ship a kernel with kdb support but only have kdb
+ turned on when the user requests it then select this option. When
+ compiled with CONFIG_KDB_OFF, kdb ignores all events unless you boot
+ with kdb=on or you echo "1" > /proc/sys/kernel/kdb. This option also
+ works in reverse, if kdb is normally activated, you can boot with
+ kdb=off or echo "0" > /proc/sys/kernel/kdb to deactivate kdb. If
+ unsure, say N.
+
+config KDB_CONTINUE_CATASTROPHIC
+ int "KDB continues after catastrophic errors"
+ depends on KDB
+ default "0"
+ help
+ This integer controls the behaviour of kdb when the kernel gets a
+ catastrophic error, i.e. for a panic, oops, NMI or other watchdog
+ tripping. CONFIG_KDB_CONTINUE_CATASTROPHIC interacts with
+ /proc/sys/kernel/kdb and CONFIG_LKCD_DUMP (if your kernel has the
+ LKCD patch).
+ When KDB is active (/proc/sys/kernel/kdb == 1) and a catastrophic
+ error occurs, nothing extra happens until you type 'go'.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 0 (default). The first time
+ you type 'go', kdb warns you. The second time you type 'go', KDB
+ tries to continue - no guarantees that the kernel is still usable.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 1. KDB tries to continue - no
+ guarantees that the kernel is still usable.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 2. If your kernel has the LKCD
+ patch and LKCD is configured to take a dump then KDB forces a dump.
+ Whether or not a dump is taken, KDB forces a reboot.
+ When KDB is not active (/proc/sys/kernel/kdb == 0) and a catastrophic
+ error occurs, the following steps are automatic, no human
+ intervention is required.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 0 (default) or 1. KDB attempts
+ to continue - no guarantees that the kernel is still usable.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 2. If your kernel has the LKCD
+ patch and LKCD is configured to take a dump then KDB automatically
+ forces a dump. Whether or not a dump is taken, KDB forces a
+ reboot.
+ If you are not sure, say 0. Read Documentation/kdb/dump.txt before
+ setting to 2.
+
+config KDB_USB
- bool "Support for USB Keyboard in KDB (OHCI and/or EHCI only)"
- depends on KDB && (USB_OHCI_HCD || USB_EHCI_HCD)
++ bool "Support for USB Keyboard in KDB"
++ depends on KDB && (USB_OHCI_HCD || USB_EHCI_HCD || USB_UHCI_HCD)
+ help
+ If you want to use kdb from USB keyboards then say Y here. If you
+ say N then kdb can only be used from a PC (AT) keyboard or a serial
+ console.
+
+config KDB_KDUMP
+	bool "Support for Kdump in KDB"
+	depends on KDB
+	select KEXEC
+	default n
+	help
+	  If you want to take a Kdump vmcore of the kernel from KDB then say
+	  Y here.  If unsure, say N.
+
endmenu
#else
# define IA32_SYSCALL_VECTOR 0x80
#endif
- #define KDBENTER_VECTOR 0x81
++#define KDBENTER_VECTOR 0x81
/*
* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
--- /dev/null
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+
+obj-$(CONFIG_KDB) += kdba_bp.o x86-dis.o kdba_bt.o \
+ kdba_io.o kdba_id.o kdba_support.o
+
+ifneq (,$(findstring -fno-optimize-sibling-calls,$(KBUILD_CFLAGS)))
+ CFLAGS_kdba_bt.o += -DNO_SIBLINGS
+endif
+
+REGPARM := $(subst -mregparm=,,$(filter -mregparm=%,$(KBUILD_CFLAGS)))
+ifeq (,$(REGPARM))
+ifeq ($(CONFIG_X86_32),y)
+ REGPARM := 3
+else
+ REGPARM := 6
+endif
+endif
+
+CFLAGS_kdba_bt.o += -DREGPARM=$(REGPARM) -DCCVERSION="$(CCVERSION)"
+
+override CFLAGS := $(CFLAGS:%-pg=% )
+
- CFLAGS_kdba_io.o += -I $(srctree)/arch/$(SRCARCH)/kdb
++CFLAGS_kdba_io.o += -I $(TOPDIR)/arch/$(SRCARCH)/kdb
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
- * Copyright (c) 2006, 2007-2008 Silicon Graphics, Inc. All Rights Reserved.
++ * Copyright (c) 2006, 2007-2009 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * Common code for doing accurate backtraces on i386 and x86_64, including
+ * printing the values of arguments.
+ */
+
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/nmi.h>
+#include <asm/asm-offsets.h>
+#include <asm/system.h>
+
+/* Debug output helpers for the basic block (BB) code.  KDB_DEBUG_BB and
+ * KDB_DEBUG_BB_OFFSET only print, via kdb_printf, when KDB_DEBUG(BB) is true.
+ */
+#define KDB_DEBUG_BB(fmt, ...)							\
+	{if (KDB_DEBUG(BB)) kdb_printf(fmt, ## __VA_ARGS__);}
+/* Print 'offset' as a sign character followed by its magnitude in hex, between
+ * the string literals 'prefix' and 'suffix'.  'offset' is evaluated more than
+ * once, so it must not have side effects; it is parenthesised so that compound
+ * expressions are negated and compared as a whole.
+ */
+#define KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix)			\
+	kdb_printf(prefix "%c0x%x" suffix,					\
+		   (offset) >= 0 ? '+' : '-',					\
+		   (offset) >= 0 ? (offset) : -(offset))
+#define KDB_DEBUG_BB_OFFSET(offset, prefix, suffix)				\
+	{if (KDB_DEBUG(BB)) KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix);}
+
+/* Sanity check used inside the BB functions.  If 'expr' is true, report the
+ * failing expression together with the value of 'val', set bb_giveup and
+ * return 'ret' from the enclosing function (pass nothing for 'ret' in a void
+ * function).
+ */
+#define BB_CHECK(expr, val, ret)						\
+({										\
+	if (unlikely(expr)) {							\
+		kdb_printf("%s, line %d: BB_CHECK(" #expr ") failed "		\
+			#val "=%lx\n",						\
+			__FUNCTION__, __LINE__, (long)(val));			\
+		bb_giveup = 1;							\
+		return ret;							\
+	}									\
+})
+
+static int bb_giveup;
+
+/* Use BBRG_Rxx for both i386 and x86_64. RAX through R15 must be at the end,
+ * starting with RAX. Some of these codes do not reflect actual registers,
+ * such codes are special cases when parsing the record of register changes.
+ * When updating BBRG_ entries, update bbrg_name as well.
+ */
+
+enum bb_reg_code
+{
+ BBRG_UNDEFINED = 0, /* Register contents are undefined */
+ BBRG_OSP, /* original stack pointer on entry to function */
+ BBRG_RAX,
+ BBRG_RBX,
+ BBRG_RCX,
+ BBRG_RDX,
+ BBRG_RDI,
+ BBRG_RSI,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_R8,
+ BBRG_R9,
+ BBRG_R10,
+ BBRG_R11,
+ BBRG_R12,
+ BBRG_R13,
+ BBRG_R14,
+ BBRG_R15,
+};
+
+const static char *bbrg_name[] = {
+ [BBRG_UNDEFINED] = "undefined",
+ [BBRG_OSP] = "osp",
+ [BBRG_RAX] = "rax",
+ [BBRG_RBX] = "rbx",
+ [BBRG_RCX] = "rcx",
+ [BBRG_RDX] = "rdx",
+ [BBRG_RDI] = "rdi",
+ [BBRG_RSI] = "rsi",
+ [BBRG_RBP] = "rbp",
+ [BBRG_RSP] = "rsp",
+ [BBRG_R8] = "r8",
+ [BBRG_R9] = "r9",
+ [BBRG_R10] = "r10",
+ [BBRG_R11] = "r11",
+ [BBRG_R12] = "r12",
+ [BBRG_R13] = "r13",
+ [BBRG_R14] = "r14",
+ [BBRG_R15] = "r15",
+};
+
+/* Map a register name to its register code. This includes the sub-register
+ * addressable fields, e.g. parts of rax can be addressed as ax, al, ah, eax.
+ * The list is sorted so it can be binary chopped, sort command is:
+ * LANG=C sort -t '"' -k2
+ */
+
+struct bb_reg_code_map {
+ enum bb_reg_code reg;
+ const char *name;
+};
+
+const static struct bb_reg_code_map
+bb_reg_code_map[] = {
+ { BBRG_RAX, "ah" },
+ { BBRG_RAX, "al" },
+ { BBRG_RAX, "ax" },
+ { BBRG_RBX, "bh" },
+ { BBRG_RBX, "bl" },
+ { BBRG_RBP, "bp" },
+ { BBRG_RBP, "bpl" },
+ { BBRG_RBX, "bx" },
+ { BBRG_RCX, "ch" },
+ { BBRG_RCX, "cl" },
+ { BBRG_RCX, "cx" },
+ { BBRG_RDX, "dh" },
+ { BBRG_RDI, "di" },
+ { BBRG_RDI, "dil" },
+ { BBRG_RDX, "dl" },
+ { BBRG_RDX, "dx" },
+ { BBRG_RAX, "eax" },
+ { BBRG_RBP, "ebp" },
+ { BBRG_RBX, "ebx" },
+ { BBRG_RCX, "ecx" },
+ { BBRG_RDI, "edi" },
+ { BBRG_RDX, "edx" },
+ { BBRG_RSI, "esi" },
+ { BBRG_RSP, "esp" },
+ { BBRG_R10, "r10" },
+ { BBRG_R10, "r10d" },
+ { BBRG_R10, "r10l" },
+ { BBRG_R10, "r10w" },
+ { BBRG_R11, "r11" },
+ { BBRG_R11, "r11d" },
+ { BBRG_R11, "r11l" },
+ { BBRG_R11, "r11w" },
+ { BBRG_R12, "r12" },
+ { BBRG_R12, "r12d" },
+ { BBRG_R12, "r12l" },
+ { BBRG_R12, "r12w" },
+ { BBRG_R13, "r13" },
+ { BBRG_R13, "r13d" },
+ { BBRG_R13, "r13l" },
+ { BBRG_R13, "r13w" },
+ { BBRG_R14, "r14" },
+ { BBRG_R14, "r14d" },
+ { BBRG_R14, "r14l" },
+ { BBRG_R14, "r14w" },
+ { BBRG_R15, "r15" },
+ { BBRG_R15, "r15d" },
+ { BBRG_R15, "r15l" },
+ { BBRG_R15, "r15w" },
+ { BBRG_R8, "r8" },
+ { BBRG_R8, "r8d" },
+ { BBRG_R8, "r8l" },
+ { BBRG_R8, "r8w" },
+ { BBRG_R9, "r9" },
+ { BBRG_R9, "r9d" },
+ { BBRG_R9, "r9l" },
+ { BBRG_R9, "r9w" },
+ { BBRG_RAX, "rax" },
+ { BBRG_RBP, "rbp" },
+ { BBRG_RBX, "rbx" },
+ { BBRG_RCX, "rcx" },
+ { BBRG_RDI, "rdi" },
+ { BBRG_RDX, "rdx" },
+ { BBRG_RSI, "rsi" },
+ { BBRG_RSP, "rsp" },
+ { BBRG_RSI, "si" },
+ { BBRG_RSI, "sil" },
+ { BBRG_RSP, "sp" },
+ { BBRG_RSP, "spl" },
+};
+
+/* Record register contents in terms of the values that were passed to this
+ * function, IOW track which registers contain an input value. A register's
+ * contents can be undefined, it can contain an input register value or it can
+ * contain an offset from the original stack pointer.
+ *
+ * This structure is used to represent the current contents of the integer
+ * registers, it is held in an array that is indexed by BBRG_xxx. The element
+ * for BBRG_xxx indicates what input value is currently in BBRG_xxx. When
+ * 'value' is BBRG_OSP then register BBRG_xxx contains a stack pointer,
+ * pointing at 'offset' from the original stack pointer on entry to the
+ * function. When 'value' is not BBRG_OSP then element BBRG_xxx contains the
+ * original contents of an input register and offset is ignored.
+ *
+ * An input register 'value' can be stored in more than one register and/or in
+ * more than one memory location.
+ */
+
+struct bb_reg_contains
+{
+ enum bb_reg_code value: 8;
+ short offset;
+};
+
+/* Note: the offsets in struct bb_mem_contains in this code are _NOT_ offsets
+ * from OSP, they are offsets from the current RSP.  That fits better with the
+ * way that struct pt_regs is built: some code pushes extra data before
+ * pt_regs, so working with OSP-relative offsets gets messy.  struct
+ * bb_mem_contains entries must be in descending order of RSP offset.
+ */
+
+typedef struct { DECLARE_BITMAP(bits, BBRG_R15+1); } bbrgmask_t;
+#define BB_SKIP(reg) (1 << (BBRG_ ## reg))
+struct bb_mem_contains {
+ short offset_address;
+ enum bb_reg_code value: 8;
+};
+
+/* Transfer of control to a label outside the current function. If the
+ * transfer is to a known common restore path that expects known registers
+ * and/or a known memory state (e.g. struct pt_regs) then do a sanity check on
+ * the state at this point.
+ */
+
+struct bb_name_state {
+ const char *name; /* target function */
+ bfd_vma address; /* Address of target function */
+ const char *fname; /* optional from function name */
+ const struct bb_mem_contains *mem; /* expected memory state */
+ const struct bb_reg_contains *regs; /* expected register state */
+ const unsigned short mem_size; /* ARRAY_SIZE(mem) */
+ const unsigned short regs_size; /* ARRAY_SIZE(regs) */
+ const short osp_offset; /* RSP in regs == OSP+osp_offset */
+ const bbrgmask_t skip_mem; /* Some slots in mem may be undefined */
+ const bbrgmask_t skip_regs; /* Some slots in regs may be undefined */
+};
+
+/* NS (NAME_STATE) macros define the register and memory state when we transfer
+ * control to or start decoding a special case name. Use NS when the target
+ * label always has the same state. Use NS_FROM and specify the source label
+ * if the target state is slightly different depending on where it is branched
+ * from. This gives better state checking, by isolating the special cases.
+ *
+ * Note: for the same target label, NS_FROM entries must be followed by a
+ * single NS entry.
+ */
+
+#define NS_FROM(iname, ifname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
+ { \
+ .name = iname, \
+ .fname = ifname, \
+ .mem = imem, \
+ .regs = iregs, \
+ .mem_size = ARRAY_SIZE(imem), \
+ .regs_size = ARRAY_SIZE(iregs), \
+ .skip_mem.bits[0] = iskip_mem, \
+ .skip_regs.bits[0] = iskip_regs, \
+ .osp_offset = iosp_offset, \
+ .address = 0 \
+ }
+
+/* Shorter forms for the common cases */
+#define NS(iname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
+ NS_FROM(iname, NULL, imem, iregs, iskip_mem, iskip_regs, iosp_offset)
+#define NS_MEM(iname, imem, iskip_mem) \
+ NS_FROM(iname, NULL, imem, no_regs, iskip_mem, 0, 0)
+#define NS_MEM_FROM(iname, ifname, imem, iskip_mem) \
+ NS_FROM(iname, ifname, imem, no_regs, iskip_mem, 0, 0)
+#define NS_REG(iname, iregs, iskip_regs) \
+ NS_FROM(iname, NULL, no_memory, iregs, 0, iskip_regs, 0)
+#define NS_REG_FROM(iname, ifname, iregs, iskip_regs) \
+ NS_FROM(iname, ifname, no_memory, iregs, 0, iskip_regs, 0)
+
+static void
+bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src);
+
+static const char *bb_mod_name, *bb_func_name;
+
+/* Return 1 if 'name' exactly matches one of the function names in the table
+ * below, otherwise 0.  Judging by the function name these are callees that
+ * do not return to their caller -- confirm against the call sites.
+ */
+static int
+bb_noret(const char *name)
+{
+	static const char * const noret_names[] = {
+		"panic",
+		"do_exit",
+		"do_group_exit",
+		"complete_and_exit",
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(noret_names) / sizeof(noret_names[0]); ++idx) {
+		if (strcmp(name, noret_names[idx]) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*============================================================================*/
+/* */
+/* Most of the basic block code and data is common to x86_64 and i386. This */
+/* large ifdef contains almost all of the differences between the two */
+/* architectures. */
+/* */
+/* Make sure you update the correct section of this ifdef. */
+/* */
+/*============================================================================*/
+
+#ifdef CONFIG_X86_64
+
+/* Registers that can be used to pass parameters, in the order that parameters
+ * are passed.
+ */
+
+const static enum bb_reg_code
+bb_param_reg[] = {
+ BBRG_RDI,
+ BBRG_RSI,
+ BBRG_RDX,
+ BBRG_RCX,
+ BBRG_R8,
+ BBRG_R9,
+};
+
+const static enum bb_reg_code
+bb_preserved_reg[] = {
+ BBRG_RBX,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_R12,
+ BBRG_R13,
+ BBRG_R14,
+ BBRG_R15,
+};
+
+static const struct bb_mem_contains full_pt_regs[] = {
+ { 0x70, BBRG_RDI },
+ { 0x68, BBRG_RSI },
+ { 0x60, BBRG_RDX },
+ { 0x58, BBRG_RCX },
+ { 0x50, BBRG_RAX },
+ { 0x48, BBRG_R8 },
+ { 0x40, BBRG_R9 },
+ { 0x38, BBRG_R10 },
+ { 0x30, BBRG_R11 },
+ { 0x28, BBRG_RBX },
+ { 0x20, BBRG_RBP },
+ { 0x18, BBRG_R12 },
+ { 0x10, BBRG_R13 },
+ { 0x08, BBRG_R14 },
+ { 0x00, BBRG_R15 },
+};
++static const struct bb_mem_contains full_pt_regs_plus_1[] = {
++ { 0x78, BBRG_RDI },
++ { 0x70, BBRG_RSI },
++ { 0x68, BBRG_RDX },
++ { 0x60, BBRG_RCX },
++ { 0x58, BBRG_RAX },
++ { 0x50, BBRG_R8 },
++ { 0x48, BBRG_R9 },
++ { 0x40, BBRG_R10 },
++ { 0x38, BBRG_R11 },
++ { 0x30, BBRG_RBX },
++ { 0x28, BBRG_RBP },
++ { 0x20, BBRG_R12 },
++ { 0x18, BBRG_R13 },
++ { 0x10, BBRG_R14 },
++ { 0x08, BBRG_R15 },
++};
++/*
++ * Going into error_exit we have the hardware pushed error_code on the stack
++ * plus a full pt_regs
++ */
++static const struct bb_mem_contains error_code_full_pt_regs[] = {
++ { 0x78, BBRG_UNDEFINED },
++ { 0x70, BBRG_RDI },
++ { 0x68, BBRG_RSI },
++ { 0x60, BBRG_RDX },
++ { 0x58, BBRG_RCX },
++ { 0x50, BBRG_RAX },
++ { 0x48, BBRG_R8 },
++ { 0x40, BBRG_R9 },
++ { 0x38, BBRG_R10 },
++ { 0x30, BBRG_R11 },
++ { 0x28, BBRG_RBX },
++ { 0x20, BBRG_RBP },
++ { 0x18, BBRG_R12 },
++ { 0x10, BBRG_R13 },
++ { 0x08, BBRG_R14 },
++ { 0x00, BBRG_R15 },
++};
+static const struct bb_mem_contains partial_pt_regs[] = {
+ { 0x40, BBRG_RDI },
+ { 0x38, BBRG_RSI },
+ { 0x30, BBRG_RDX },
+ { 0x28, BBRG_RCX },
+ { 0x20, BBRG_RAX },
+ { 0x18, BBRG_R8 },
+ { 0x10, BBRG_R9 },
+ { 0x08, BBRG_R10 },
+ { 0x00, BBRG_R11 },
+};
+static const struct bb_mem_contains partial_pt_regs_plus_1[] = {
+ { 0x48, BBRG_RDI },
+ { 0x40, BBRG_RSI },
+ { 0x38, BBRG_RDX },
+ { 0x30, BBRG_RCX },
+ { 0x28, BBRG_RAX },
+ { 0x20, BBRG_R8 },
+ { 0x18, BBRG_R9 },
+ { 0x10, BBRG_R10 },
+ { 0x08, BBRG_R11 },
+};
+static const struct bb_mem_contains partial_pt_regs_plus_2[] = {
+ { 0x50, BBRG_RDI },
+ { 0x48, BBRG_RSI },
+ { 0x40, BBRG_RDX },
+ { 0x38, BBRG_RCX },
+ { 0x30, BBRG_RAX },
+ { 0x28, BBRG_R8 },
+ { 0x20, BBRG_R9 },
+ { 0x18, BBRG_R10 },
+ { 0x10, BBRG_R11 },
+};
+static const struct bb_mem_contains no_memory[] = {
+};
+/* Hardware has already pushed an error_code on the stack. Use undefined just
+ * to set the initial stack offset.
+ */
+static const struct bb_mem_contains error_code[] = {
+ { 0x0, BBRG_UNDEFINED },
+};
+/* error_code plus original rax */
+static const struct bb_mem_contains error_code_rax[] = {
+ { 0x8, BBRG_UNDEFINED },
+ { 0x0, BBRG_RAX },
+};
+
+static const struct bb_reg_contains all_regs[] = {
+ [BBRG_RAX] = { BBRG_RAX, 0 },
+ [BBRG_RBX] = { BBRG_RBX, 0 },
+ [BBRG_RCX] = { BBRG_RCX, 0 },
+ [BBRG_RDX] = { BBRG_RDX, 0 },
+ [BBRG_RDI] = { BBRG_RDI, 0 },
+ [BBRG_RSI] = { BBRG_RSI, 0 },
+ [BBRG_RBP] = { BBRG_RBP, 0 },
+ [BBRG_RSP] = { BBRG_OSP, 0 },
+ [BBRG_R8 ] = { BBRG_R8, 0 },
+ [BBRG_R9 ] = { BBRG_R9, 0 },
+ [BBRG_R10] = { BBRG_R10, 0 },
+ [BBRG_R11] = { BBRG_R11, 0 },
+ [BBRG_R12] = { BBRG_R12, 0 },
+ [BBRG_R13] = { BBRG_R13, 0 },
+ [BBRG_R14] = { BBRG_R14, 0 },
+ [BBRG_R15] = { BBRG_R15, 0 },
+};
+static const struct bb_reg_contains no_regs[] = {
+};
+
+static struct bb_name_state bb_special_cases[] = {
+
+ /* First the cases that pass data only in memory. We do not check any
+ * register state for these cases.
+ */
+
+ /* Simple cases, no exceptions */
+ NS_MEM("ia32_ptregs_common", partial_pt_regs_plus_1, 0),
+ NS_MEM("ia32_sysret", partial_pt_regs, 0),
+ NS_MEM("int_careful", partial_pt_regs, 0),
++ NS_MEM("ia32_badarg", partial_pt_regs, 0),
+ NS_MEM("int_restore_rest", full_pt_regs, 0),
+ NS_MEM("int_signal", full_pt_regs, 0),
+ NS_MEM("int_very_careful", partial_pt_regs, 0),
- NS_MEM("int_with_check", partial_pt_regs, 0),
- #ifdef CONFIG_TRACE_IRQFLAGS
- NS_MEM("paranoid_exit0", full_pt_regs, 0),
- #endif /* CONFIG_TRACE_IRQFLAGS */
- NS_MEM("paranoid_exit1", full_pt_regs, 0),
- NS_MEM("ptregscall_common", partial_pt_regs_plus_1, 0),
- NS_MEM("restore_norax", partial_pt_regs, 0),
- NS_MEM("restore", partial_pt_regs, 0),
++ NS_MEM("ptregscall_common", full_pt_regs_plus_1, 0),
+ NS_MEM("ret_from_intr", partial_pt_regs_plus_2, 0),
+ NS_MEM("stub32_clone", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_execve", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_fork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_iopl", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_rt_sigreturn", partial_pt_regs_plus_1, 0),
- NS_MEM("stub32_rt_sigsuspend", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_sigaltstack", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_sigreturn", partial_pt_regs_plus_1, 0),
- NS_MEM("stub32_sigsuspend", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_vfork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_clone", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_execve", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_fork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_iopl", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_rt_sigreturn", partial_pt_regs_plus_1, 0),
- NS_MEM("stub_rt_sigsuspend", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_sigaltstack", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_vfork", partial_pt_regs_plus_1, 0),
++ NS_MEM("sysenter_auditsys", partial_pt_regs,
++ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11)),
++
++ NS_MEM("paranoid_exit", error_code_full_pt_regs, 0),
+
+ NS_MEM_FROM("ia32_badsys", "ia32_sysenter_target",
+ partial_pt_regs,
+ /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
+ * some paths. It also stomps on RAX.
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("ia32_badsys", "ia32_cstar_target",
+ partial_pt_regs,
+ /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
+ * paths. It also stomps on RAX. Even more confusing, instead
+ * of storing RCX it stores RBP. WTF?
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
++ NS_MEM_FROM("ia32_badsys", "ia32_syscall",
++ partial_pt_regs,
++ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11)),
+ NS_MEM("ia32_badsys", partial_pt_regs, 0),
+
++#ifdef CONFIG_AUDITSYSCALL
++ NS_MEM_FROM("int_with_check", "sysexit_audit", partial_pt_regs,
++ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
++ BB_SKIP(RAX)),
++ NS_MEM_FROM("int_with_check", "ia32_cstar_target", partial_pt_regs,
++ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
++ BB_SKIP(RAX) | BB_SKIP(RCX)),
++#endif
++ NS_MEM("int_with_check", no_memory, 0),
++
+ /* Various bits of code branch to int_ret_from_sys_call, with slightly
+ * different missing values in pt_regs.
+ */
+ NS_MEM_FROM("int_ret_from_sys_call", "ret_from_fork",
+ partial_pt_regs,
+ BB_SKIP(R11)),
+ NS_MEM_FROM("int_ret_from_sys_call", "stub_execve",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "stub_rt_sigreturn",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "kernel_execve",
+ partial_pt_regs,
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_syscall",
+ partial_pt_regs,
+ /* ia32_syscall only saves RDI through RCX. */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_sysenter_target",
+ partial_pt_regs,
+ /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
+ * some paths. It also stomps on RAX.
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_cstar_target",
+ partial_pt_regs,
+ /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
+ * paths. It also stomps on RAX. Even more confusing, instead
+ * of storing RCX it stores RBP. WTF?
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
++ NS_MEM_FROM("int_ret_from_sys_call", "ia32_badsys",
++ partial_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("int_ret_from_sys_call", partial_pt_regs, 0),
+
+#ifdef CONFIG_PREEMPT
+ NS_MEM("retint_kernel", partial_pt_regs, BB_SKIP(RAX)),
+#endif /* CONFIG_PREEMPT */
+
+ NS_MEM("retint_careful", partial_pt_regs, BB_SKIP(RAX)),
+
+ /* Horrible hack: For a brand new x86_64 task, switch_to() branches to
+ * ret_from_fork with a totally different stack state from all the
+ * other tasks that come out of switch_to(). This non-standard state
+ * cannot be represented so just ignore the branch from switch_to() to
+ * ret_from_fork. Due to inlining and linker labels, switch_to() can
+ * appear as several different function labels, including schedule,
+ * context_switch and __sched_text_start.
+ */
+ NS_MEM_FROM("ret_from_fork", "schedule", no_memory, 0),
++ NS_MEM_FROM("ret_from_fork", "__schedule", no_memory, 0),
+ NS_MEM_FROM("ret_from_fork", "__sched_text_start", no_memory, 0),
+ NS_MEM_FROM("ret_from_fork", "context_switch", no_memory, 0),
+ NS_MEM("ret_from_fork", full_pt_regs, 0),
+
-
+ NS_MEM_FROM("ret_from_sys_call", "ret_from_fork",
+ partial_pt_regs,
+ BB_SKIP(R11)),
+ NS_MEM("ret_from_sys_call", partial_pt_regs, 0),
+
+ NS_MEM("retint_restore_args",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_MEM("retint_swapgs",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ /* Now the cases that pass data in registers. We do not check any
+ * memory state for these cases.
+ */
+
+ NS_REG("bad_put_user",
- all_regs,
- BB_SKIP(RAX) | BB_SKIP(RCX) | BB_SKIP(R8)),
++ all_regs, BB_SKIP(RBX)),
+
+ NS_REG("bad_get_user",
- all_regs,
- BB_SKIP(RAX) | BB_SKIP(RCX) | BB_SKIP(R8)),
++ all_regs, BB_SKIP(RAX) | BB_SKIP(RDX)),
+
+ NS_REG("bad_to_user",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("ia32_ptregs_common",
+ all_regs,
+ 0),
+
+ NS_REG("copy_user_generic_unrolled",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("copy_user_generic_string",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("irq_return",
+ all_regs,
+ 0),
+
+ /* Finally the cases that pass data in both registers and memory.
+ */
+
+ NS("invalid_TSS", error_code, all_regs, 0, 0, 0),
+ NS("segment_not_present", error_code, all_regs, 0, 0, 0),
+ NS("alignment_check", error_code, all_regs, 0, 0, 0),
+ NS("page_fault", error_code, all_regs, 0, 0, 0),
+ NS("general_protection", error_code, all_regs, 0, 0, 0),
+ NS("error_entry", error_code_rax, all_regs, 0, BB_SKIP(RAX), -0x10),
++ NS("error_exit", error_code_full_pt_regs, no_regs, 0, 0, 0x30),
+ NS("common_interrupt", error_code, all_regs, 0, 0, -0x8),
+ NS("save_args", error_code, all_regs, 0, 0, -0x50),
++ NS("int3", no_memory, all_regs, 0, 0, -0x80),
+};
+
+static const char *bb_spurious[] = {
+ /* schedule */
+ "thread_return",
- /* ret_from_fork */
- "rff_action",
- "rff_trace",
+ /* system_call */
++ "system_call_after_swapgs",
++ "system_call_fastpath",
+ "ret_from_sys_call",
+ "sysret_check",
+ "sysret_careful",
+ "sysret_signal",
+ "badsys",
++#ifdef CONFIG_AUDITSYSCALL
++ "auditsys",
++ "sysret_audit",
++#endif
+ "tracesys",
+ "int_ret_from_sys_call",
+ "int_with_check",
+ "int_careful",
+ "int_very_careful",
+ "int_signal",
+ "int_restore_rest",
+ /* common_interrupt */
+ "ret_from_intr",
+ "exit_intr",
+ "retint_with_reschedule",
+ "retint_check",
+ "retint_swapgs",
+ "retint_restore_args",
+ "restore_args",
+ "irq_return",
+ "bad_iret",
+ "retint_careful",
+ "retint_signal",
+#ifdef CONFIG_PREEMPT
+ "retint_kernel",
+#endif /* CONFIG_PREEMPT */
- /* .macro paranoidexit */
- #ifdef CONFIG_TRACE_IRQFLAGS
- "paranoid_exit0",
- "paranoid_userspace0",
- "paranoid_restore0",
- "paranoid_swapgs0",
- "paranoid_schedule0",
- #endif /* CONFIG_TRACE_IRQFLAGS */
- "paranoid_exit1",
- "paranoid_swapgs1",
- "paranoid_restore1",
- "paranoid_userspace1",
- "paranoid_schedule1",
++ /* paranoid_exit */
++ "paranoid_swapgs",
++ "paranoid_restore",
++ "paranoid_userspace",
++ "paranoid_schedule",
+ /* error_entry */
+ "error_swapgs",
+ "error_sti",
- "error_exit",
+ "error_kernelspace",
++ /* nmi */
++#ifdef CONFIG_TRACE_IRQFLAGS
++ "nmi_swapgs",
++ "nmi_restore",
++ "nmi_userspace",
++ "nmi_schedule",
++#endif
+ /* load_gs_index */
+ "gs_change",
+ "bad_gs",
+ /* ia32_sysenter_target */
+ "sysenter_do_call",
++ "sysenter_dispatch",
++ "sysexit_from_sys_call",
++#ifdef CONFIG_AUDITSYSCALL
++ "sysenter_auditsys",
++ "sysexit_audit",
++#endif
+ "sysenter_tracesys",
+ /* ia32_cstar_target */
+ "cstar_do_call",
++ "cstar_dispatch",
++ "sysretl_from_sys_call",
++#ifdef CONFIG_AUDITSYSCALL
++ "cstar_auditsys",
++ "sysretl_audit",
++#endif
+ "cstar_tracesys",
- "ia32_badarg",
+ /* ia32_syscall */
- "ia32_do_syscall",
++ "ia32_do_call",
+ "ia32_sysret",
+ "ia32_tracesys",
- "ia32_badsys",
+#ifdef CONFIG_HIBERNATION
+ /* restore_image */
+ "loop",
+ "done",
+#endif /* CONFIG_HIBERNATION */
+#ifdef CONFIG_KPROBES
+ /* jprobe_return */
+ "jprobe_return_end",
+ /* kretprobe_trampoline_holder */
+ "kretprobe_trampoline",
+#endif /* CONFIG_KPROBES */
+#ifdef CONFIG_KEXEC
+ /* relocate_kernel */
+ "relocate_new_kernel",
+#endif /* CONFIG_KEXEC */
- #ifdef CONFIG_PARAVIRT_XEN
++#ifdef CONFIG_XEN
+ /* arch/i386/xen/xen-asm.S */
+ "xen_irq_enable_direct_end",
+ "xen_irq_disable_direct_end",
+ "xen_save_fl_direct_end",
+ "xen_restore_fl_direct_end",
+ "xen_iret_start_crit",
+ "iret_restore_end",
+ "xen_iret_end_crit",
+ "hyper_iret",
+#endif /* CONFIG_XEN */
+};
+
+static const char *bb_hardware_handlers[] = {
+ "system_call",
+ "common_interrupt",
+ "error_entry",
+ "debug",
+ "nmi",
+ "int3",
+ "double_fault",
+ "stack_segment",
+ "machine_check",
+ "kdb_call",
+};
+
+/* Return the number of bytes of hardware pushed data at 'rsp', not counting
+ * the word at 'rsp' itself.
+ *
+ * x86_64 interrupt stacks are 16 byte aligned, so the next rsp cannot be
+ * calculated statically, it has to be read from the stack (the word at
+ * rsp + 3 words).  The word at rsp is pushed by hardware but is treated as a
+ * normal software return value, so it is excluded from the count.
+ *
+ * After an IST switch (e.g. NMI) the saved rsp points into a different stack.
+ * In that case, or when the computed size is implausibly small, assume that
+ * the IST stack is 16 byte aligned and report just the four hardware words on
+ * this stack; the stack unwind code deals with the stack switch.
+ */
+static int
+bb_hardware_pushed_arch(kdb_machreg_t rsp,
+			const struct kdb_activation_record *ar)
+{
+	kdb_machreg_t saved_rsp = *((kdb_machreg_t *)rsp + 3);
+	int hardware_pushed = saved_rsp - rsp - KDB_WORD_SIZE;
+
+	if (hardware_pushed >= 4 * KDB_WORD_SIZE &&
+	    saved_rsp >= ar->stack.logical_start &&
+	    saved_rsp < ar->stack.logical_end)
+		return hardware_pushed;
+
+	return 4 * KDB_WORD_SIZE;
+}
+
+static void
+bb_start_block0(void)
+{
+ bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
+ bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
+ bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
+ bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
+ bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
+ bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
+ bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
+ bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
+ bb_reg_code_set_value(BBRG_R8, BBRG_R8);
+ bb_reg_code_set_value(BBRG_R9, BBRG_R9);
+ bb_reg_code_set_value(BBRG_R10, BBRG_R10);
+ bb_reg_code_set_value(BBRG_R11, BBRG_R11);
+ bb_reg_code_set_value(BBRG_R12, BBRG_R12);
+ bb_reg_code_set_value(BBRG_R13, BBRG_R13);
+ bb_reg_code_set_value(BBRG_R14, BBRG_R14);
+ bb_reg_code_set_value(BBRG_R15, BBRG_R15);
+}
+
+/* x86_64 does not have a special case for __switch_to */
+
+static void
+bb_fixup_switch_to(char *p)
+{
+}
+
+static int
+bb_asmlinkage_arch(void)
+{
+ return strncmp(bb_func_name, "__down", 6) == 0 ||
+ strncmp(bb_func_name, "__up", 4) == 0 ||
+ strncmp(bb_func_name, "stub_", 5) == 0 ||
+ strcmp(bb_func_name, "ret_from_fork") == 0 ||
+ strcmp(bb_func_name, "ptregscall_common") == 0;
+}
+
+#else /* !CONFIG_X86_64 */
+
+/* Registers that can be used to pass parameters, in the order that parameters
+ * are passed.
+ */
+
+const static enum bb_reg_code
+bb_param_reg[] = {
+ BBRG_RAX,
+ BBRG_RDX,
+ BBRG_RCX,
+};
+
+const static enum bb_reg_code
+bb_preserved_reg[] = {
+ BBRG_RBX,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_RSI,
+ BBRG_RDI,
+};
+
+static const struct bb_mem_contains full_pt_regs[] = {
+ { 0x18, BBRG_RAX },
+ { 0x14, BBRG_RBP },
+ { 0x10, BBRG_RDI },
+ { 0x0c, BBRG_RSI },
+ { 0x08, BBRG_RDX },
+ { 0x04, BBRG_RCX },
+ { 0x00, BBRG_RBX },
+};
+static const struct bb_mem_contains no_memory[] = {
+};
+/* Hardware has already pushed an error_code on the stack. Use undefined just
+ * to set the initial stack offset.
+ */
+static const struct bb_mem_contains error_code[] = {
+ { 0x0, BBRG_UNDEFINED },
+};
+/* rbx already pushed */
+static const struct bb_mem_contains rbx_pushed[] = {
+ { 0x0, BBRG_RBX },
+};
+#ifdef CONFIG_MATH_EMULATION
+static const struct bb_mem_contains mem_fpu_reg_round[] = {
+ { 0xc, BBRG_RBP },
+ { 0x8, BBRG_RSI },
+ { 0x4, BBRG_RDI },
+ { 0x0, BBRG_RBX },
+};
+#endif /* CONFIG_MATH_EMULATION */
+
+static const struct bb_reg_contains all_regs[] = {
+ [BBRG_RAX] = { BBRG_RAX, 0 },
+ [BBRG_RBX] = { BBRG_RBX, 0 },
+ [BBRG_RCX] = { BBRG_RCX, 0 },
+ [BBRG_RDX] = { BBRG_RDX, 0 },
+ [BBRG_RDI] = { BBRG_RDI, 0 },
+ [BBRG_RSI] = { BBRG_RSI, 0 },
+ [BBRG_RBP] = { BBRG_RBP, 0 },
+ [BBRG_RSP] = { BBRG_OSP, 0 },
+};
+static const struct bb_reg_contains no_regs[] = {
+};
+#ifdef CONFIG_MATH_EMULATION
+static const struct bb_reg_contains reg_fpu_reg_round[] = {
+ [BBRG_RBP] = { BBRG_OSP, -0x4 },
+ [BBRG_RSP] = { BBRG_OSP, -0x10 },
+};
+#endif /* CONFIG_MATH_EMULATION */
+
+static struct bb_name_state bb_special_cases[] = {
+
+ /* First the cases that pass data only in memory. We do not check any
+ * register state for these cases.
+ */
+
+ /* Simple cases, no exceptions */
+ NS_MEM("check_userspace", full_pt_regs, 0),
+ NS_MEM("device_not_available_emulate", full_pt_regs, 0),
+ NS_MEM("ldt_ss", full_pt_regs, 0),
+ NS_MEM("no_singlestep", full_pt_regs, 0),
+ NS_MEM("restore_all", full_pt_regs, 0),
+ NS_MEM("restore_nocheck", full_pt_regs, 0),
+ NS_MEM("restore_nocheck_notrace", full_pt_regs, 0),
+ NS_MEM("ret_from_exception", full_pt_regs, 0),
+ NS_MEM("ret_from_fork", full_pt_regs, 0),
+ NS_MEM("ret_from_intr", full_pt_regs, 0),
+ NS_MEM("work_notifysig", full_pt_regs, 0),
+ NS_MEM("work_pending", full_pt_regs, 0),
+
+#ifdef CONFIG_PREEMPT
+ NS_MEM("resume_kernel", full_pt_regs, 0),
+#endif /* CONFIG_PREEMPT */
+
+ NS_MEM("common_interrupt", error_code, 0),
+ NS_MEM("error_code", error_code, 0),
+
+ NS_MEM("bad_put_user", rbx_pushed, 0),
+
+ NS_MEM_FROM("resume_userspace", "syscall_badsys",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM_FROM("resume_userspace", "syscall_fault",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM_FROM("resume_userspace", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ /* Too difficult to trace through the various vm86 functions for now.
+ * They are C functions that start off with some memory state, fiddle
+ * the registers then jmp directly to resume_userspace. For the
+ * moment, just assume that they are valid and do no checks.
+ */
+ NS_FROM("resume_userspace", "do_int",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "do_sys_vm86",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "handle_vm86_fault",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "handle_vm86_trap",
+ no_memory, no_regs, 0, 0, 0),
+ NS_MEM("resume_userspace", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_badsys", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RBP)),
+ NS_MEM("syscall_badsys", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_call", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_call", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_exit", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_exit", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_exit_work", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RAX) | BB_SKIP(RBP)),
+ NS_MEM_FROM("syscall_exit_work", "system_call",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_exit_work", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_trace_entry", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RBP)),
+ NS_MEM_FROM("syscall_trace_entry", "system_call",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_trace_entry", full_pt_regs, 0),
+
+ /* Now the cases that pass data in registers. We do not check any
+ * memory state for these cases.
+ */
+
+ NS_REG("syscall_fault", all_regs, 0),
+
+ NS_REG("bad_get_user", all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RDX)),
+
+ /* Finally the cases that pass data in both registers and memory.
+ */
+
+ /* This entry is redundant now because bb_fixup_switch_to() hides the
+ * jmp __switch_to case, however the entry is left here as
+ * documentation.
+ *
+ * NS("__switch_to", no_memory, no_regs, 0, 0, 0),
+ */
+
+ NS("iret_exc", no_memory, all_regs, 0, 0, 0x20),
+
+#ifdef CONFIG_MATH_EMULATION
+ NS("fpu_reg_round", mem_fpu_reg_round, reg_fpu_reg_round, 0, 0, 0),
+#endif /* CONFIG_MATH_EMULATION */
+};
+
+static const char *bb_spurious[] = {
+ /* ret_from_exception */
+ "ret_from_intr",
+ "check_userspace",
+ "resume_userspace",
+ /* resume_kernel */
+#ifdef CONFIG_PREEMPT
+ "need_resched",
+#endif /* CONFIG_PREEMPT */
+ /* ia32_sysenter_target */
+ "sysenter_past_esp",
+ /* system_call */
+ "no_singlestep",
+ "syscall_call",
+ "syscall_exit",
+ "restore_all",
+ "restore_nocheck",
+ "restore_nocheck_notrace",
+ "ldt_ss",
+ /* do not include iret_exc, it is in a .fixup section */
+ /* work_pending */
+ "work_resched",
+ "work_notifysig",
+#ifdef CONFIG_VM86
+ "work_notifysig_v86",
+#endif /* CONFIG_VM86 */
+ /* page_fault */
+ "error_code",
+ /* device_not_available */
+ "device_not_available_emulate",
+ /* debug */
+ "debug_esp_fix_insn",
+ "debug_stack_correct",
+ /* nmi */
+ "nmi_stack_correct",
+ "nmi_stack_fixup",
+ "nmi_debug_stack_check",
+ "nmi_espfix_stack",
+#ifdef CONFIG_HIBERNATION
+ /* restore_image */
+ "copy_loop",
+ "done",
+#endif /* CONFIG_HIBERNATION */
+#ifdef CONFIG_KPROBES
+ /* jprobe_return */
+ "jprobe_return_end",
+#endif /* CONFIG_KPROBES */
+#ifdef CONFIG_KEXEC
+ /* relocate_kernel */
+ "relocate_new_kernel",
+#endif /* CONFIG_KEXEC */
+#ifdef CONFIG_MATH_EMULATION
+ /* assorted *.S files in arch/i386/math_emu */
+ "Denorm_done",
+ "Denorm_shift_more_than_32",
+ "Denorm_shift_more_than_63",
+ "Denorm_shift_more_than_64",
+ "Do_unmasked_underflow",
+ "Exp_not_underflow",
+ "fpu_Arith_exit",
+ "fpu_reg_round",
+ "fpu_reg_round_signed_special_exit",
+ "fpu_reg_round_special_exit",
+ "L_accum_done",
+ "L_accum_loaded",
+ "L_accum_loop",
+ "L_arg1_larger",
+ "L_bugged",
+ "L_bugged_1",
+ "L_bugged_2",
+ "L_bugged_3",
+ "L_bugged_4",
+ "L_bugged_denorm_486",
+ "L_bugged_round24",
+ "L_bugged_round53",
+ "L_bugged_round64",
+ "LCheck_24_round_up",
+ "LCheck_53_round_up",
+ "LCheck_Round_Overflow",
+ "LCheck_truncate_24",
+ "LCheck_truncate_53",
+ "LCheck_truncate_64",
+ "LDenormal_adj_exponent",
+ "L_deNormalised",
+ "LDo_24_round_up",
+ "LDo_2nd_32_bits",
+ "LDo_2nd_div",
+ "LDo_3rd_32_bits",
+ "LDo_3rd_div",
+ "LDo_53_round_up",
+ "LDo_64_round_up",
+ "L_done",
+ "LDo_truncate_24",
+ "LDown_24",
+ "LDown_53",
+ "LDown_64",
+ "L_entry_bugged",
+ "L_error_exit",
+ "L_exactly_32",
+ "L_exception_exit",
+ "L_exit",
+ "L_exit_nuo_valid",
+ "L_exit_nuo_zero",
+ "L_exit_valid",
+ "L_extent_zero",
+ "LFirst_div_done",
+ "LFirst_div_not_1",
+ "L_Full_Division",
+ "LGreater_Half_24",
+ "LGreater_Half_53",
+ "LGreater_than_1",
+ "LLess_than_1",
+ "L_Make_denorm",
+ "L_more_31_no_low",
+ "L_more_63_no_low",
+ "L_more_than_31",
+ "L_more_than_63",
+ "L_more_than_64",
+ "L_more_than_65",
+ "L_more_than_95",
+ "L_must_be_zero",
+ "L_n_exit",
+ "L_no_adjust",
+ "L_no_bit_lost",
+ "L_no_overflow",
+ "L_no_precision_loss",
+ "L_Normalised",
+ "L_norm_bugged",
+ "L_n_shift_1",
+ "L_nuo_shift_1",
+ "L_overflow",
+ "L_precision_lost_down",
+ "L_precision_lost_up",
+ "LPrevent_2nd_overflow",
+ "LPrevent_3rd_overflow",
+ "LPseudoDenormal",
+ "L_Re_normalise",
+ "LResult_Normalised",
+ "L_round",
+ "LRound_large",
+ "LRound_nearest_24",
+ "LRound_nearest_53",
+ "LRound_nearest_64",
+ "LRound_not_small",
+ "LRound_ovfl",
+ "LRound_precision",
+ "LRound_prep",
+ "L_round_the_result",
+ "LRound_To_24",
+ "LRound_To_53",
+ "LRound_To_64",
+ "LSecond_div_done",
+ "LSecond_div_not_1",
+ "L_shift_1",
+ "L_shift_32",
+ "L_shift_65_nc",
+ "L_shift_done",
+ "Ls_less_than_32",
+ "Ls_more_than_63",
+ "Ls_more_than_95",
+ "L_Store_significand",
+ "L_subtr",
+ "LTest_over",
+ "LTruncate_53",
+ "LTruncate_64",
+ "L_underflow",
+ "L_underflow_to_zero",
+ "LUp_24",
+ "LUp_53",
+ "LUp_64",
+ "L_zero",
+ "Normalise_result",
+ "Signal_underflow",
+ "sqrt_arg_ge_2",
+ "sqrt_get_more_precision",
+ "sqrt_more_prec_large",
+ "sqrt_more_prec_ok",
+ "sqrt_more_prec_small",
+ "sqrt_near_exact",
+ "sqrt_near_exact_large",
+ "sqrt_near_exact_ok",
+ "sqrt_near_exact_small",
+ "sqrt_near_exact_x",
+ "sqrt_prelim_no_adjust",
+ "sqrt_round_result",
+ "sqrt_stage_2_done",
+ "sqrt_stage_2_error",
+ "sqrt_stage_2_finish",
+ "sqrt_stage_2_positive",
+ "sqrt_stage_3_error",
+ "sqrt_stage_3_finished",
+ "sqrt_stage_3_no_error",
+ "sqrt_stage_3_positive",
+ "Unmasked_underflow",
+ "xExp_not_underflow",
+#endif /* CONFIG_MATH_EMULATION */
+};
+
+static const char *bb_hardware_handlers[] = {
+ "ret_from_exception",
+ "system_call",
+ "work_pending",
+ "syscall_fault",
+ "page_fault",
+ "coprocessor_error",
+ "simd_coprocessor_error",
+ "device_not_available",
+ "debug",
+ "nmi",
+ "int3",
+ "overflow",
+ "bounds",
+ "invalid_op",
+ "coprocessor_segment_overrun",
+ "invalid_TSS",
+ "segment_not_present",
+ "stack_segment",
+ "general_protection",
+ "alignment_check",
+ "kdb_call",
+ "divide_error",
+ "machine_check",
+ "spurious_interrupt_bug",
+};
+
+static int
+bb_hardware_pushed_arch(kdb_machreg_t rsp,
+ const struct kdb_activation_record *ar)
+{
+ return (2 * KDB_WORD_SIZE);
+}
+
+static void
+bb_start_block0(void)
+{
+ bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
+ bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
+ bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
+ bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
+ bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
+ bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
+ bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
+ bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
+}
+
+/* The i386 code that switches stack in a context switch is an extremely
+ * special case. It saves the rip pointing to a label that is not otherwise
+ * referenced, saves the current rsp then pushes a word. The magic code that
+ * resumes the new task picks up the saved rip and rsp, effectively referencing
+ * a label that otherwise is not used and ignoring the pushed word.
+ *
+ * The simplest way to handle this very strange case is to recognise jmp
+ * address <__switch_to> and treat it as a popfl instruction. This avoids
+ * terminating the block on this jmp and removes one word from the stack state,
+ * which is the end effect of all the magic code.
+ *
+ * Called with the instruction line, starting after the first ':'.
+ */
+
+static void
+bb_fixup_switch_to(char *p)
+{
+ char *p1 = p;
+ p += strspn(p, " \t"); /* start of instruction */
+ if (strncmp(p, "jmp", 3))
+ return;
+ p += strcspn(p, " \t"); /* end of instruction */
+ p += strspn(p, " \t"); /* start of address */
+ p += strcspn(p, " \t"); /* end of address */
+ p += strspn(p, " \t"); /* start of comment */
+ if (strcmp(p, "<__switch_to>") == 0)
+ strcpy(p1, "popfl");
+}
+
+static int
+bb_asmlinkage_arch(void)
+{
+ return strcmp(bb_func_name, "ret_from_exception") == 0 ||
+ strcmp(bb_func_name, "syscall_trace_entry") == 0;
+}
+
+#endif /* CONFIG_X86_64 */
+
+
+/*============================================================================*/
+/* */
+/* Common code and data. */
+/* */
+/*============================================================================*/
+
+
+/* Tracking registers by decoding the instructions is quite a bit harder than
+ * doing the same tracking using compiler generated information. Register
+ * contents can remain in the same register, they can be copied to other
+ * registers, they can be stored on stack or they can be modified/overwritten.
+ * At any one time, there are 0 or more copies of the original value that was
+ * supplied in each register on input to the current function. If a register
+ * exists in multiple places, one copy of that register is the master version,
+ * the others are temporary copies which may or may not be destroyed before the
+ * end of the function.
+ *
+ * The compiler knows which copy of a register is the master and which are
+ * temporary copies, which makes it relatively easy to track register contents
+ * as they are saved and restored. Without that compiler based knowledge, this
+ * code has to track _every_ possible copy of each register, simply because we
+ * do not know which is the master copy and which are temporary copies which
+ * may be destroyed later.
+ *
+ * It gets worse: registers that contain parameters can be copied to other
+ * registers which are then saved on stack in a lower level function. Also the
+ * stack pointer may be held in multiple registers (typically RSP and RBP)
+ * which contain different offsets from the base of the stack on entry to this
+ * function. All of which means that we have to track _all_ register
+ * movements, or at least as much as possible.
+ *
+ * Start with the basic block that contains the start of the function, by
+ * definition all registers contain their initial value. Track each
+ * instruction's effect on register contents, this includes reading from a
+ * parameter register before any write to that register, IOW the register
+ * really does contain a parameter. The register state is represented by a
+ * dynamically sized array with each entry containing :-
+ *
+ * Register name
+ * Location it is copied to (another register or stack + offset)
+ *
+ * Besides the register tracking array, we track which parameter registers are
+ * read before being written, to determine how many parameters are passed in
+ * registers. We also track which registers contain stack pointers, including
+ * their offset from the original stack pointer on entry to the function.
+ *
+ * At each exit from the current basic block (via JMP instruction or drop
+ * through), the register state is cloned to form the state on input to the
+ * target basic block and the target is marked for processing using this state.
+ * When there are multiple ways to enter a basic block (e.g. several JMP
+ * instructions referencing the same target) then there will be multiple sets
+ * of register state to form the "input" for that basic block, there is no
+ * guarantee that all paths to that block will have the same register state.
+ *
+ * As each target block is processed, all the known sets of register state are
+ * merged to form a suitable subset of the state which agrees with all the
+ * inputs. The most common case is where one path to this block copies a
+ * register to another register but another path does not, therefore the copy
+ * is only a temporary and should not be propagated into this block.
+ *
+ * If the target block already has an input state from the current transfer
+ * point and the new input state is identical to the previous input state then
+ * we have reached a steady state for the arc from the current location to the
+ * target block. Therefore there is no need to process the target block again.
+ *
+ * The steps of "process a block, create state for target block(s), pick a new
+ * target block, merge state for target block, process target block" will
+ * continue until all the state changes have propagated all the way down the
+ * basic block tree, including round any cycles in the tree. The merge step
+ * only deletes tracking entries from the input state(s), it never adds a
+ * tracking entry. Therefore the overall algorithm is guaranteed to converge
+ * to a steady state, the worst possible case is that every tracking entry into
+ * a block is deleted, which will result in an empty output state.
+ *
+ * As each instruction is decoded, it is checked to see if this is the point at
+ * which execution left this function. This can be a call to another function
+ * (actually the return address to this function) or is the instruction which
+ * was about to be executed when an interrupt occurred (including an oops).
+ * Save the register state at this point.
+ *
+ * We always know what the registers contain when execution left this function.
+ * For an interrupt, the registers are in struct pt_regs. For a call to
+ * another function, we have already deduced the register state on entry to the
+ * other function by unwinding to the start of that function. Given the
+ * register state on exit from this function plus the known register contents
+ * on entry to the next function, we can determine the stack pointer value on
+ * input to this function. That in turn lets us calculate the address of input
+ * registers that have been stored on stack, giving us the input parameters.
+ * Finally the stack pointer gives us the return address which is the exit
+ * point from the calling function, repeat the unwind process on that function.
+ *
+ * The data that tracks which registers contain input parameters is function
+ * global, not local to any basic block. To determine which input registers
+ * contain parameters, we have to decode the entire function. Otherwise an
+ * exit early in the function might not have read any parameters yet.
+ */
+
+/* Record memory contents in terms of the values that were passed to this
+ * function, IOW track which memory locations contain an input value. A memory
+ * location's contents can be undefined, it can contain an input register value
+ * or it can contain an offset from the original stack pointer.
+ *
+ * This structure is used to record register contents that have been stored in
+ * memory. Location (BBRG_OSP + 'offset_address') contains the input value
+ * from register 'value'. When 'value' is BBRG_OSP then offset_value contains
+ * the offset from the original stack pointer that was stored in this memory
+ * location. When 'value' is not BBRG_OSP then the memory location contains
+ * the original contents of an input register and offset_value is ignored.
+ *
+ * An input register 'value' can be stored in more than one register and/or in
+ * more than one memory location.
+ */
+
+struct bb_memory_contains
+{
+ short offset_address;
+ enum bb_reg_code value: 8;
+ short offset_value;
+};
+
+/* Track the register state in each basic block. */
+
+struct bb_reg_state
+{
+ /* Indexed by register value 'reg - BBRG_RAX' */
+ struct bb_reg_contains contains[KDB_INT_REGISTERS];
+ int ref_count;
+ int mem_count;
+ /* dynamic size for memory locations, see mem_count */
+ struct bb_memory_contains memory[0];
+};
+
+static struct bb_reg_state *bb_reg_state, *bb_exit_state;
+static int bb_reg_state_max, bb_reg_params, bb_memory_params;
+
+struct bb_actual
+{
+ bfd_vma value;
+ int valid;
+};
+
+/* Contains the actual hex value of a register, plus a valid bit. Indexed by
+ * register value 'reg - BBRG_RAX'
+ */
+static struct bb_actual bb_actual[KDB_INT_REGISTERS];
+
+static bfd_vma bb_func_start, bb_func_end;
+static bfd_vma bb_common_interrupt, bb_error_entry, bb_ret_from_intr,
+ bb_thread_return, bb_sync_regs, bb_save_v86_state,
+ bb__sched_text_start, bb__sched_text_end,
- bb_save_args;
++ bb_save_args, bb_save_rest, bb_save_paranoid;
+
+/* Record jmp instructions, both conditional and unconditional. These form the
+ * arcs between the basic blocks. This is also used to record the state when
+ * one block drops through into the next.
+ *
+ * A bb can have multiple associated bb_jmp entries, one for each jcc
+ * instruction plus at most one bb_jmp for the drop through case. If a bb
+ * drops through to the next bb then the drop through bb_jmp entry will be the
+ * last entry in the set of bb_jmp's that are associated with the bb. This is
+ * enforced by the fact that jcc entries are added during the disassembly phase
+ * of pass 1, the drop through entries are added near the end of pass 1.
+ *
+ * At address 'from' in this block, we have a jump to address 'to'. The
+ * register state at 'from' is copied to the target block.
+ */
+
+struct bb_jmp
+{
+ bfd_vma from;
+ bfd_vma to;
+ struct bb_reg_state *state;
+ unsigned int drop_through: 1;
+};
+
+struct bb
+{
+ bfd_vma start;
+ /* The end address of a basic block is sloppy. It can be the first
+ * byte of the last instruction in the block or it can be the last byte
+ * of the block.
+ */
+ bfd_vma end;
+ unsigned int changed: 1;
+ unsigned int drop_through: 1;
+};
+
+static struct bb **bb_list, *bb_curr;
+static int bb_max, bb_count;
+
+static struct bb_jmp *bb_jmp_list;
+static int bb_jmp_max, bb_jmp_count;
+
+/* Add a new bb entry to the list. This does an insert sort. */
+
+static struct bb *
+bb_new(bfd_vma order)
+{
+ int i, j;
+ struct bb *bb, *p;
+ if (bb_giveup)
+ return NULL;
+ if (bb_count == bb_max) {
+ struct bb **bb_list_new;
+ bb_max += 10;
+ bb_list_new = debug_kmalloc(bb_max*sizeof(*bb_list_new),
+ GFP_ATOMIC);
+ if (!bb_list_new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memcpy(bb_list_new, bb_list, bb_count*sizeof(*bb_list));
+ debug_kfree(bb_list);
+ bb_list = bb_list_new;
+ }
+ bb = debug_kmalloc(sizeof(*bb), GFP_ATOMIC);
+ if (!bb) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memset(bb, 0, sizeof(*bb));
+ for (i = 0; i < bb_count; ++i) {
+ p = bb_list[i];
+ if ((p->start && p->start > order) ||
+ (p->end && p->end > order))
+ break;
+ }
+ for (j = bb_count-1; j >= i; --j)
+ bb_list[j+1] = bb_list[j];
+ bb_list[i] = bb;
+ ++bb_count;
+ return bb;
+}
+
+/* Add a new bb_jmp entry to the list. This list is not sorted. */
+
+static struct bb_jmp *
+bb_jmp_new(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ struct bb_jmp *bb_jmp;
+ if (bb_giveup)
+ return NULL;
+ if (bb_jmp_count == bb_jmp_max) {
+ struct bb_jmp *bb_jmp_list_new;
+ bb_jmp_max += 10;
+ bb_jmp_list_new =
+ debug_kmalloc(bb_jmp_max*sizeof(*bb_jmp_list_new),
+ GFP_ATOMIC);
+ if (!bb_jmp_list_new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memcpy(bb_jmp_list_new, bb_jmp_list,
+ bb_jmp_count*sizeof(*bb_jmp_list));
+ debug_kfree(bb_jmp_list);
+ bb_jmp_list = bb_jmp_list_new;
+ }
+ bb_jmp = bb_jmp_list + bb_jmp_count++;
+ bb_jmp->from = from;
+ bb_jmp->to = to;
+ bb_jmp->drop_through = drop_through;
+ bb_jmp->state = NULL;
+ return bb_jmp;
+}
+
+static void
+bb_delete(int i)
+{
+ struct bb *bb = bb_list[i];	/* entry being unlinked from bb_list and freed */
+ memmove(bb_list+i, bb_list+i+1, (bb_count-i-1)*sizeof(*bb_list)); /* src/dst overlap: memmove, not memcpy */
+ bb_list[--bb_count] = NULL;
+ debug_kfree(bb);
+}
+
+static struct bb *
+bb_add(bfd_vma start, bfd_vma end)
+{
+ int i;
+ struct bb *bb;
+ /* Ignore basic blocks whose start address is outside the current
+ * function. These occur for call instructions and for tail recursion.
+ */
+ if (start &&
+ (start < bb_func_start || start >= bb_func_end))
+ return NULL;
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ if ((start && bb->start == start) ||
+ (end && bb->end == end))
+ return bb;
+ }
+ bb = bb_new(start ? start : end);
+ if (bb) {
+ bb->start = start;
+ bb->end = end;
+ }
+ return bb;
+}
+
+static struct bb_jmp *
+bb_jmp_add(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ int i;
+ struct bb_jmp *bb_jmp;
+ for (i = 0, bb_jmp = bb_jmp_list; i < bb_jmp_count; ++i, ++bb_jmp) {
+ if (bb_jmp->from == from &&
+ bb_jmp->to == to &&
+ bb_jmp->drop_through == drop_through)
+ return bb_jmp;
+ }
+ bb_jmp = bb_jmp_new(from, to, drop_through);
+ return bb_jmp;
+}
+
+static unsigned long bb_curr_addr, bb_exit_addr;
+static char bb_buffer[256]; /* A bit too big to go on stack */
+
+/* Computed jmp uses 'jmp *addr(,%reg,[48])' where 'addr' is the start of a
+ * table of addresses that point into the current function. Run the table and
+ * generate bb starts for each target address plus a bb_jmp from this address
+ * to the target address.
+ *
+ * Only called for 'jmp' instructions, with the pointer starting at 'jmp'.
+ */
+
+static void
+bb_pass1_computed_jmp(char *p)
+{
+ unsigned long table, scale;
+ kdb_machreg_t addr;
+ struct bb* bb;
+ p += strcspn(p, " \t"); /* end of instruction */
+ p += strspn(p, " \t"); /* start of address */
+ if (*p++ != '*')
+ return;
+ table = simple_strtoul(p, &p, 0);
+ if (strncmp(p, "(,%", 3) != 0)
+ return;
+ p += 3;
+ p += strcspn(p, ","); /* end of reg */
+ if (*p++ != ',')
+ return;
+ scale = simple_strtoul(p, &p, 0);
+ if (scale != KDB_WORD_SIZE || strcmp(p, ")"))
+ return;
+ while (!bb_giveup) {
+ if (kdb_getword(&addr, table, sizeof(addr)))
+ return;
+ if (addr < bb_func_start || addr >= bb_func_end)
+ return;
+ bb = bb_add(addr, 0);
+ if (bb)
+ bb_jmp_add(bb_curr_addr, addr, 0);
+ table += KDB_WORD_SIZE;
+ }
+}
+
+/* Pass 1, identify the start and end of each basic block */
+
+static int
+bb_dis_pass1(PTR file, const char *fmt, ...)
+{
+ int l = strlen(bb_buffer);
+ char *p;
+ va_list ap;
+ va_start(ap, fmt);
+ vsnprintf(bb_buffer + l, sizeof(bb_buffer) - l, fmt, ap);
+ va_end(ap);
+ if ((p = strchr(bb_buffer, '\n'))) {
+ *p = '\0';
+ /* ret[q], iret[q], sysexit, sysret, ud2a or jmp[q] end a
+ * block. As does a call to a function marked noret.
+ */
+ p = bb_buffer;
+ p += strcspn(p, ":");
+ if (*p++ == ':') {
+ bb_fixup_switch_to(p);
+ p += strspn(p, " \t"); /* start of instruction */
+ if (strncmp(p, "ret", 3) == 0 ||
+ strncmp(p, "iret", 4) == 0 ||
+ strncmp(p, "sysexit", 7) == 0 ||
+ strncmp(p, "sysret", 6) == 0 ||
+ strncmp(p, "ud2a", 4) == 0 ||
+ strncmp(p, "jmp", 3) == 0) {
+ if (strncmp(p, "jmp", 3) == 0)
+ bb_pass1_computed_jmp(p);
+ bb_add(0, bb_curr_addr);
+ };
+ if (strncmp(p, "call", 4) == 0) {
+ strsep(&p, " \t"); /* end of opcode */
+ if (p)
+ p += strspn(p, " \t"); /* operand(s) */
+ if (p && strchr(p, '<')) {
+ p = strchr(p, '<') + 1;
+ *strchr(p, '>') = '\0';
+ if (bb_noret(p))
+ bb_add(0, bb_curr_addr);
+ }
+ };
+ }
+ bb_buffer[0] = '\0';
+ }
+ return 0;
+}
+
+static void
+bb_printaddr_pass1(bfd_vma addr, disassemble_info *dip)
+{
+ kdb_symtab_t symtab;
+ unsigned int offset;
+ struct bb* bb;
+ /* disasm only calls the printaddr routine for the target of jmp, loop
+ * or call instructions, i.e. the start of a basic block. call is
+ * ignored by bb_add because the target address is outside the current
+ * function.
+ */
+ dip->fprintf_func(dip->stream, "0x%lx", addr);
+ kdbnearsym(addr, &symtab);
+ if (symtab.sym_name) {
+ dip->fprintf_func(dip->stream, " <%s", symtab.sym_name);
+ if ((offset = addr - symtab.sym_start))
+ dip->fprintf_func(dip->stream, "+0x%x", offset);
+ dip->fprintf_func(dip->stream, ">");
+ }
+ bb = bb_add(addr, 0);
+ if (bb)
+ bb_jmp_add(bb_curr_addr, addr, 0);
+}
+
+static void
+bb_pass1(void)
+{
+ int i;
+ unsigned long addr;
+ struct bb *bb;
+ struct bb_jmp *bb_jmp;
+
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: func_name %s func_start " kdb_bfd_vma_fmt0
+ " func_end " kdb_bfd_vma_fmt0 "\n",
+ __FUNCTION__,
+ bb_func_name,
+ bb_func_start,
+ bb_func_end);
+ kdb_di.fprintf_func = bb_dis_pass1;
+ kdb_di.print_address_func = bb_printaddr_pass1;
+
+ bb_add(bb_func_start, 0);
+ for (bb_curr_addr = bb_func_start;
+ bb_curr_addr < bb_func_end;
+ ++bb_curr_addr) {
+ unsigned char c;
+ if (kdb_getarea(c, bb_curr_addr)) {
+ kdb_printf("%s: unreadable function code at ",
+ __FUNCTION__);
+ kdb_symbol_print(bb_curr_addr, NULL, KDB_SP_DEFAULT);
+ kdb_printf(", giving up\n");
+ bb_giveup = 1;
+ return;
+ }
+ }
+ for (addr = bb_func_start; addr < bb_func_end; ) {
+ bb_curr_addr = addr;
+ addr += kdba_id_printinsn(addr, &kdb_di);
+ kdb_di.fprintf_func(NULL, "\n");
+ }
+ if (bb_giveup)
+ goto out;
+
+ /* Special case: a block consisting of a single instruction which is
+ * both the target of a jmp and is also an ending instruction, so we
+ * add two blocks using the same address, one as a start and one as an
+ * end, in no guaranteed order. The end must be ordered after the
+ * start.
+ */
+ for (i = 0; i < bb_count-1; ++i) {
+ struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
+ if (bb1->end && bb1->end == bb2->start) {
+ bb = bb_list[i+1];
+ bb_list[i+1] = bb_list[i];
+ bb_list[i] = bb;
+ }
+ }
+
+ /* Some bb have a start address, some have an end address. Collapse
+ * them into entries that have both start and end addresses. The first
+ * entry is guaranteed to have a start address.
+ */
+ for (i = 0; i < bb_count-1; ++i) {
+ struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
+ if (bb1->end)
+ continue;
+ if (bb2->start) {
+ bb1->end = bb2->start - 1;
+ bb1->drop_through = 1;
+ bb_jmp_add(bb1->end, bb2->start, 1);
+ } else {
+ bb1->end = bb2->end;
+ bb_delete(i+1);
+ }
+ }
+ bb = bb_list[bb_count-1];
+ if (!bb->end)
+ bb->end = bb_func_end - 1;
+
+ /* It would be nice to check that all bb have a valid start and end
+ * address but there is just too much garbage code in the kernel to do
+ * that check. Aligned functions in assembler code mean that there is
+ * space between the end of one function and the start of the next and
+ * that space contains previous code from the assembler's buffers. It
+ * looks like dead code with nothing that branches to it, so no start
+ * address. do_sys_vm86() ends with 'jmp resume_userspace' which the C
+ * compiler does not know about so gcc appends the normal exit code,
+ * again nothing branches to this dangling code.
+ *
+ * The best we can do is delete bb entries with no start address.
+ */
+ for (i = 0; i < bb_count; ++i) {
+ struct bb *bb = bb_list[i];
+ if (!bb->start)
+ bb_delete(i--);
+ }
+ for (i = 0; i < bb_count; ++i) {
+ struct bb *bb = bb_list[i];
+ if (!bb->end) {
+ kdb_printf("%s: incomplete bb state\n", __FUNCTION__);
+ bb_giveup = 1;
+ goto debug;
+ }
+ }
+
+out:
+ if (!KDB_DEBUG(BB))
+ return;
+debug:
+ kdb_printf("%s: end\n", __FUNCTION__);
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ kdb_printf(" bb[%d] start "
+ kdb_bfd_vma_fmt0
+ " end " kdb_bfd_vma_fmt0
+ " drop_through %d",
+ i, bb->start, bb->end, bb->drop_through);
+ kdb_printf("\n");
+ }
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ kdb_printf(" bb_jmp[%d] from "
+ kdb_bfd_vma_fmt0
+ " to " kdb_bfd_vma_fmt0
+ " drop_through %d\n",
+ i, bb_jmp->from, bb_jmp->to, bb_jmp->drop_through);
+ }
+}
+
+/* Pass 2, record register changes in each basic block */
+
+/* For each opcode that we care about, indicate how it uses its operands. Most
+ * opcodes can be handled generically because they completely specify their
+ * operands in the instruction, however many opcodes have side effects such as
+ * reading or writing rax or updating rsp. Instructions that change registers
+ * that are not listed in the operands must be handled as special cases. In
+ * addition, instructions that copy registers while preserving their contents
+ * (push, pop, mov) or change the contents in a well defined way (add with an
+ * immediate, lea) must be handled as special cases in order to track the
+ * register contents.
+ *
+ * The tables below only list opcodes that are actually used in the Linux
+ * kernel, so they omit most of the floating point and all of the SSE type
+ * instructions. The operand usage entries only cater for accesses to memory
+ * and to the integer registers, accesses to floating point registers and flags
+ * are not relevant for kernel backtraces.
+ */
+
+/* Classifies how an instruction uses its operands. Every entry in
+ * bb_opcode_usage_all[] carries one of these values.
+ */
+enum bb_operand_usage {
+ BBOU_UNKNOWN = 0,
+ /* generic entries. because xchg can do any combinations of
+ * read src, write src, read dst and write dst we need to
+ * define all 16 possibilities. These are ordered by rs = 1,
+ * rd = 2, ws = 4, wd = 8, bb_usage_x*() functions rely on this
+ * order.
+ */
+ BBOU_RS = 1, /* read src */ /* 1 */
+ BBOU_RD, /* read dst */ /* 2 */
+ BBOU_RSRD, /* 3 */
+ BBOU_WS, /* write src */ /* 4 */
+ BBOU_RSWS, /* 5 */
+ BBOU_RDWS, /* 6 */
+ BBOU_RSRDWS, /* 7 */
+ BBOU_WD, /* write dst */ /* 8 */
+ BBOU_RSWD, /* 9 */
+ BBOU_RDWD, /* 10 */
+ BBOU_RSRDWD, /* 11 */
+ BBOU_WSWD, /* 12 */
+ BBOU_RSWSWD, /* 13 */
+ BBOU_RDWSWD, /* 14 */
+ BBOU_RSRDWSWD, /* 15 */
+ /* opcode specific entries, each one is handled as a special case
+ * rather than by the generic read/write bit combinations above
+ */
+ BBOU_ADD,
++ BBOU_AND,
+ BBOU_CALL,
+ BBOU_CBW,
+ BBOU_CMOV,
+ BBOU_CMPXCHG,
+ BBOU_CMPXCHGD,
+ BBOU_CPUID,
+ BBOU_CWD,
+ BBOU_DIV,
+ BBOU_IDIV,
+ BBOU_IMUL,
+ BBOU_IRET,
+ BBOU_JMP,
+ BBOU_LAHF,
+ BBOU_LEA,
+ BBOU_LEAVE,
+ BBOU_LODS,
+ BBOU_LOOP,
+ BBOU_LSS,
+ BBOU_MONITOR,
+ BBOU_MOV,
+ BBOU_MOVS,
+ BBOU_MUL,
+ BBOU_MWAIT,
+ BBOU_NOP,
+ BBOU_OUTS,
+ BBOU_POP,
+ BBOU_POPF,
+ BBOU_PUSH,
+ BBOU_PUSHF,
+ BBOU_RDMSR,
+ BBOU_RDTSC,
+ BBOU_RET,
+ BBOU_SAHF,
+ BBOU_SCAS,
+ BBOU_SUB,
+ BBOU_SYSEXIT,
+ BBOU_SYSRET,
+ BBOU_WRMSR,
+ BBOU_XADD,
+ BBOU_XCHG,
+ BBOU_XOR,
+};
+
+/* One row of the opcode lookup table. bb_parse_opcode() compares the first
+ * 'length' characters of the decoded opcode against 'opcode' and records the
+ * first matching row in bb_decode.match.
+ */
+struct bb_opcode_usage {
+ int length; /* number of leading characters of 'opcode' that are compared */
+ enum bb_operand_usage usage; /* how a matching instruction uses its operands */
+ const char *opcode; /* opcode text (or opcode prefix) to match */
+};
+
+/* This table is sorted in alphabetical order of opcode, except that the
+ * trailing '"' is treated as a high value. For example, 'in' sorts after
+ * 'inc', 'bt' after 'btc'. This modified sort order ensures that shorter
+ * opcodes come after long ones. A normal sort would put 'in' first, so 'in'
+ * would match both 'inc' and 'in'. When adding any new entries to this table,
+ * be careful to put shorter entries last in their group.
+ *
+ * To automatically sort the table (in vi)
+ * Mark the first and last opcode line with 'a and 'b
+ * 'a
+ * !'bsed -e 's/"}/}}/' | LANG=C sort -t '"' -k2 | sed -e 's/}}/"}/'
+ *
+ * If a new instruction has to be added, first consider if it affects registers
+ * other than those listed in the operands. Also consider if you want to track
+ * the results of issuing the instruction, IOW can you extract useful
+ * information by looking in detail at the modified registers or memory. If
+ * either test is true then you need a special case to handle the instruction.
+ *
+ * The generic entries at the start of enum bb_operand_usage all have one thing
+ * in common, if a register or memory location is updated then that location
+ * becomes undefined, i.e. we lose track of anything that was previously saved
+ * in that location. So only use a generic BBOU_* value when the result of the
+ * instruction cannot be calculated exactly _and_ when all the affected
+ * registers are listed in the operands.
+ *
+ * Examples:
+ *
+ * 'call' does not generate a known result, but as a side effect of call,
+ * several scratch registers become undefined, so it needs a special BBOU_CALL
+ * entry.
+ *
+ * 'adc' generates a variable result, it depends on the carry flag, so 'adc'
+ * gets a generic entry. 'add' can generate an exact result (add with
+ * immediate on a register that points to the stack) or it can generate an
+ * unknown result (add a variable, or add immediate to a register that does not
+ * contain a stack pointer) so 'add' has its own BBOU_ADD entry.
+ */
+
+static const struct bb_opcode_usage
+bb_opcode_usage_all[] = {
+ {3, BBOU_RSRDWD, "adc"},
+ {3, BBOU_ADD, "add"},
- {3, BBOU_RSRDWD, "and"},
++ {3, BBOU_AND, "and"},
+ {3, BBOU_RSWD, "bsf"},
+ {3, BBOU_RSWD, "bsr"},
+ {5, BBOU_RSWS, "bswap"},
+ {3, BBOU_RSRDWD, "btc"},
+ {3, BBOU_RSRDWD, "btr"},
+ {3, BBOU_RSRDWD, "bts"},
+ {2, BBOU_RSRD, "bt"},
+ {4, BBOU_CALL, "call"},
+ {4, BBOU_CBW, "cbtw"}, /* Intel cbw */
+ {3, BBOU_NOP, "clc"},
+ {3, BBOU_NOP, "cld"},
+ {7, BBOU_RS, "clflush"},
+ {4, BBOU_NOP, "clgi"},
+ {3, BBOU_NOP, "cli"},
+ {4, BBOU_CWD, "cltd"}, /* Intel cdq */
+ {4, BBOU_CBW, "cltq"}, /* Intel cdqe */
+ {4, BBOU_NOP, "clts"},
+ {4, BBOU_CMOV, "cmov"},
+ {9, BBOU_CMPXCHGD,"cmpxchg16"},
+ {8, BBOU_CMPXCHGD,"cmpxchg8"},
+ {7, BBOU_CMPXCHG, "cmpxchg"},
+ {3, BBOU_RSRD, "cmp"},
+ {5, BBOU_CPUID, "cpuid"},
+ {4, BBOU_CWD, "cqto"}, /* Intel cqo */
+ {4, BBOU_CWD, "cwtd"}, /* Intel cwd */
+ {4, BBOU_CBW, "cwtl"}, /* Intel cwde */
+ {4, BBOU_NOP, "data"}, /* alternative ASM_NOP<n> generates data16 on x86_64 */
+ {3, BBOU_RSWS, "dec"},
+ {3, BBOU_DIV, "div"},
+ {5, BBOU_RS, "fdivl"},
+ {5, BBOU_NOP, "finit"},
+ {6, BBOU_RS, "fistpl"},
+ {4, BBOU_RS, "fldl"},
+ {4, BBOU_RS, "fmul"},
+ {6, BBOU_NOP, "fnclex"},
+ {6, BBOU_NOP, "fninit"},
+ {6, BBOU_RS, "fnsave"},
+ {7, BBOU_NOP, "fnsetpm"},
+ {6, BBOU_RS, "frstor"},
+ {5, BBOU_WS, "fstsw"},
+ {5, BBOU_RS, "fsubp"},
+ {5, BBOU_NOP, "fwait"},
+ {7, BBOU_RS, "fxrstor"},
+ {6, BBOU_RS, "fxsave"},
+ {3, BBOU_NOP, "hlt"},
+ {4, BBOU_IDIV, "idiv"},
+ {4, BBOU_IMUL, "imul"},
+ {3, BBOU_RSWS, "inc"},
+ {3, BBOU_NOP, "int"},
+ {7, BBOU_RSRD, "invlpga"},
+ {6, BBOU_RS, "invlpg"},
+ {2, BBOU_RSWD, "in"},
+ {4, BBOU_IRET, "iret"},
+ {1, BBOU_JMP, "j"},
+ {4, BBOU_LAHF, "lahf"},
+ {3, BBOU_RSWD, "lar"},
+ {5, BBOU_RS, "lcall"},
+ {5, BBOU_LEAVE, "leave"},
+ {3, BBOU_LEA, "lea"},
+ {6, BBOU_NOP, "lfence"},
+ {4, BBOU_RS, "lgdt"},
+ {4, BBOU_RS, "lidt"},
+ {4, BBOU_RS, "ljmp"},
+ {4, BBOU_RS, "lldt"},
+ {4, BBOU_RS, "lmsw"},
+ {4, BBOU_LODS, "lods"},
+ {4, BBOU_LOOP, "loop"},
+ {4, BBOU_NOP, "lret"},
+ {3, BBOU_RSWD, "lsl"},
+ {3, BBOU_LSS, "lss"},
+ {3, BBOU_RS, "ltr"},
+ {6, BBOU_NOP, "mfence"},
+ {7, BBOU_MONITOR, "monitor"},
+ {4, BBOU_MOVS, "movs"},
+ {3, BBOU_MOV, "mov"},
+ {3, BBOU_MUL, "mul"},
+ {5, BBOU_MWAIT, "mwait"},
+ {3, BBOU_RSWS, "neg"},
+ {3, BBOU_NOP, "nop"},
+ {3, BBOU_RSWS, "not"},
+ {2, BBOU_RSRDWD, "or"},
+ {4, BBOU_OUTS, "outs"},
+ {3, BBOU_RSRD, "out"},
+ {5, BBOU_NOP, "pause"},
+ {4, BBOU_POPF, "popf"},
+ {3, BBOU_POP, "pop"},
+ {8, BBOU_RS, "prefetch"},
+ {5, BBOU_PUSHF, "pushf"},
+ {4, BBOU_PUSH, "push"},
+ {3, BBOU_RSRDWD, "rcl"},
+ {3, BBOU_RSRDWD, "rcr"},
+ {5, BBOU_RDMSR, "rdmsr"},
+ {5, BBOU_RDMSR, "rdpmc"}, /* same side effects as rdmsr */
+ {5, BBOU_RDTSC, "rdtsc"},
+ {3, BBOU_RET, "ret"},
+ {3, BBOU_RSRDWD, "rol"},
+ {3, BBOU_RSRDWD, "ror"},
+ {4, BBOU_SAHF, "sahf"},
+ {3, BBOU_RSRDWD, "sar"},
+ {3, BBOU_RSRDWD, "sbb"},
+ {4, BBOU_SCAS, "scas"},
+ {3, BBOU_WS, "set"},
+ {6, BBOU_NOP, "sfence"},
+ {4, BBOU_WS, "sgdt"},
+ {3, BBOU_RSRDWD, "shl"},
+ {3, BBOU_RSRDWD, "shr"},
+ {4, BBOU_WS, "sidt"},
+ {4, BBOU_WS, "sldt"},
+ {3, BBOU_NOP, "stc"},
+ {3, BBOU_NOP, "std"},
+ {4, BBOU_NOP, "stgi"},
+ {3, BBOU_NOP, "sti"},
+ {4, BBOU_SCAS, "stos"},
+ {4, BBOU_WS, "strl"},
+ {3, BBOU_WS, "str"},
+ {3, BBOU_SUB, "sub"},
+ {6, BBOU_NOP, "swapgs"},
+ {7, BBOU_SYSEXIT, "sysexit"},
+ {6, BBOU_SYSRET, "sysret"},
+ {4, BBOU_NOP, "test"},
+ {4, BBOU_NOP, "ud2a"},
+ {7, BBOU_RS, "vmclear"},
+ {8, BBOU_NOP, "vmlaunch"},
+ {6, BBOU_RS, "vmload"},
+ {7, BBOU_RS, "vmptrld"},
+ {6, BBOU_WD, "vmread"}, /* vmread src is an encoding, not a register */
+ {8, BBOU_NOP, "vmresume"},
+ {5, BBOU_RS, "vmrun"},
+ {6, BBOU_RS, "vmsave"},
+ {7, BBOU_WD, "vmwrite"}, /* vmwrite src is an encoding, not a register */
++ {3, BBOU_NOP, "vmxoff"}, /* NOTE(review): length 3 only compares "vmx" - confirm intended */
+ {6, BBOU_NOP, "wbinvd"},
+ {5, BBOU_WRMSR, "wrmsr"},
+ {4, BBOU_XADD, "xadd"},
+ {4, BBOU_XCHG, "xchg"},
+ {3, BBOU_XOR, "xor"},
++ {4, BBOU_NOP, "xrstor"}, /* NOTE(review): length 4 only compares "xrst" - confirm intended */
++ {4, BBOU_NOP, "xsave"}, /* NOTE(review): length 4 only compares "xsav" - confirm intended */
+ {10, BBOU_WS, "xstore-rng"},
+};
+
+/* To speed up searching, index bb_opcode_usage_all by the first letter of each
+ * opcode.
+ */
+/* One slot per lowercase letter; bb_parse_opcode() selects the slot with
+ * opcode[0] - 'a'.
+ * NOTE(review): presumably filled in by initialisation code elsewhere in
+ * this file - confirm.
+ */
+static struct {
+ const struct bb_opcode_usage *opcode; /* first table entry to scan for this letter, NULL if none */
+ int size; /* number of consecutive entries to scan from 'opcode' */
+} bb_opcode_usage[26];
+
+/* The decoded form of one instruction operand, filled in by
+ * bb_parse_operand(). The string pointers point into the operand text that
+ * was passed to the parser.
+ */
+struct bb_operand {
+ char *base; /* register name (register operand) or base register inside (...) */
+ char *index; /* index register inside (...), NULL if absent */
+ char *segment; /* segment override such as "%gs", NULL if absent */
+ long disp; /* displacement, or the value of an immediate */
+ unsigned int scale; /* scale factor inside (...), 0 if absent */
+ enum bb_reg_code base_rc; /* UNDEFINED or RAX through R15 */
+ enum bb_reg_code index_rc; /* UNDEFINED or RAX through R15 */
+ unsigned int present :1; /* set once this operand has been parsed */
+ unsigned int disp_present :1; /* 'disp' holds a parsed value */
+ unsigned int indirect :1; /* must be combined with reg or memory */
+ unsigned int immediate :1; /* exactly one of these 3 must be set */
+ unsigned int reg :1;
+ unsigned int memory :1;
+};
+
+/* The decoded form of a single instruction: optional prefix, opcode text,
+ * the opcode table entry that matched it and up to three operands.
+ */
+struct bb_decode {
+ char *prefix; /* instruction prefix text, NULL if none */
+ char *opcode; /* opcode text */
+ const struct bb_opcode_usage *match; /* set by bb_parse_opcode() on a match */
+ struct bb_operand src;
+ struct bb_operand dst;
+ struct bb_operand dst2;
+};
+
+/* The instruction that is currently being analysed. */
+static struct bb_decode bb_decode;
+
+/* Translate a register name as it appears in an operand, e.g. "%rax", into
+ * its bb_reg_code. The first character of 'reg' is skipped before the name
+ * is compared. bb_reg_code_map[] is binary searched, so it must be sorted in
+ * strcmp() order of name. Returns BBRG_UNDEFINED when there is no entry.
+ */
+static enum bb_reg_code
+bb_reg_map(const char *reg)
+{
+	const char *name = reg + 1;
+	int low = 0;
+	int high = ARRAY_SIZE(bb_reg_code_map) - 1;
+	while (low <= high) {
+		int mid = (high + low) / 2;
+		const struct bb_reg_code_map *entry = bb_reg_code_map + mid;
+		int cmp = strcmp(entry->name, name);
+		if (cmp == 0)
+			return entry->reg;
+		if (cmp > 0)
+			high = mid - 1;
+		else
+			low = mid + 1;
+	}
+	return BBRG_UNDEFINED;
+}
+
+/* Parse the text of one operand (AT&T syntax: $imm, %reg, *target or
+ * seg:disp(base,index,scale)) into 'operand'. 'str' is modified in place,
+ * separators are overwritten with NUL and the segment, base and index
+ * pointers in 'operand' point into 'str'. bb_giveup is set if the text
+ * cannot be decoded. This function only ever sets fields.
+ * NOTE(review): presumably the caller clears 'operand' first - confirm.
+ */
+static void
+bb_parse_operand(char *str, struct bb_operand *operand)
+{
+ char *p = str;
+ int sign = 1;
+ operand->present = 1;
+ /* extract any segment prefix */
+ /* matches "%?s:", i.e. any two character register name ending in 's' */
+ if (p[0] == '%' && p[1] && p[2] == 's' && p[3] == ':') {
+ operand->memory = 1;
+ operand->segment = p;
+ p[3] = '\0';
+ p += 4;
+ }
+ /* extract displacement, base, index, scale */
+ if (*p == '*') {
+ /* jmp/call *disp(%reg), *%reg or *0xnnn */
+ operand->indirect = 1;
+ ++p;
+ }
+ if (*p == '-') {
+ sign = -1;
+ ++p;
+ }
+ /* the sign is only applied to a displacement, not to a '$' immediate */
+ if (*p == '$') {
+ operand->immediate = 1;
+ operand->disp_present = 1;
+ operand->disp = simple_strtoul(p+1, &p, 0);
+ } else if (isdigit(*p)) {
+ operand->memory = 1;
+ operand->disp_present = 1;
+ operand->disp = simple_strtoul(p, &p, 0) * sign;
+ }
+ if (*p == '%') {
+ /* plain register operand, base is the rest of the string */
+ operand->reg = 1;
+ operand->base = p;
+ } else if (*p == '(') {
+ /* (base,index,scale), any of the three parts may be empty */
+ operand->memory = 1;
+ operand->base = ++p;
+ p += strcspn(p, ",)");
+ if (p == operand->base)
+ operand->base = NULL;
+ if (*p == ',') {
+ *p = '\0';
+ operand->index = ++p;
+ p += strcspn(p, ",)");
+ if (p == operand->index)
+ operand->index = NULL;
+ }
+ if (*p == ',') {
+ *p = '\0';
+ operand->scale = simple_strtoul(p+1, &p, 0);
+ }
+ /* terminate the last field, normally this overwrites ')' */
+ *p = '\0';
+ } else if (*p) {
+ kdb_printf("%s: unexpected token '%c' after disp '%s'\n",
+ __FUNCTION__, *p, str);
+ bb_giveup = 1;
+ }
+ /* exactly one operand class must be set, an immediate cannot be indirect */
+ if ((operand->immediate + operand->reg + operand->memory != 1) ||
+ (operand->indirect && operand->immediate)) {
+ kdb_printf("%s: incorrect decode '%s' N %d I %d R %d M %d\n",
+ __FUNCTION__, str,
+ operand->indirect, operand->immediate, operand->reg,
+ operand->memory);
+ bb_giveup = 1;
+ }
+ if (operand->base)
+ operand->base_rc = bb_reg_map(operand->base);
+ if (operand->index)
+ operand->index_rc = bb_reg_map(operand->index);
+}
+
+/* Print the decoded form of one operand for debugging. 'type' is a label
+ * that is printed in front of the operand. Nothing is printed for an operand
+ * that is not present. The two flag characters are 'N' for an indirect
+ * operand followed by 'I', 'R' or 'M' for immediate, register or memory.
+ */
+static void
+bb_print_operand(const char *type, const struct bb_operand *operand)
+{
+	if (!operand->present)
+		return;
+	kdb_printf(" %s %c%c: ",
+		   type,
+		   operand->indirect ? 'N' : ' ',
+		   operand->immediate ? 'I' :
+		   operand->reg ? 'R' :
+		   operand->memory ? 'M' :
+		   '?'
+		   );
+	if (operand->segment)
+		kdb_printf("%s:", operand->segment);
+	if (operand->immediate) {
+		kdb_printf("$0x%lx", operand->disp);
+	} else if (operand->reg) {
+		if (operand->indirect)
+			kdb_printf("*");
+		kdb_printf("%s", operand->base);
+	} else if (operand->memory) {
+		if (operand->indirect && (operand->base || operand->index))
+			kdb_printf("*");
+		if (operand->disp_present) {
+			kdb_printf("0x%lx", operand->disp);
+		}
+		if (operand->base || operand->index || operand->scale) {
+			kdb_printf("(");
+			if (operand->base)
+				kdb_printf("%s", operand->base);
+			if (operand->index || operand->scale)
+				kdb_printf(",");
+			if (operand->index)
+				kdb_printf("%s", operand->index);
+			/* scale is unsigned int, so it is printed with %u */
+			if (operand->scale)
+				kdb_printf(",%u", operand->scale);
+			kdb_printf(")");
+		}
+	}
+	if (operand->base_rc)
+		kdb_printf(" base_rc %d (%s)",
+			   operand->base_rc, bbrg_name[operand->base_rc]);
+	if (operand->index_rc)
+		kdb_printf(" index_rc %d (%s)",
+			   operand->index_rc,
+			   bbrg_name[operand->index_rc]);
+	kdb_printf("\n");
+}
+
+/* Print the current opcode, its prefix if there is one, and the table entry
+ * that matched it. bb_decode.match must already be set.
+ */
+static void
+bb_print_opcode(void)
+{
+	const struct bb_opcode_usage *matched = bb_decode.match;
+	const char *prefix = bb_decode.prefix;
+	kdb_printf(" ");
+	if (prefix)
+		kdb_printf("%s ", prefix);
+	kdb_printf("opcode '%s' matched by '%s', usage %d\n",
+		   bb_decode.opcode, matched->opcode, matched->usage);
+}
+
+/* Find the opcode table entry for bb_decode.opcode and store it in
+ * bb_decode.match. Only the entries indexed under the first letter of the
+ * opcode are scanned and the first entry whose leading 'length' characters
+ * match wins. Returns 0 on a match, 1 if nothing matched. At most 5
+ * "no match" messages are printed, further failures are silent.
+ */
+static int
+bb_parse_opcode(void)
+{
+	static int bb_parse_opcode_error_limit = 5;
+	const struct bb_opcode_usage *entry;
+	int letter = bb_decode.opcode[0] - 'a';
+	int remaining;
+	if (letter >= 0 && letter < ARRAY_SIZE(bb_opcode_usage) &&
+	    bb_opcode_usage[letter].opcode) {
+		entry = bb_opcode_usage[letter].opcode;
+		for (remaining = bb_opcode_usage[letter].size;
+		     remaining > 0;
+		     --remaining, ++entry) {
+			if (strncmp(bb_decode.opcode, entry->opcode,
+				    entry->length) != 0)
+				continue;
+			bb_decode.match = entry;
+			if (KDB_DEBUG(BB))
+				bb_print_opcode();
+			return 0;
+		}
+	}
+	/* nothing matched, report it unless the message limit is used up */
+	if (bb_parse_opcode_error_limit) {
+		--bb_parse_opcode_error_limit;
+		kdb_printf("%s: no match at [%s]%s " kdb_bfd_vma_fmt0 " - '%s'\n",
+			   __FUNCTION__,
+			   bb_mod_name, bb_func_name, bb_curr_addr,
+			   bb_decode.opcode);
+	}
+	return 1;
+}
+
+/* True if 'reg' is one of the KDB_INT_REGISTERS integer register codes that
+ * start at BBRG_RAX.
+ */
+static bool
+bb_is_int_reg(enum bb_reg_code reg)
+{
+	if (reg < BBRG_RAX)
+		return 0;
+	return reg < (BBRG_RAX + KDB_INT_REGISTERS);
+}
+
+/* True if the operand is a memory reference of the form disp(base), with an
+ * integer base register and no index, scale or segment.
+ */
+static bool
+bb_is_simple_memory(const struct bb_operand *operand)
+{
+	if (!operand->memory)
+		return 0;
+	if (operand->index_rc || operand->scale != 0 || operand->segment)
+		return 0;
+	return bb_is_int_reg(operand->base_rc);
+}
+
+/* True if the operand is a direct memory reference that consists of a
+ * displacement only: no base, index, scale, segment or indirection.
+ */
+static bool
+bb_is_static_disp(const struct bb_operand *operand)
+{
+	if (!operand->memory || operand->indirect)
+		return 0;
+	if (operand->base_rc || operand->index_rc)
+		return 0;
+	return operand->scale == 0 && !operand->segment;
+}
+
+/* Return the value that the current bb_reg_state records for integer
+ * register 'reg'. BB_CHECK rejects anything that is not an integer register.
+ */
+static enum bb_reg_code
+bb_reg_code_value(enum bb_reg_code reg)
+{
+	int slot = reg - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+	return bb_reg_state->contains[slot].value;
+}
+
+/* Return the offset that the current bb_reg_state records for integer
+ * register 'reg'. BB_CHECK rejects anything that is not an integer register.
+ */
+static short
+bb_reg_code_offset(enum bb_reg_code reg)
+{
+	int slot = reg - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+	return bb_reg_state->contains[slot].offset;
+}
+
+/* Record in the current bb_reg_state that integer register 'dst' contains
+ * the value 'src'. The recorded offset is not touched.
+ */
+static void
+bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src)
+{
+	int slot = dst - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(dst), dst, );
+	bb_reg_state->contains[slot].value = src;
+}
+
+/* Record in the current bb_reg_state the offset that goes with the contents
+ * of integer register 'dst'. The recorded value is not touched.
+ */
+static void
+bb_reg_code_set_offset(enum bb_reg_code dst, short offset)
+{
+	int slot = dst - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(dst), dst, );
+	bb_reg_state->contains[slot].offset = offset;
+}
+
+/* True if 'reg' is an integer register whose recorded contents are BBRG_OSP,
+ * i.e. it currently holds a stack pointer value.
+ */
+static bool
+bb_is_osp_defined(enum bb_reg_code reg)
+{
+	if (!bb_is_int_reg(reg))
+		return 0;
+	return bb_reg_code_value(reg) == BBRG_OSP;
+}
+
+/* Return the value stored in bb_actual[] for integer register 'reg'. */
+static bfd_vma
+bb_actual_value(enum bb_reg_code reg)
+{
+	int slot = reg - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+	return bb_actual[slot].value;
+}
+
+/* Return the valid flag stored in bb_actual[] for integer register 'reg'. */
+static int
+bb_actual_valid(enum bb_reg_code reg)
+{
+	int slot = reg - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+	return bb_actual[slot].valid;
+}
+
+/* Store 'value' in bb_actual[] for integer register 'reg'. The valid flag
+ * is not touched.
+ */
+static void
+bb_actual_set_value(enum bb_reg_code reg, bfd_vma value)
+{
+	int slot = reg - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(reg), reg, );
+	bb_actual[slot].value = value;
+}
+
+/* Store the valid flag in bb_actual[] for integer register 'reg'. The value
+ * is not touched.
+ */
+static void
+bb_actual_set_valid(enum bb_reg_code reg, int valid)
+{
+	int slot = reg - BBRG_RAX;
+	BB_CHECK(!bb_is_int_reg(reg), reg, );
+	bb_actual[slot].valid = valid;
+}
+
+/* True if the address being analysed lies in the scheduler text, i.e. in
+ * [bb__sched_text_start, bb__sched_text_end). The scheduler switches RSP and
+ * then does PUSH, so an undefined RSP is not an error in that range.
+ */
+static bool
+bb_is_scheduler_address(void)
+{
+	if (bb_curr_addr < bb__sched_text_start)
+		return 0;
+	return bb_curr_addr < bb__sched_text_end;
+}
+
+/* Note that register 'reg' has been read. Only a read of an integer
+ * register that still contains its own original value is of interest. If
+ * that register is one of the first REGPARM entries of bb_param_reg[] then
+ * bb_reg_params is raised to its 1 based position in that list.
+ */
+static void
+bb_reg_read(enum bb_reg_code reg)
+{
+	unsigned int limit = min_t(unsigned int, REGPARM,
+				   ARRAY_SIZE(bb_param_reg));
+	unsigned int n;
+	int r = 0;
+	if (!bb_is_int_reg(reg))
+		return;
+	if (bb_reg_code_value(reg) != reg)
+		return;
+	for (n = 0; n < limit; ++n) {
+		if (bb_param_reg[n] != reg)
+			continue;
+		r = n + 1;
+		break;
+	}
+	bb_reg_params = max(bb_reg_params, r);
+}
+
+/* Print a register state unconditionally: what every integer register
+ * contains, followed by the first mem_count memory slots. An offset is only
+ * printed for an entry that contains BBRG_OSP.
+ */
+static void
+bb_do_reg_state_print(const struct bb_reg_state *s)
+{
+	int idx;
+	kdb_printf(" bb_reg_state %p\n", s);
+	for (idx = 0; idx < ARRAY_SIZE(s->contains); ++idx) {
+		enum bb_reg_code held = s->contains[idx].value;
+		int held_offset = s->contains[idx].offset;
+		kdb_printf(" %s = %s",
+			   bbrg_name[idx + BBRG_RAX], bbrg_name[held]);
+		if (held == BBRG_OSP)
+			KDB_DEBUG_BB_OFFSET_PRINTF(held_offset, "", "");
+		kdb_printf("\n");
+	}
+	for (idx = 0; idx < s->mem_count; ++idx) {
+		const struct bb_memory_contains *slot = s->memory + idx;
+		int where = slot->offset_address;
+		enum bb_reg_code held = slot->value;
+		int held_offset = slot->offset_value;
+		/* the address is printed as a sign followed by its magnitude */
+		kdb_printf(" slot %d offset_address %c0x%x %s",
+			   idx,
+			   where >= 0 ? '+' : '-',
+			   where >= 0 ? where : -where,
+			   bbrg_name[held]);
+		if (held == BBRG_OSP)
+			KDB_DEBUG_BB_OFFSET_PRINTF(held_offset, "", "");
+		kdb_printf("\n");
+	}
+}
+
+/* Print a register state, but only when BB debugging is enabled. */
+static void
+bb_reg_state_print(const struct bb_reg_state *s)
+{
+	if (!KDB_DEBUG(BB))
+		return;
+	bb_do_reg_state_print(s);
+}
+
+/* Copy the contents of register 'src' into register 'dst'. This counts as a
+ * read of 'src' and a write to 'dst'. The offset is only copied when 'src'
+ * contains a stack pointer, otherwise 'dst' gets offset 0.
+ *
+ * 'dst' and 'src' name registers, they do not name values. For
+ * "mov %rax,%rsi" the call is bb_reg_set_reg(BBRG_RSI, BBRG_RAX), which
+ * mirrors the instruction. What gets recorded is the _value_ that rax holds
+ * at that point, so that afterwards both rax and rsi are known to hold a
+ * copy of the same value. A 'src' that is not an integer register leaves
+ * 'dst' undefined.
+ */
+
+static void
+bb_reg_set_reg(enum bb_reg_code dst, enum bb_reg_code src)
+{
+	enum bb_reg_code contents = BBRG_UNDEFINED;
+	short contents_offset = 0;
+	KDB_DEBUG_BB(" %s = %s", bbrg_name[dst], bbrg_name[src]);
+	if (bb_is_int_reg(src)) {
+		bb_reg_read(src);
+		contents = bb_reg_code_value(src);
+		KDB_DEBUG_BB(" (%s", bbrg_name[contents]);
+		if (bb_is_osp_defined(src)) {
+			contents_offset = bb_reg_code_offset(src);
+			KDB_DEBUG_BB_OFFSET(contents_offset, "", "");
+		}
+		KDB_DEBUG_BB(")");
+	}
+	if (bb_is_int_reg(dst)) {
+		bb_reg_code_set_value(dst, contents);
+		bb_reg_code_set_offset(dst, contents_offset);
+	}
+	KDB_DEBUG_BB("\n");
+}
+
+/* Forget what register 'dst' contains: it is set from BBRG_UNDEFINED. */
+static void
+bb_reg_set_undef(enum bb_reg_code dst)
+{
+	const enum bb_reg_code nothing = BBRG_UNDEFINED;
+	bb_reg_set_reg(dst, nothing);
+}
+
+/* Forget any register that is recorded as saved at osp + 'offset'. A
+ * matching slot is cleared to all BBRG_UNDEFINED bytes. mem_count is only
+ * reduced when the cleared slot is the last one in use.
+ */
+
+static void
+bb_delete_memory(short offset)
+{
+	int i;
+	struct bb_memory_contains *slot;
+	for (i = 0; i < bb_reg_state->mem_count; ++i) {
+		slot = bb_reg_state->memory + i;
+		if (slot->offset_address != offset)
+			continue;
+		if (slot->value == BBRG_UNDEFINED)
+			continue;
+		KDB_DEBUG_BB(" delete %s from ",
+			     bbrg_name[slot->value]);
+		KDB_DEBUG_BB_OFFSET(offset, "osp", "");
+		KDB_DEBUG_BB(" slot %d\n", i);
+		memset(slot, BBRG_UNDEFINED, sizeof(*slot));
+		if (i == bb_reg_state->mem_count - 1)
+			--bb_reg_state->mem_count;
+	}
+}
+
+/* Set memory location *('dst' + 'offset_address') to contain the supplied
+ * value and offset. 'dst' must be a register that contains a stack pointer,
+ * its own osp offset is added to 'offset_address' to get the osp relative
+ * address of the slot.
+ *
+ * A slot that already has this address is reused, otherwise the first empty
+ * slot is taken. When there is no usable slot the state is reallocated with
+ * room for 5 more slots. If that allocation fails then bb_giveup is set,
+ * nothing is stored and bb_reg_state and bb_reg_state_max are left as they
+ * were, so that they still describe the same buffer.
+ */
+
+static void
+bb_memory_set_reg_value(enum bb_reg_code dst, short offset_address,
+			enum bb_reg_code value, short offset_value)
+{
+	int i;
+	struct bb_memory_contains *c, *free = NULL;
+	BB_CHECK(!bb_is_osp_defined(dst), dst, );
+	KDB_DEBUG_BB(" *(%s", bbrg_name[dst]);
+	KDB_DEBUG_BB_OFFSET(offset_address, "", "");
+	offset_address += bb_reg_code_offset(dst);
+	KDB_DEBUG_BB_OFFSET(offset_address, " osp", ") = ");
+	KDB_DEBUG_BB("%s", bbrg_name[value]);
+	if (value == BBRG_OSP)
+		KDB_DEBUG_BB_OFFSET(offset_value, "", "");
+	/* All allocated slots are scanned, not just the first mem_count. A
+	 * slot with the same address always wins over an empty slot.
+	 */
+	for (i = 0, c = bb_reg_state->memory;
+	     i < bb_reg_state_max;
+	     ++i, ++c) {
+		if (c->offset_address == offset_address)
+			free = c;
+		else if (c->value == BBRG_UNDEFINED && !free)
+			free = c;
+	}
+	if (!free) {
+		struct bb_reg_state *new, *old = bb_reg_state;
+		size_t old_size, new_size;
+		int slot = bb_reg_state_max;
+		old_size = sizeof(*old) + bb_reg_state_max *
+			sizeof(old->memory[0]);
+		new_size = old_size + 5 * sizeof(new->memory[0]);
+		new = debug_kmalloc(new_size, GFP_ATOMIC);
+		if (!new) {
+			kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+			bb_giveup = 1;
+		} else {
+			memcpy(new, old, old_size);
+			memset((char *)new + old_size, BBRG_UNDEFINED,
+			       new_size - old_size);
+			bb_reg_state = new;
+			/* Only grow the slot count once the larger buffer
+			 * really exists, otherwise later scans would run off
+			 * the end of the old buffer.
+			 */
+			bb_reg_state_max += 5;
+			debug_kfree(old);
+			free = bb_reg_state->memory + slot;
+		}
+	}
+	if (free) {
+		int slot = free - bb_reg_state->memory;
+		free->offset_address = offset_address;
+		free->value = value;
+		free->offset_value = offset_value;
+		KDB_DEBUG_BB(" slot %d", slot);
+		bb_reg_state->mem_count = max(bb_reg_state->mem_count, slot+1);
+	}
+	KDB_DEBUG_BB("\n");
+}
+
+/* Store register 'src' at memory location *('dst' + 'offset_address').
+ * 'dst' must be a register that contains a stack pointer. Unlike
+ * bb_memory_set_reg_value(), which is handed the value and offset directly,
+ * this function looks up what 'src' currently contains. Nothing happens if
+ * 'src' is not an integer register. If 'src' is undefined then any existing
+ * record for that memory location is deleted instead.
+ */
+
+static void
+bb_memory_set_reg(enum bb_reg_code dst, enum bb_reg_code src,
+		  short offset_address)
+{
+	enum bb_reg_code contents;
+	int contents_offset;
+	BB_CHECK(!bb_is_osp_defined(dst), dst, );
+	if (!bb_is_int_reg(src))
+		return;
+	contents = bb_reg_code_value(src);
+	if (contents != BBRG_UNDEFINED) {
+		contents_offset = bb_reg_code_offset(src);
+		bb_reg_read(src);
+		bb_memory_set_reg_value(dst, offset_address, contents,
+					contents_offset);
+	} else {
+		bb_delete_memory(offset_address + bb_reg_code_offset(dst));
+	}
+}
+
+/* Load register 'dst' from memory location *('src' + offset_address). 'src'
+ * must be a register that contains a stack pointer. If a slot is recorded
+ * for that location and 'dst' is an integer register then 'dst' takes over
+ * the value of the slot, and its offset if the value is BBRG_OSP. Otherwise
+ * 'dst' becomes undefined.
+ */
+
+static void
+bb_reg_set_memory(enum bb_reg_code dst, enum bb_reg_code src, short offset_address)
+{
+	int i, found = 0;
+	struct bb_memory_contains *slot;
+	BB_CHECK(!bb_is_osp_defined(src), src, );
+	KDB_DEBUG_BB(" %s = *(%s",
+		     bbrg_name[dst], bbrg_name[src]);
+	KDB_DEBUG_BB_OFFSET(offset_address, "", ")");
+	offset_address += bb_reg_code_offset(src);
+	KDB_DEBUG_BB_OFFSET(offset_address, " (osp", ")");
+	for (i = 0; i < bb_reg_state->mem_count; ++i) {
+		slot = bb_reg_state->memory + i;
+		if (slot->offset_address != offset_address ||
+		    !bb_is_int_reg(dst))
+			continue;
+		bb_reg_code_set_value(dst, slot->value);
+		KDB_DEBUG_BB(" value %s", bbrg_name[slot->value]);
+		if (slot->value == BBRG_OSP) {
+			bb_reg_code_set_offset(dst, slot->offset_value);
+			KDB_DEBUG_BB_OFFSET(slot->offset_value, "", "");
+		} else {
+			bb_reg_code_set_offset(dst, 0);
+		}
+		found = 1;
+	}
+	if (found) {
+		KDB_DEBUG_BB("\n");
+	} else {
+		bb_reg_set_reg(dst, BBRG_UNDEFINED);
+	}
+}
+
+/* A generic read from an operand. The base and index registers count as
+ * read. If the operand is a simple memory reference through a register that
+ * contains a stack pointer, and the instruction is not lea, then
+ * bb_memory_params is raised to the osp relative offset of the reference,
+ * rounded up to whole words.
+ */
+
+static void
+bb_read_operand(const struct bb_operand *operand)
+{
+	int m = 0;
+	if (operand->base_rc)
+		bb_reg_read(operand->base_rc);
+	if (operand->index_rc)
+		bb_reg_read(operand->index_rc);
+	if (!bb_is_simple_memory(operand))
+		return;
+	if (!bb_is_osp_defined(operand->base_rc))
+		return;
+	if (bb_decode.match->usage == BBOU_LEA)
+		return;
+	m = (bb_reg_code_offset(operand->base_rc) + operand->disp +
+	     KDB_WORD_SIZE - 1) / KDB_WORD_SIZE;
+	bb_memory_params = max(bb_memory_params, m);
+}
+
+/* A generic write to an operand, after which the written location holds an
+ * undefined value. Operands with a well defined result are handled
+ * elsewhere, this function is only for opcodes whose result is unknown.
+ *
+ * For a memory operand the base and index registers count as read. A
+ * register operand becomes undefined. If the write goes through a register
+ * that contains a stack pointer then the record for the affected stack slot
+ * is deleted.
+ */
+
+static void
+bb_write_operand(const struct bb_operand *operand)
+{
+	enum bb_reg_code base = operand->base_rc;
+	int slot_offset;
+	if (operand->memory) {
+		if (base)
+			bb_reg_read(base);
+		if (operand->index_rc)
+			bb_reg_read(operand->index_rc);
+	} else if (operand->reg && base) {
+		bb_reg_set_undef(base);
+	}
+	if (!bb_is_simple_memory(operand) || !bb_is_osp_defined(base))
+		return;
+	slot_offset = bb_reg_code_offset(base) + operand->disp;
+	slot_offset = ALIGN(slot_offset - KDB_WORD_SIZE + 1, KDB_WORD_SIZE);
+	bb_delete_memory(slot_offset);
+}
+
+/* Add 'adjust' to the osp offset of register 'reg', which contains a stack
+ * pointer. When the register is RSP and it moves upwards, every stack word
+ * between the old and the new offset is no longer valid, so the records for
+ * those words are deleted.
+ */
+
+static void
+bb_adjust_osp(enum bb_reg_code reg, int adjust)
+{
+	int before = bb_reg_code_offset(reg);
+	int walk;
+	KDB_DEBUG_BB(" %s osp offset ", bbrg_name[reg]);
+	KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", " -> ");
+	bb_reg_code_set_offset(reg, before + adjust);
+	KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", "\n");
+	if (reg != BBRG_RSP)
+		return;
+	for (walk = before;
+	     walk < bb_reg_code_offset(reg);
+	     walk += KDB_WORD_SIZE) {
+		bb_delete_memory(walk);
+	}
+}
+
+/* The current instruction changes a register that contains a stack pointer.
+ * 'direction' is 1 for add/lea and -1 for sub. With an immediate source, or
+ * for lea, the osp offset of the destination moves by a known amount. Any
+ * other source is a variable adjustment, so the destination register becomes
+ * undefined.
+ */
+
+static void
+bb_adjust_osp_instruction(int direction)
+{
+	enum bb_reg_code target = bb_decode.dst.base_rc;
+	int known = bb_decode.src.immediate ||
+		bb_decode.match->usage == BBOU_LEA; /* lea has its own checks */
+	if (!known) {
+		/* variable stack adjustment, osp offset is not well defined */
+		KDB_DEBUG_BB(" %s osp offset ", bbrg_name[target]);
+		KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(target), "", " -> undefined\n");
+		bb_reg_code_set_value(target, BBRG_UNDEFINED);
+		bb_reg_code_set_offset(target, 0);
+	} else {
+		int adjust = direction * bb_decode.src.disp;
+		bb_adjust_osp(target, adjust);
+	}
+}
+
+/* Some instructions using memory have an explicit length suffix (b, w, l, q).
+ * The equivalent instructions using a register imply the length from the
+ * register name. Deduce the operand length.
+ *
+ * Returns the length in bits: 8, 16, 32 or 64. Returns 0 if there is no
+ * recognised suffix and the operand is not a register, or if the register
+ * name (including its leading character) is not 3 or 4 characters long.
+ *
+ * NOTE(review): every 4 character name whose second character is 'r' is
+ * reported as 64 bits and every other 4 character name as 32 bits, so names
+ * such as %r8d or %sil are not distinguished - confirm whether that matters
+ * to the callers.
+ */
+
+static int
+bb_operand_length(const struct bb_operand *operand, char opcode_suffix)
+{
+	int l = 0;
+	switch (opcode_suffix) {
+	case 'b':
+		l = 8;
+		break;
+	case 'w':
+		l = 16;
+		break;
+	case 'l':
+		l = 32;
+		break;
+	case 'q':
+		l = 64;
+		break;
+	}
+	if (l == 0 && operand->reg) {
+		switch (strlen(operand->base)) {
+		case 3:
+			if (operand->base[1] == 'r') {
+				/* %r8 and %r9 are full 64 bit registers */
+				l = 64;
+				break;
+			}
+			switch (operand->base[2]) {
+			case 'h':
+			case 'l':
+				l = 8;
+				break;
+			default:
+				l = 16;
+				break;
+			}
+			/* do not fall through and overwrite the length that
+			 * was just worked out
+			 */
+			break;
+		case 4:
+			if (operand->base[1] == 'r')
+				l = 64;
+			else
+				l = 32;
+			break;
+		}
+	}
+	return l;
+}
+
+/* Size in bytes of the used part of a register state: the fixed part plus
+ * the first mem_count memory slots.
+ */
+static int
+bb_reg_state_size(const struct bb_reg_state *state)
+{
+	const size_t per_slot = sizeof(state->memory[0]);
+	return sizeof(*state) + state->mem_count * per_slot;
+}
+
+/* Canonicalize the current bb_reg_state so it can be compared against
+ * previously created states. Sort the memory entries in descending order of
+ * offset_address (stack grows down). Empty slots are moved to the end of the
+ * list and trimmed, so mem_count ends up as the number of slots up to and
+ * including the last one in use, which is 0 when no slot is in use.
+ */
+
+static void
+bb_reg_state_canonicalize(void)
+{
+	int i, order, changed;
+	struct bb_memory_contains *p1, *p2, temp;
+	/* Bubble sort, repeat until a pass makes no swap. Neighbours are
+	 * swapped when the first one is empty and the second is not, or when
+	 * both are in use and the first has the lower address.
+	 */
+	do {
+		changed = 0;
+		for (i = 0, p1 = bb_reg_state->memory;
+		     i < bb_reg_state->mem_count-1;
+		     ++i, ++p1) {
+			p2 = p1 + 1;
+			if (p2->value == BBRG_UNDEFINED) {
+				order = 0;
+			} else if (p1->value == BBRG_UNDEFINED) {
+				order = 1;
+			} else if (p1->offset_address < p2->offset_address) {
+				order = 1;
+			} else if (p1->offset_address > p2->offset_address) {
+				order = -1;
+			} else {
+				order = 0;
+			}
+			if (order > 0) {
+				temp = *p2;
+				*p2 = *p1;
+				*p1 = temp;
+				changed = 1;
+			}
+		}
+	} while(changed);
+	/* Recalculate mem_count from scratch. Without the reset a state in
+	 * which every slot is empty would keep its old, non-zero count.
+	 */
+	bb_reg_state->mem_count = 0;
+	for (i = 0, p1 = bb_reg_state->memory;
+	     i < bb_reg_state_max;
+	     ++i, ++p1) {
+		if (p1->value != BBRG_UNDEFINED)
+			bb_reg_state->mem_count = i + 1;
+	}
+	bb_reg_state_print(bb_reg_state);
+}
+
+/* Check a transfer of control to address 'to' against bb_special_cases[].
+ * An entry matches when its address is 'to' and its fname is either NULL or
+ * the name of the current function. On a match the current register state
+ * and the recorded stack memory are compared with what the entry expects.
+ * Any mismatch is printed and bb_giveup is set.
+ *
+ * Returns 1 if the transfer was recognised, whether or not the state
+ * matched, and also for any transfer out of strnlen_user or copy_from_user.
+ * Returns 0 if the transfer is not a special case.
+ */
+static int
+bb_special_case(bfd_vma to)
+{
+ int i, j, rsp_offset, expect_offset, offset, errors = 0, max_errors = 40;
+ enum bb_reg_code reg, expect_value, value;
+ struct bb_name_state *r;
+
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (to == r->address &&
+ (r->fname == NULL || strcmp(bb_func_name, r->fname) == 0))
+ goto match;
+ }
+ /* Some inline assembler code has jumps to .fixup sections which result
+ * in out of line transfers with undefined state, ignore them.
+ */
+ if (strcmp(bb_func_name, "strnlen_user") == 0 ||
+ strcmp(bb_func_name, "copy_from_user") == 0)
+ return 1;
+ return 0;
+
+match:
+ /* Check the running registers match */
+ for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
+ expect_value = r->regs[reg].value;
+ if (test_bit(expect_value, r->skip_regs.bits)) {
+ /* this regs entry is not defined for this label */
+ continue;
+ }
+ if (expect_value == BBRG_UNDEFINED)
+ continue;
+ expect_offset = r->regs[reg].offset;
+ value = bb_reg_code_value(reg);
+ offset = bb_reg_code_offset(reg);
+ /* for a stack pointer the offset is compared against
+ * r->osp_offset, expect_offset is not used in the comparison
+ */
+ if (expect_value == value &&
+ (value != BBRG_OSP || r->osp_offset == offset))
+ continue;
+ kdb_printf("%s: Expected %s to contain %s",
+ __FUNCTION__,
+ bbrg_name[reg],
+ bbrg_name[expect_value]);
+ if (r->osp_offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(r->osp_offset, "", "");
+ kdb_printf(". It actually contains %s", bbrg_name[value]);
+ if (offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
+ kdb_printf("\n");
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ /* Check that any memory data on stack matches */
+ /* i walks the recorded memory slots and j walks the expected slots,
+ * both are stepped in the manner of a merge of two sorted lists
+ */
+ i = j = 0;
+ while (i < bb_reg_state->mem_count &&
+ j < r->mem_size) {
+ expect_value = r->mem[j].value;
+ if (test_bit(expect_value, r->skip_mem.bits) ||
+ expect_value == BBRG_UNDEFINED) {
+ /* this memory slot is not defined for this label */
+ ++j;
+ continue;
+ }
+ /* expected addresses are compared against the slot address
+ * relative to the current RSP
+ */
+ rsp_offset = bb_reg_state->memory[i].offset_address -
+ bb_reg_code_offset(BBRG_RSP);
+ if (rsp_offset >
+ r->mem[j].offset_address) {
+ /* extra slots in memory are OK */
+ ++i;
+ } else if (rsp_offset <
+ r->mem[j].offset_address) {
+ /* Required memory slot is missing */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "missing memory entry[%d] %s\n",
+ __FUNCTION__, j, bbrg_name[expect_value]);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ ++j;
+ } else {
+ if (bb_reg_state->memory[i].offset_value ||
+ bb_reg_state->memory[i].value != expect_value) {
+ /* memory slot is present but contains wrong
+ * value.
+ */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "wrong value in slot %d, "
+ "should be %s, it is %s\n",
+ __FUNCTION__, i,
+ bbrg_name[expect_value],
+ bbrg_name[bb_reg_state->memory[i].value]);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ ++i;
+ ++j;
+ }
+ }
+ /* skip any remaining expected slots that do not have to be present */
+ while (j < r->mem_size) {
+ expect_value = r->mem[j].value;
+ if (test_bit(expect_value, r->skip_mem.bits) ||
+ expect_value == BBRG_UNDEFINED)
+ ++j;
+ else
+ break;
+ }
+ if (j != r->mem_size) {
+ /* Hit end of memory before testing all the pt_reg slots */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "missing trailing entries\n",
+ __FUNCTION__);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ if (errors)
+ goto fail;
+ return 1;
+fail:
+ kdb_printf("%s: on transfer to %s\n", __FUNCTION__, r->name);
+ bb_giveup = 1;
+ return 1;
+}
+
+/* Transfer of control to a label outside the current function. If the
+ * transfer is to a known common code path then do a sanity check on the state
+ * at this point.
+ *
+ * Every register listed in bb_preserved_reg[] is expected to still contain
+ * its own original value, apart from the exceptions that are tested below.
+ * Each violation is printed, then BB_CHECK is invoked if there was at least
+ * one. A non-zero 'type' relaxes the checks on RSP and RBP.
+ */
+
+static void
+bb_sanity_check(int type)
+{
+ enum bb_reg_code expect, actual;
+ int i, offset, error = 0;
+
+ for (i = 0; i < ARRAY_SIZE(bb_preserved_reg); ++i) {
+ expect = bb_preserved_reg[i];
+ actual = bb_reg_code_value(expect);
+ offset = bb_reg_code_offset(expect);
+ if (expect == actual)
+ continue;
+ /* type == 1 is sysret/sysexit, ignore RSP */
+ if (type && expect == BBRG_RSP)
+ continue;
+ /* type == 1 is sysret/sysexit, ignore RBP for i386 */
+ /* We used to have "#ifndef CONFIG_X86_64" for the type=1 RBP
+ * test; however, x86_64 can run ia32 compatible mode and
+ * hit this problem. Perform the following test anyway!
+ */
+ if (type && expect == BBRG_RBP)
+ continue;
+ /* RSP should contain OSP+0. Except for ptregscall_common and
+ * ia32_ptregs_common, they get a partial pt_regs, fudge the
+ * stack to make it a full pt_regs then reverse the effect on
+ * exit, so the offset is -0x50 on exit.
+ */
+ if (expect == BBRG_RSP &&
+ bb_is_osp_defined(expect) &&
+ (offset == 0 ||
+ (offset == -0x50 &&
+ (strcmp(bb_func_name, "ptregscall_common") == 0 ||
+ strcmp(bb_func_name, "ia32_ptregs_common") == 0))))
+ continue;
++ /* The put_user and save_paranoid functions are special.
++ * %rbx gets clobbered */
++ if (expect == BBRG_RBX &&
++ (strncmp(bb_func_name, "__put_user_", 11) == 0 ||
++ strcmp(bb_func_name, "save_paranoid") == 0))
++ continue;
++ /* Ignore rbx, and rsp at osp-0x10, for error_entry.
++ * NOTE(review): this comment used to say "rbp and rsp" but the
++ * code tests BBRG_RBX - confirm which register is intended. */
++ if ((strcmp(bb_func_name, "error_entry") == 0) &&
++ (expect == BBRG_RBX ||
++ (expect == BBRG_RSP && bb_is_osp_defined(expect) && offset == -0x10)))
++ continue;
+ kdb_printf("%s: Expected %s, got %s",
+ __FUNCTION__,
+ bbrg_name[expect], bbrg_name[actual]);
+ if (offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
+ kdb_printf("\n");
+ error = 1;
+ }
+ BB_CHECK(error, error, );
+}
+
+/* Follow a transfer of control from 'from' to 'to'.  The current register
+ * state becomes one of the inputs of the basic block that starts at 'to'.
+ * When that input differs from what the arc carried on a previous scan, the
+ * target block is flagged as changed so it gets rescanned.
+ */
+
+static void
+bb_transfer(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+	int i, found;
+	size_t state_size;
+	struct bb *target = NULL;	/* initialised only to keep gcc quiet */
+	struct bb_jmp *arc;
+	struct bb_reg_state *saved;
+
+	bb_reg_state_canonicalize();
+
+	/* Locate the arc in the jump list of the current function */
+	found = 0;
+	for (i = 0; i < bb_jmp_count; ++i) {
+		arc = bb_jmp_list + i;
+		if (arc->from != from)
+			continue;
+		if (arc->to != to)
+			continue;
+		if (arc->drop_through != drop_through)
+			continue;
+		found = 1;
+		break;
+	}
+
+	if (!found) {
+		/* The arc leaves the current function.  Known special cases
+		 * (mainly in entry.S) are tried first.  Failing that, a
+		 * target that is the start of a function is treated as tail
+		 * recursion and the state must match the state on entry.
+		 * Anything else is assumed to be out of line code (e.g. a
+		 * spinlock contention path) where the state can be anything,
+		 * so it is ignored.
+		 */
+		kdb_symtab_t symtab;
+
+		if (bb_special_case(to))
+			return;
+		kdbnearsym(to, &symtab);
+		if (symtab.sym_start != to)
+			return;
+		bb_sanity_check(0);
+		if (bb_giveup)
+			return;
+#ifdef	NO_SIBLINGS
+		/* Only useful when the kernel is built with
+		 * -fno-optimize-sibling-calls, otherwise every tail recursion
+		 * call would print this.  Seeing the message probably means
+		 * that an assembler label is missing from the special cases.
+		 */
+		kdb_printf(" not matched: from "
+			   kdb_bfd_vma_fmt0
+			   " to " kdb_bfd_vma_fmt0
+			   " drop_through %d bb_jmp[%d]\n",
+			   from, to, drop_through, i);
+#endif	/* NO_SIBLINGS */
+		return;
+	}
+
+	KDB_DEBUG_BB(" matched: from " kdb_bfd_vma_fmt0
+		     " to " kdb_bfd_vma_fmt0
+		     " drop_through %d bb_jmp[%d]\n",
+		     from, to, drop_through, i);
+
+	/* Locate the basic block that the arc leads to */
+	found = 0;
+	for (i = 0; i < bb_count; ++i) {
+		target = bb_list[i];
+		if (target->start == to) {
+			found = 1;
+			break;
+		}
+	}
+	BB_CHECK(!found, to, );
+
+	/* The arc already carries a state when the originating block is being
+	 * rescanned.  If that state equals the current one then the input to
+	 * the target block has not changed and the target does not need
+	 * another scan.  ref_count is copied across first so that it does not
+	 * take part in the comparison.
+	 */
+	saved = arc->state;
+	state_size = bb_reg_state_size(bb_reg_state);
+	if (saved) {
+		bb_reg_state->ref_count = saved->ref_count;
+		if (memcmp(saved, bb_reg_state, state_size) == 0) {
+			KDB_DEBUG_BB(" no state change\n");
+			return;
+		}
+		if (--saved->ref_count == 0)
+			debug_kfree(saved);
+		arc->state = NULL;
+	}
+
+	/* The arc needs a new input state, so the set of inputs to the target
+	 * block has changed and the target must be rescanned.  To save space,
+	 * share the state of any other arc that is identical.
+	 */
+	target->changed = 1;
+	for (i = 0; i < bb_jmp_count; ++i) {
+		saved = bb_jmp_list[i].state;
+		if (!saved)
+			continue;
+		bb_reg_state->ref_count = saved->ref_count;
+		if (memcmp(saved, bb_reg_state, state_size) != 0)
+			continue;
+		KDB_DEBUG_BB(" reuse bb_jmp[%d]\n", i);
+		arc->state = saved;
+		++saved->ref_count;
+		return;
+	}
+
+	/* Nothing to share, take a private copy of the current state */
+	saved = debug_kmalloc(state_size, GFP_ATOMIC);
+	if (!saved) {
+		kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+		bb_giveup = 1;
+		return;
+	}
+	memcpy(saved, bb_reg_state, state_size);
+	saved->ref_count = 1;
+	arc->state = saved;
+	KDB_DEBUG_BB(" new state %p\n", saved);
+}
+
+/* Isolate the processing for 'mov' so it can be used for 'xadd'/'xchg' as
+ * well.
+ *
+ * src and dst are the decoded operands of the move.  l is the length of the
+ * opcode mnemonic without its size suffix, so bb_decode.opcode[l] is the
+ * suffix character that is passed to bb_operand_length().
+ *
+ * xadd/xchg expect this function to return BBOU_NOP for special cases,
+ * otherwise it returns BBOU_RSWD. All special cases must be handled entirely
+ * within this function, including doing bb_read_operand or bb_write_operand
+ * where necessary.
+ */
+
+static enum bb_operand_usage
+bb_usage_mov(const struct bb_operand *src, const struct bb_operand *dst, int l)
+{
+ int full_register_src, full_register_dst;
+ full_register_src = bb_operand_length(src, bb_decode.opcode[l])
+ == KDB_WORD_SIZE * 8;
+ full_register_dst = bb_operand_length(dst, bb_decode.opcode[l])
+ == KDB_WORD_SIZE * 8;
+ /* If both src and dst are full integer registers then record the
+ * register change.
+ */
+ if (src->reg &&
+ bb_is_int_reg(src->base_rc) &&
+ dst->reg &&
+ bb_is_int_reg(dst->base_rc) &&
+ full_register_src &&
+ full_register_dst) {
+ /* Special case for the code that switches stacks in
+ * jprobe_return. That code must modify RSP but it does it in
+ * a well defined manner. Do not invalidate RSP.
+ */
+ if (src->base_rc == BBRG_RBX &&
+ dst->base_rc == BBRG_RSP &&
+ strcmp(bb_func_name, "jprobe_return") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ /* math_abort takes the equivalent of a longjmp structure and
+ * resets the stack. Ignore this, it leaves RSP well defined.
+ */
+ if (dst->base_rc == BBRG_RSP &&
+ strcmp(bb_func_name, "math_abort") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ bb_reg_set_reg(dst->base_rc, src->base_rc);
+ return BBOU_NOP;
+ }
+ /* If the move is from a full integer register to stack then record it.
+ * The source must be an integer register, as in the other two tracked
+ * cases; segment, control and other non-integer registers have no
+ * tracked contents and fall through to the generic handling.
+ */
+ if (src->reg &&
+ bb_is_int_reg(src->base_rc) &&
+ bb_is_simple_memory(dst) &&
+ bb_is_osp_defined(dst->base_rc) &&
+ full_register_src) {
+ /* Ugly special case. Initializing list heads on stack causes
+ * false references to stack variables when the list head is
+ * used. Static code analysis cannot detect that the list head
+ * has been changed by a previous execution loop and that a
+ * basic block is only executed after the list head has been
+ * changed.
+ *
+ * These false references can result in valid stack variables
+ * being incorrectly cleared on some logic paths. Ignore
+ * stores to stack variables which point to themselves or to
+ * the previous word so the list head initialization is not
+ * recorded.
+ */
+ if (bb_is_osp_defined(src->base_rc)) {
+ int stack1 = bb_reg_code_offset(src->base_rc);
+ int stack2 = bb_reg_code_offset(dst->base_rc) +
+ dst->disp;
+ if (stack1 == stack2 ||
+ stack1 == stack2 - KDB_WORD_SIZE)
+ return BBOU_NOP;
+ }
+ bb_memory_set_reg(dst->base_rc, src->base_rc, dst->disp);
+ return BBOU_NOP;
+ }
+ /* If the move is from stack to a full integer register then record it.
+ */
+ if (bb_is_simple_memory(src) &&
+ bb_is_osp_defined(src->base_rc) &&
+ dst->reg &&
+ bb_is_int_reg(dst->base_rc) &&
+ full_register_dst) {
+#ifdef CONFIG_X86_32
- #ifndef TSS_sysenter_sp0
- #define TSS_sysenter_sp0 SYSENTER_stack_sp0
- #endif
+ /* mov from TSS_sysenter_sp0+offset to esp to fix up the
+ * sysenter stack, it leaves esp well defined. mov
+ * TSS_sysenter_sp0+offset(%esp),%esp is followed by up to 5
+ * push instructions to mimic the hardware stack push. If
+ * TSS_sysenter_sp0 is offset then only 3 words will be
+ * pushed.
+ */
+ if (dst->base_rc == BBRG_RSP &&
+ src->disp >= TSS_sysenter_sp0 &&
+ bb_is_osp_defined(BBRG_RSP)) {
+ int pushes;
+ pushes = src->disp == TSS_sysenter_sp0 ? 5 : 3;
+ bb_reg_code_set_offset(BBRG_RSP,
+ bb_reg_code_offset(BBRG_RSP) +
+ pushes * KDB_WORD_SIZE);
+ KDB_DEBUG_BB_OFFSET(
+ bb_reg_code_offset(BBRG_RSP),
+ " sysenter fixup, RSP",
+ "\n");
+ return BBOU_NOP;
+ }
+#endif /* CONFIG_X86_32 */
+ bb_read_operand(src);
+ bb_reg_set_memory(dst->base_rc, src->base_rc, src->disp);
+ return BBOU_NOP;
+ }
+ /* move %gs:0x<nn>,%rsp is used to unconditionally switch to another
+ * stack. Ignore this special case, it is handled by the stack
+ * unwinding code.
+ */
+ if (src->segment &&
+ strcmp(src->segment, "%gs") == 0 &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP)
+ return BBOU_NOP;
+ /* move %reg,%reg is a nop */
+ if (src->reg &&
+ dst->reg &&
+ !src->segment &&
+ !dst->segment &&
+ strcmp(src->base, dst->base) == 0)
+ return BBOU_NOP;
+ /* Special case for the code that switches stacks in the scheduler
+ * (switch_to()). That code must modify RSP but it does it in a well
+ * defined manner. Do not invalidate RSP.
+ */
+ if (dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ full_register_dst &&
+ bb_is_scheduler_address()) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ /* Special case for the code that switches stacks in resume from
+ * hibernation code. That code must modify RSP but it does it in a
+ * well defined manner. Do not invalidate RSP.
+ */
+ if (src->memory &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ full_register_dst &&
+ strcmp(bb_func_name, "restore_image") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ /* Not a special case, the caller applies generic read src, write dst */
+ return BBOU_RSWD;
+}
+
+/* Track the register effects of xadd src,dst.
+ *
+ * Returns the generic operand usage that the caller still has to apply:
+ * BBOU_RSRD when the "src = dst" move was handled completely by
+ * bb_usage_mov(), otherwise BBOU_RSRDWS so that src is also treated as
+ * written.
+ */
+static enum bb_operand_usage
+bb_usage_xadd(const struct bb_operand *src, const struct bb_operand *dst)
+{
+	/* Simulate xadd as a series of instructions including mov, that way we
+	 * get the benefit of all the special cases already handled by
+	 * BBOU_MOV.
+	 *
+	 * tmp = src + dst, src = dst, dst = tmp.
+	 *
+	 * For tmp, pick a register that is undefined.  If all registers are
+	 * defined then pick one that is not being used by xadd.
+	 */
+	enum bb_reg_code reg = BBRG_UNDEFINED;
+	struct bb_operand tmp;
+	struct bb_reg_contains save_tmp;
+	enum bb_operand_usage usage;
+	int undefined = 0;
+
+	for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+		if (bb_reg_code_value(reg) == BBRG_UNDEFINED) {
+			undefined = 1;
+			break;
+		}
+	}
+	if (!undefined) {
+		for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+			if (reg != src->base_rc &&
+			    reg != src->index_rc &&
+			    reg != dst->base_rc &&
+			    reg != dst->index_rc &&
+			    reg != BBRG_RSP)
+				break;
+		}
+	}
+	KDB_DEBUG_BB(" %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+	/* The scratch register is only borrowed, its tracked contents are put
+	 * back before returning.
+	 */
+	save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
+	/* tmp = src + dst is a computed value, so its contents are unknown */
+	bb_reg_set_undef(reg);
+	memset(&tmp, 0, sizeof(tmp));
+	tmp.present = 1;
+	tmp.reg = 1;
+	tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
+	if (tmp.base) {
+		tmp.base[0] = '%';
+		strcpy(tmp.base + 1, bbrg_name[reg]);
+	}
+	tmp.base_rc = reg;
+	bb_read_operand(src);
+	bb_read_operand(dst);
+	/* src = dst.  The move goes from dst to src, so a generic result from
+	 * bb_usage_mov() means "write src", which is what BBOU_RSRDWS asks the
+	 * caller to do.
+	 */
+	if (bb_usage_mov(dst, src, sizeof("xadd")-1) == BBOU_NOP)
+		usage = BBOU_RSRD;
+	else
+		usage = BBOU_RSRDWS;
+	/* dst = tmp */
+	bb_usage_mov(&tmp, dst, sizeof("xadd")-1);
+	KDB_DEBUG_BB(" %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+	bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
+	debug_kfree(tmp.base);
+	return usage;
+}
+
+/* Track the register effects of xchg src,dst by modelling the exchange as
+ * three moves through a scratch register:
+ *
+ *   mov dst,tmp; mov src,dst; mov tmp,src;
+ *
+ * Routing the moves through bb_usage_mov() means that every special case
+ * known to BBOU_MOV applies to xchg as well.  The value returned is the
+ * combination of the generic operand reads and writes that bb_usage_mov()
+ * did not deal with itself.
+ */
+static enum bb_operand_usage
+bb_usage_xchg(const struct bb_operand *src, const struct bb_operand *dst)
+{
+	enum bb_reg_code reg;
+	struct bb_operand tmp;
+	struct bb_reg_contains save_tmp;
+	int rs = BBOU_RS;
+	int rd = BBOU_RD;
+	int ws = BBOU_WS;
+	int wd = BBOU_WD;
+
+	/* First choice for tmp is a register whose contents are undefined */
+	reg = BBRG_RAX;
+	while (reg < BBRG_RAX + KDB_INT_REGISTERS &&
+	       bb_reg_code_value(reg) != BBRG_UNDEFINED)
+		++reg;
+
+	/* Every register is defined, so settle for the first one that the
+	 * xchg operands do not use and that is not the stack pointer.
+	 */
+	if (reg == BBRG_RAX + KDB_INT_REGISTERS) {
+		for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+			if (reg == src->base_rc || reg == src->index_rc)
+				continue;
+			if (reg == dst->base_rc || reg == dst->index_rc)
+				continue;
+			if (reg == BBRG_RSP)
+				continue;
+			break;
+		}
+	}
+
+	KDB_DEBUG_BB(" %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+	save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
+
+	/* Build a register operand that names the scratch register */
+	memset(&tmp, 0, sizeof(tmp));
+	tmp.present = 1;
+	tmp.reg = 1;
+	tmp.base_rc = reg;
+	tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
+	if (tmp.base) {
+		tmp.base[0] = '%';
+		strcpy(tmp.base + 1, bbrg_name[reg]);
+	}
+
+	/* mov dst,tmp */
+	if (bb_usage_mov(dst, &tmp, sizeof("xchg")-1) == BBOU_NOP)
+		rd = 0;
+	/* mov src,dst */
+	if (bb_usage_mov(src, dst, sizeof("xchg")-1) == BBOU_NOP) {
+		rs = 0;
+		wd = 0;
+	}
+	/* mov tmp,src */
+	if (bb_usage_mov(&tmp, src, sizeof("xchg")-1) == BBOU_NOP)
+		ws = 0;
+
+	KDB_DEBUG_BB(" %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+	bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
+	debug_kfree(tmp.base);
+	return rs | rd | ws | wd;
+}
+
+/* Mark every integer register that is not listed in bb_preserved_reg as
+ * undefined.
+ */
+
+static void
+bb_invalidate_scratch_reg(void)
+{
+	int reg, k, keep;
+
+	for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+		keep = 0;
+		for (k = 0; k < ARRAY_SIZE(bb_preserved_reg) && !keep; ++k)
+			keep = (reg == bb_preserved_reg[k]);
+		if (!keep)
+			bb_reg_set_undef(reg);
+	}
+}
+
+/* Follow the arcs of a computed jump.  src->disp is used as the address of a
+ * table of words.  Successive words are read with kdb_getword() and each one
+ * is followed as an arc from the current instruction.  The walk ends when
+ * kdb_getword() returns non-zero, when a word lies outside the current
+ * function, or when the analysis has given up.
+ */
+static void
+bb_pass2_computed_jmp(const struct bb_operand *src)
+{
+	unsigned long slot;
+	kdb_machreg_t target;
+
+	for (slot = src->disp; !bb_giveup; slot += KDB_WORD_SIZE) {
+		if (kdb_getword(&target, slot, sizeof(target)))
+			break;
+		if (target < bb_func_start || target >= bb_func_end)
+			break;
+		bb_transfer(bb_curr_addr, target, 0);
+	}
+}
+
+/* The current instruction has been decoded and all the information is in
+ * bb_decode. Based on the opcode, track any operand usage that we care about.
+ *
+ * The work is done in two steps. The switch statement applies the side
+ * effects of each special usage and maps it to BBOU_NOP or to one of the
+ * generic read/write combinations. The code after the switch then applies the
+ * generic combination to the src and dst operands. A usage that is still
+ * special after the switch is reported and the analysis gives up.
+ */
+
+static void
+bb_usage(void)
+{
+ enum bb_operand_usage usage = bb_decode.match->usage;
+ struct bb_operand *src = &bb_decode.src;
+ struct bb_operand *dst = &bb_decode.dst;
+ struct bb_operand *dst2 = &bb_decode.dst2;
+ int opcode_suffix, operand_length;
+
+ /* First handle all the special usage cases, and map them to a generic
+ * case after catering for the side effects.
+ */
+
+ if (usage == BBOU_IMUL &&
+ src->present && !dst->present && !dst2->present) {
+ /* single operand imul, same effects as mul */
+ usage = BBOU_MUL;
+ }
+
+ /* AT&T syntax uses movs<l1><l2> for move with sign extension, instead
+ * of the Intel movsx. The AT&T syntax causes problems for the opcode
+ * mapping; movs with sign extension needs to be treated as a generic
+ * read src, write dst, but instead it falls under the movs I/O
+ * instruction. Fix it.
+ */
+ if (usage == BBOU_MOVS && strlen(bb_decode.opcode) > 5)
+ usage = BBOU_RSWD;
+
+ /* This switch statement deliberately does not use 'default' at the top
+ * level. That way the compiler will complain if a new BBOU_ enum is
+ * added above and not explicitly handled here.
+ */
+ switch (usage) {
+ case BBOU_UNKNOWN: /* drop through */
+ case BBOU_RS: /* drop through */
+ case BBOU_RD: /* drop through */
+ case BBOU_RSRD: /* drop through */
+ case BBOU_WS: /* drop through */
+ case BBOU_RSWS: /* drop through */
+ case BBOU_RDWS: /* drop through */
+ case BBOU_RSRDWS: /* drop through */
+ case BBOU_WD: /* drop through */
+ case BBOU_RSWD: /* drop through */
+ case BBOU_RDWD: /* drop through */
+ case BBOU_RSRDWD: /* drop through */
+ case BBOU_WSWD: /* drop through */
+ case BBOU_RSWSWD: /* drop through */
+ case BBOU_RDWSWD: /* drop through */
+ case BBOU_RSRDWSWD:
+ break; /* ignore generic usage for now */
+ case BBOU_ADD:
+ /* Special case for add instructions that adjust registers
+ * which are mapping the stack.
+ */
+ if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
+ bb_adjust_osp_instruction(1);
+ usage = BBOU_RS;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
++ case BBOU_AND:
++ /* Special case when trying to round the stack pointer
++ * to achieve byte alignment
++ */
++ if (dst->reg && dst->base_rc == BBRG_RSP &&
++ src->immediate && strncmp(bb_func_name, "efi_call", 8) == 0) {
++ usage = BBOU_NOP;
++ } else {
++ usage = BBOU_RSRDWD;
++ }
++ break;
+ case BBOU_CALL:
+ bb_reg_state_print(bb_reg_state);
++ usage = BBOU_NOP;
+ if (bb_is_static_disp(src)) {
- /* Function sync_regs and save_v86_state are special.
- * Their return value is the new stack pointer
- */
- if (src->disp == bb_sync_regs) {
- bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
- } else if (src->disp == bb_save_v86_state) {
- bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
- bb_adjust_osp(BBRG_RAX, +KDB_WORD_SIZE);
- }
- /* Function save_args is special also. It saves
++ /* save_args is special. It saves
+ * a partial pt_regs onto the stack and switches
+ * to the interrupt stack.
+ */
- else if (src->disp == bb_save_args) {
++ if (src->disp == bb_save_args) {
+ bb_memory_set_reg(BBRG_RSP, BBRG_RDI, 0x48);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RSI, 0x40);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RDX, 0x38);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RCX, 0x30);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RAX, 0x28);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R8, 0x20);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R9, 0x18);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R10, 0x10);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R11, 0x08);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0);
+ /* This is actually on the interrupt stack,
+ * but we fudge it so the unwind works.
+ */
+ bb_memory_set_reg_value(BBRG_RSP, -0x8, BBRG_RBP, 0);
+ bb_reg_set_reg(BBRG_RBP, BBRG_RSP);
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ }
++ /* save_rest juggles the stack frame to append the
++ * rest of the pt_regs onto a stack where SAVE_ARGS
++ * or save_args has already been done.
++ */
++ else if (src->disp == bb_save_rest) {
++ bb_memory_set_reg(BBRG_RSP, BBRG_RBX, 0x30);
++ bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0x28);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R12, 0x20);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R13, 0x18);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R14, 0x10);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R15, 0x08);
++ }
++ /* error_entry and save_paranoid save a full pt_regs.
++ * Break out so the scratch registers aren't invalidated.
++ */
++ else if (src->disp == bb_error_entry || src->disp == bb_save_paranoid) {
++ bb_memory_set_reg(BBRG_RSP, BBRG_RDI, 0x70);
++ bb_memory_set_reg(BBRG_RSP, BBRG_RSI, 0x68);
++ bb_memory_set_reg(BBRG_RSP, BBRG_RDX, 0x60);
++ bb_memory_set_reg(BBRG_RSP, BBRG_RCX, 0x58);
++ bb_memory_set_reg(BBRG_RSP, BBRG_RAX, 0x50);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R8, 0x48);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R9, 0x40);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R10, 0x38);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R11, 0x30);
++ bb_memory_set_reg(BBRG_RSP, BBRG_RBX, 0x28);
++ bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0x20);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R12, 0x18);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R13, 0x10);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R14, 0x08);
++ bb_memory_set_reg(BBRG_RSP, BBRG_R15, 0);
++ break;
++ }
+ }
+ /* Invalidate the scratch registers */
+ bb_invalidate_scratch_reg();
- usage = BBOU_NOP;
++
++ /* These special cases need scratch registers invalidated first */
++ if (bb_is_static_disp(src)) {
++ /* Function sync_regs and save_v86_state are special.
++ * Their return value is the new stack pointer
++ */
++ if (src->disp == bb_sync_regs) {
++ bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
++ } else if (src->disp == bb_save_v86_state) {
++ bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
++ bb_adjust_osp(BBRG_RAX, +KDB_WORD_SIZE);
++ }
++ }
+ break;
+ case BBOU_CBW:
+ /* Convert word in RAX. Read RAX, write RAX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_CMOV:
+ /* cmove %gs:0x<nn>,%rsp is used to conditionally switch to
+ * another stack. Ignore this special case, it is handled by
+ * the stack unwinding code.
+ */
+ if (src->segment &&
+ strcmp(src->segment, "%gs") == 0 &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP)
+ usage = BBOU_NOP;
+ else
+ usage = BBOU_RSWD;
+ break;
+ case BBOU_CMPXCHG:
+ /* Read RAX, write RAX plus src read, dst write */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_RSWD;
+ break;
+ case BBOU_CMPXCHGD:
+ /* Read RAX, RBX, RCX, RDX, write RAX, RDX plus src read/write */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RBX);
+ bb_reg_read(BBRG_RCX);
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_RSWS;
+ break;
+ case BBOU_CPUID:
+ /* Read RAX, write RAX, RBX, RCX, RDX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RBX);
+ bb_reg_set_undef(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_CWD:
+ /* Convert word in RAX, RDX. Read RAX, write RDX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_DIV: /* drop through */
+ case BBOU_IDIV:
+ /* The 8 bit variants only affect RAX, the 16, 32 and 64 bit
+ * variants affect RDX as well.
+ */
+ switch (usage) {
+ case BBOU_DIV:
+ opcode_suffix = bb_decode.opcode[3];
+ break;
+ case BBOU_IDIV:
+ opcode_suffix = bb_decode.opcode[4];
+ break;
+ default:
+ opcode_suffix = 'q';
+ break;
+ }
+ operand_length = bb_operand_length(src, opcode_suffix);
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ if (operand_length != 8) {
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RDX);
+ }
+ usage = BBOU_RS;
+ break;
+ case BBOU_IMUL:
+ /* Only the two and three operand forms get here. The one
+ * operand form is treated as mul.
+ */
+ if (dst2->present) {
+ /* The three operand form is a special case, read the first two
+ * operands, write the third.
+ */
+ bb_read_operand(src);
+ bb_read_operand(dst);
+ bb_write_operand(dst2);
+ usage = BBOU_NOP;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_IRET:
+ bb_sanity_check(0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_JMP:
+ if (bb_is_static_disp(src))
+ bb_transfer(bb_curr_addr, src->disp, 0);
+ else if (src->indirect &&
+ src->disp &&
+ src->base == NULL &&
+ src->index &&
+ src->scale == KDB_WORD_SIZE)
+ bb_pass2_computed_jmp(src);
+ usage = BBOU_RS;
+ break;
+ case BBOU_LAHF:
+ /* Write RAX */
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LEA:
+ /* dst = src + disp. Often used to calculate offsets into the
+ * stack, so check if it uses a stack pointer.
+ */
+ usage = BBOU_RSWD;
+ if (bb_is_simple_memory(src)) {
+ if (bb_is_osp_defined(src->base_rc)) {
+ bb_reg_set_reg(dst->base_rc, src->base_rc);
+ bb_adjust_osp_instruction(1);
+ usage = BBOU_RS;
+ } else if (src->disp == 0 &&
+ src->base_rc == dst->base_rc) {
+ /* lea 0(%reg),%reg is generated by i386
+ * GENERIC_NOP7.
+ */
+ usage = BBOU_NOP;
+ } else if (src->disp == 4096 &&
+ (src->base_rc == BBRG_R8 ||
+ src->base_rc == BBRG_RDI) &&
+ strcmp(bb_func_name, "relocate_kernel") == 0) {
+ /* relocate_kernel: setup a new stack at the
+ * end of the physical control page, using
+ * (x86_64) lea 4096(%r8),%rsp or (i386) lea
+ * 4096(%edi),%esp
+ */
+ usage = BBOU_NOP;
+ }
+ }
+ break;
+ case BBOU_LEAVE:
+ /* RSP = RBP; RBP = *(RSP); RSP += KDB_WORD_SIZE; */
+ bb_reg_set_reg(BBRG_RSP, BBRG_RBP);
+ if (bb_is_osp_defined(BBRG_RSP))
+ bb_reg_set_memory(BBRG_RBP, BBRG_RSP, 0);
+ else
+ bb_reg_set_undef(BBRG_RBP);
+ if (bb_is_osp_defined(BBRG_RSP))
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ /* common_interrupt uses leave in a non-standard manner */
+ if (strcmp(bb_func_name, "common_interrupt") != 0)
+ bb_sanity_check(0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LODS:
+ /* Read RSI, write RAX, RSI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RSI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LOOP:
+ /* Read and write RCX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RCX);
+ if (bb_is_static_disp(src))
+ bb_transfer(bb_curr_addr, src->disp, 0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LSS:
+ /* lss offset(%esp),%esp leaves esp well defined */
+ if (dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ bb_is_simple_memory(src) &&
+ src->base_rc == BBRG_RSP) {
+ bb_adjust_osp(BBRG_RSP, 2*KDB_WORD_SIZE + src->disp);
+ usage = BBOU_NOP;
+ } else {
+ usage = BBOU_RSWD;
+ }
+ break;
+ case BBOU_MONITOR:
+ /* Read RAX, RCX, RDX. monitor only consumes these registers, it
+ * does not modify them, so they are recorded as read and their
+ * tracked contents are kept (same treatment as mwait below).
+ */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RCX);
+ bb_reg_read(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MOV:
+ usage = bb_usage_mov(src, dst, sizeof("mov")-1);
+ break;
+ case BBOU_MOVS:
+ /* Read RSI, RDI, write RSI, RDI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_read(BBRG_RDI);
+ bb_reg_set_undef(BBRG_RSI);
+ bb_reg_set_undef(BBRG_RDI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MUL:
+ /* imul (one operand form only) or mul. Read RAX. If the
+ * operand length is not 8 then write RDX.
+ */
+ if (bb_decode.opcode[0] == 'i')
+ opcode_suffix = bb_decode.opcode[4];
+ else
+ opcode_suffix = bb_decode.opcode[3];
+ operand_length = bb_operand_length(src, opcode_suffix);
+ bb_reg_read(BBRG_RAX);
+ if (operand_length != 8)
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MWAIT:
+ /* Read RAX, RCX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RCX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_NOP:
+ break;
+ case BBOU_OUTS:
+ /* Read RSI, RDX, write RSI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RSI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_POP:
+ /* Complicated by the fact that you can pop from top of stack
+ * to a stack location, for this case the destination location
+ * is calculated after adjusting RSP. Analysis of the kernel
+ * code shows that gcc only uses this strange format to get the
+ * flags into a local variable, e.g. pushf; popl 0x10(%esp); so
+ * I am going to ignore this special case.
+ */
+ usage = BBOU_WS;
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("pop when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ if (src->reg) {
+ bb_reg_set_memory(src->base_rc, BBRG_RSP, 0);
+ usage = BBOU_NOP;
+ }
+ /* pop %rsp does not adjust rsp */
+ if (!src->reg ||
+ src->base_rc != BBRG_RSP)
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ }
+ break;
+ case BBOU_POPF:
+ /* Do not care about flags, just adjust RSP */
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("popf when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ }
+ usage = BBOU_WS;
+ break;
+ case BBOU_PUSH:
+ /* Complicated by the fact that you can push from a stack
+ * location to top of stack, the source location is calculated
+ * before adjusting RSP. Analysis of the kernel code shows
+ * that gcc only uses this strange format to restore the flags
+ * from a local variable, e.g. pushl 0x10(%esp); popf; so I am
+ * going to ignore this special case.
+ */
+ usage = BBOU_RS;
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("push when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ if (src->reg &&
+ bb_reg_code_offset(BBRG_RSP) <= 0)
+ bb_memory_set_reg(BBRG_RSP, src->base_rc, 0);
+ }
+ break;
+ case BBOU_PUSHF:
+ /* Do not care about flags, just adjust RSP */
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("pushf when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ }
+ usage = BBOU_WS;
+ break;
+ case BBOU_RDMSR:
+ /* Read RCX, write RAX, RDX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_RDTSC:
+ /* Write RAX, RDX */
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_RET:
+ usage = BBOU_NOP;
++ if (src->immediate && bb_is_osp_defined(BBRG_RSP)) {
++ bb_adjust_osp(BBRG_RSP, src->disp);
++ }
+ /* Functions that restore state which was saved by another
+ * function or build new kernel stacks. We cannot verify what
+ * is being restored so skip the sanity check.
+ */
+ if (strcmp(bb_func_name, "restore_image") == 0 ||
+ strcmp(bb_func_name, "relocate_kernel") == 0 ||
+ strcmp(bb_func_name, "identity_mapped") == 0 ||
+ strcmp(bb_func_name, "xen_iret_crit_fixup") == 0 ||
+ strcmp(bb_func_name, "math_abort") == 0 ||
- strcmp(bb_func_name, "save_args") == 0)
++ strcmp(bb_func_name, "save_args") == 0 ||
++ strcmp(bb_func_name, "kretprobe_trampoline_holder") == 0)
+ break;
+ bb_sanity_check(0);
+ break;
+ case BBOU_SAHF:
+ /* Read RAX */
+ bb_reg_read(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SCAS:
+ /* Read RAX, RDI, write RDI */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RDI);
+ bb_reg_set_undef(BBRG_RDI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SUB:
+ /* Special case for sub instructions that adjust registers
+ * which are mapping the stack.
+ */
+ if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
+ bb_adjust_osp_instruction(-1);
+ usage = BBOU_RS;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_SYSEXIT:
+ bb_sanity_check(1);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SYSRET:
+ bb_sanity_check(1);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_WRMSR:
+ /* Read RCX, RAX, RDX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_XADD:
+ usage = bb_usage_xadd(src, dst);
+ break;
+ case BBOU_XCHG:
+ /* i386 do_IRQ with 4K stacks does xchg %ebx,%esp; call
+ * irq_handler; mov %ebx,%esp; to switch stacks. Ignore this
+ * stack switch when tracking registers, it is handled by
+ * higher level backtrace code. Convert xchg %ebx,%esp to mov
+ * %esp,%ebx so the later mov %ebx,%esp becomes a NOP and the
+ * stack remains defined so we can backtrace through do_IRQ's
+ * stack switch.
+ *
+ * Ditto for do_softirq.
+ */
+ if (src->reg &&
+ dst->reg &&
+ src->base_rc == BBRG_RBX &&
+ dst->base_rc == BBRG_RSP &&
+ (strcmp(bb_func_name, "do_IRQ") == 0 ||
+ strcmp(bb_func_name, "do_softirq") == 0)) {
+ strcpy(bb_decode.opcode, "mov");
+ usage = bb_usage_mov(dst, src, sizeof("mov")-1);
+ } else {
+ usage = bb_usage_xchg(src, dst);
+ }
+ break;
+ case BBOU_XOR:
+ /* xor %reg,%reg only counts as a register write, the original
+ * contents of reg are irrelevant.
+ */
+ if (src->reg && dst->reg && src->base_rc == dst->base_rc)
+ usage = BBOU_WS;
+ else
+ usage = BBOU_RSRDWD;
+ break;
+ }
+
+ /* The switch statement above handled all the special cases. Every
+ * opcode should now have a usage of NOP or one of the generic cases.
+ */
+ if (usage == BBOU_UNKNOWN || usage == BBOU_NOP) {
+ /* nothing to do */
+ } else if (usage >= BBOU_RS && usage <= BBOU_RSRDWSWD) {
+ if (usage & BBOU_RS)
+ bb_read_operand(src);
+ if (usage & BBOU_RD)
+ bb_read_operand(dst);
+ if (usage & BBOU_WS)
+ bb_write_operand(src);
+ if (usage & BBOU_WD)
+ bb_write_operand(dst);
+ } else {
+ kdb_printf("%s: opcode not fully handled\n", __FUNCTION__);
+ /* The instruction is only printed here when BB debugging is off */
+ if (!KDB_DEBUG(BB)) {
+ bb_print_opcode();
+ if (bb_decode.src.present)
+ bb_print_operand("src", &bb_decode.src);
+ if (bb_decode.dst.present)
+ bb_print_operand("dst", &bb_decode.dst);
+ if (bb_decode.dst2.present)
+ bb_print_operand("dst2", &bb_decode.dst2);
+ }
+ bb_giveup = 1;
+ }
+}
+
+/* Split the disassembled line held in bb_buffer into an optional prefix, the
+ * opcode and up to three comma separated operands.  The pieces are stored in
+ * bb_decode and, unless the analysis has been abandoned, the decoded
+ * instruction is passed to bb_usage().  bb_buffer is modified in place,
+ * separators are overwritten with string terminators.
+ */
+static void
+bb_parse_buffer(void)
+{
+	char *p, *src, *dst = NULL, *dst2 = NULL;
+	int paren = 0;
+
+	p = bb_buffer;
+	memset(&bb_decode, 0, sizeof(bb_decode));
+	KDB_DEBUG_BB(" '%s'\n", p);
+	p += strcspn(p, ":");	/* skip address and function name+offset: */
+	if (*p++ != ':') {
+		kdb_printf("%s: cannot find ':' in buffer '%s'\n",
+			   __FUNCTION__, bb_buffer);
+		bb_giveup = 1;
+		return;
+	}
+	p += strspn(p, " \t");	/* step to opcode */
+	if (strncmp(p, "(bad)", 5) == 0)
+		strcpy(p, "nop");
+	/* separate any opcode prefix */
+	if (strncmp(p, "lock", 4) == 0 ||
+	    strncmp(p, "rep", 3) == 0 ||
+	    strncmp(p, "rex", 3) == 0 ||
+	    strncmp(p, "addr", 4) == 0) {
+		bb_decode.prefix = p;
+		p += strcspn(p, " \t");
+		/* A prefix that is the last thing on the line ends at the
+		 * string terminator.  Do not step over the terminator,
+		 * otherwise the opcode would be parsed from whatever is left
+		 * in the buffer behind it.
+		 */
+		if (*p)
+			*p++ = '\0';
+		p += strspn(p, " \t");
+	}
+	bb_decode.opcode = p;
+	strsep(&p, " \t");	/* step to end of opcode */
+	if (bb_parse_opcode())
+		return;
+	if (!p)
+		goto no_operands;
+	p += strspn(p, " \t");	/* step to operand(s) */
+	if (!*p)
+		goto no_operands;
+	src = p;
+	/* strip comments after operands, then scan the operands from their
+	 * start again
+	 */
+	strsep(&p, " \t");
+	p = src;
+	/* split 'src','dst' but ignore ',' inside '(' ')' */
+	while (*p) {
+		if (*p == '(') {
+			++paren;
+		} else if (*p == ')') {
+			--paren;
+		} else if (*p == ',' && paren == 0) {
+			*p = '\0';
+			if (dst)
+				dst2 = p+1;
+			else
+				dst = p+1;
+		}
+		++p;
+	}
+	bb_parse_operand(src, &bb_decode.src);
+	if (KDB_DEBUG(BB))
+		bb_print_operand("src", &bb_decode.src);
+	if (dst && !bb_giveup) {
+		bb_parse_operand(dst, &bb_decode.dst);
+		if (KDB_DEBUG(BB))
+			bb_print_operand("dst", &bb_decode.dst);
+	}
+	if (dst2 && !bb_giveup) {
+		bb_parse_operand(dst2, &bb_decode.dst2);
+		if (KDB_DEBUG(BB))
+			bb_print_operand("dst2", &bb_decode.dst2);
+	}
+no_operands:
+	if (!bb_giveup)
+		bb_usage();
+}
+
+/* fprintf-style callback used by the disassembler during pass 2.  Output is
+ * appended to bb_buffer; once the buffer contains a newline the completed
+ * line is handed to bb_fixup_switch_to() (text after the first ':', if any)
+ * and bb_parse_buffer(), then the buffer is emptied.  'file' is unused.
+ * Always returns 0.
+ */
+static int
+bb_dis_pass2(PTR file, const char *fmt, ...)
+{
+	va_list args;
+	int used = strlen(bb_buffer);
+	char *newline, *colon;
+
+	va_start(args, fmt);
+	vsnprintf(bb_buffer + used, sizeof(bb_buffer) - used, fmt, args);
+	va_end(args);
+
+	newline = strchr(bb_buffer, '\n');
+	if (!newline)
+		return 0;	/* line not complete yet, keep accumulating */
+
+	*newline = '\0';
+	colon = bb_buffer + strcspn(bb_buffer, ":");
+	if (*colon == ':')
+		bb_fixup_switch_to(colon + 1);
+	bb_parse_buffer();
+	bb_buffer[0] = '\0';
+	return 0;
+}
+
+/* Address printing callback used by the disassembler during pass 2.  Emits
+ * the address as "0x..." and, when kdbnearsym() yields a symbol name,
+ * appends " <name>" or " <name+0xoffset>".
+ */
+static void
+bb_printaddr_pass2(bfd_vma addr, disassemble_info *dip)
+{
+	kdb_symtab_t sym;
+	unsigned int delta;
+
+	dip->fprintf_func(dip->stream, "0x%lx", addr);
+	kdbnearsym(addr, &sym);
+	if (!sym.sym_name)
+		return;
+
+	dip->fprintf_func(dip->stream, " <%s", sym.sym_name);
+	delta = addr - sym.sym_start;
+	if (delta != 0)
+		dip->fprintf_func(dip->stream, "+0x%x", delta);
+	dip->fprintf_func(dip->stream, ">");
+}
+
+/* Set the starting register and memory state for the current bb.
+ *
+ * Looks in bb_special_cases for an entry whose address equals bb_func_start
+ * and whose fname is NULL.  Without a match nothing is changed.  With a
+ * match, the entry's register values and offsets are applied (except those
+ * flagged in skip_regs), RSP is adjusted if needed so that the entry's
+ * memory slots lie within the tracked stack, and each memory slot (except
+ * those flagged in skip_mem) is recorded relative to RSP.
+ */
+
+static void
+bb_start_block0_special(void)
+{
+ int i;
+ short offset_address;
+ enum bb_reg_code reg, value;
+ struct bb_name_state *r;
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (bb_func_start == r->address && r->fname == NULL)
+ goto match;
+ }
+ return;
+match:
+ /* Set the running registers */
+ for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
+ value = r->regs[reg].value;
+ if (test_bit(value, r->skip_regs.bits)) {
+ /* this regs entry is not defined for this label */
+ continue;
+ }
+ bb_reg_code_set_value(reg, value);
+ bb_reg_code_set_offset(reg, r->regs[reg].offset);
+ }
+ /* Set any memory contents, e.g. pt_regs. Adjust RSP as required. */
+ offset_address = 0;
+ for (i = 0; i < r->mem_size; ++i) { /* find the highest slot end, at least 0 */
+ offset_address = max_t(int,
+ r->mem[i].offset_address + KDB_WORD_SIZE,
+ offset_address);
+ }
+ if (bb_reg_code_offset(BBRG_RSP) > -offset_address)
+ bb_adjust_osp(BBRG_RSP, -offset_address - bb_reg_code_offset(BBRG_RSP));
+ for (i = 0; i < r->mem_size; ++i) {
+ value = r->mem[i].value;
+ if (test_bit(value, r->skip_mem.bits)) {
+ /* this memory entry is not defined for this label */
+ continue;
+ }
+ bb_memory_set_reg_value(BBRG_RSP, r->mem[i].offset_address,
+ value, 0);
+ bb_reg_set_undef(value); /* the recorded register itself is marked undefined */
+ }
+ return;
+}
+
+static void
+bb_pass2_start_block(int number)
+{ /* Build in bb_reg_state the input state for basic block 'number'.
+ *
+ * bb_reg_state is first zeroed (with mem_count set to bb_reg_state_max).
+ * Block 0 gets bb_start_block0() plus bb_start_block0_special().  For any
+ * other block, every bb_jmp_list entry that targets bb_curr->start and has
+ * a saved state is merged in: the first such state is copied verbatim and
+ * each later one is compared with the running result.  Registers whose
+ * contents differ become undefined; memory slots that are absent from, or
+ * different in, the other state are deleted.
+ */
+ int i, j, k, first, changed;
+ size_t size;
+ struct bb_jmp *bb_jmp;
+ struct bb_reg_state *state;
+ struct bb_memory_contains *c1, *c2;
+ bb_reg_state->mem_count = bb_reg_state_max;
+ size = bb_reg_state_size(bb_reg_state);
+ memset(bb_reg_state, 0, size);
+
+ if (number == 0) {
+ /* The first block is assumed to have well defined inputs */
+ bb_start_block0();
+ /* Some assembler labels have non-standard entry
+ * states.
+ */
+ bb_start_block0_special();
+ bb_reg_state_print(bb_reg_state);
+ return;
+ }
+
+ /* Merge all the input states for the current bb together */
+ first = 1;
+ changed = 0;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ if (bb_jmp->to != bb_curr->start)
+ continue;
+ state = bb_jmp->state;
+ if (!state)
+ continue; /* this input has not been computed yet */
+ if (first) {
+ size = bb_reg_state_size(state);
+ memcpy(bb_reg_state, state, size);
+ KDB_DEBUG_BB(" first state %p\n", state);
+ bb_reg_state_print(bb_reg_state);
+ first = 0;
+ continue;
+ }
+
+ KDB_DEBUG_BB(" merging state %p\n", state);
+ /* Merge the register states */
+ for (j = 0; j < ARRAY_SIZE(state->contains); ++j) {
+ if (memcmp(bb_reg_state->contains + j,
+ state->contains + j,
+ sizeof(bb_reg_state->contains[0]))) {
+ /* Different states for this register from two
+ * or more inputs, make it undefined.
+ */
+ if (bb_reg_state->contains[j].value ==
+ BBRG_UNDEFINED) {
+ KDB_DEBUG_BB(" ignoring %s\n",
+ bbrg_name[j + BBRG_RAX]);
+ } else {
+ bb_reg_set_undef(BBRG_RAX + j);
+ changed = 1;
+ }
+ }
+ }
+
+ /* Merge the memory states. This relies on both
+ * bb_reg_state->memory and state->memory being sorted in
+ * descending order, with undefined entries at the end.
+ */
+ c1 = bb_reg_state->memory;
+ c2 = state->memory;
+ j = k = 0;
+ while (j < bb_reg_state->mem_count &&
+ k < state->mem_count) {
+ if (c1->offset_address < c2->offset_address) {
+ KDB_DEBUG_BB_OFFSET(c2->offset_address,
+ " ignoring c2->offset_address ",
+ "\n");
+ ++c2;
+ ++k;
+ continue;
+ }
+ if (c1->offset_address > c2->offset_address) {
+ /* Memory location is not in all input states,
+ * delete the memory location.
+ */
+ bb_delete_memory(c1->offset_address);
+ changed = 1;
+ ++c1;
+ ++j;
+ continue;
+ }
+ if (memcmp(c1, c2, sizeof(*c1))) {
+ /* Same location, different contents, delete
+ * the memory location.
+ */
+ bb_delete_memory(c1->offset_address);
+ KDB_DEBUG_BB_OFFSET(c2->offset_address,
+ " ignoring c2->offset_address ",
+ "\n");
+ changed = 1;
+ }
+ ++c1;
+ ++c2;
+ ++j;
+ ++k;
+ }
+ while (j < bb_reg_state->mem_count) { /* c2 exhausted: remaining c1 slots are not in all inputs */
+ bb_delete_memory(c1->offset_address);
+ changed = 1;
+ ++c1;
+ ++j;
+ }
+ }
+ if (changed) {
+ KDB_DEBUG_BB(" final state\n");
+ bb_reg_state_print(bb_reg_state);
+ }
+}
+
+/* We have reached the exit point from the current function, either a call to
+ * the next function or the instruction that was about to be executed when an
+ * interrupt occurred. Save the current register state in bb_exit_state.
+ *
+ * Any previously saved exit state is freed first.  bb_reg_state is
+ * canonicalized and then copied into a fresh debug_kmalloc() buffer.  If
+ * the allocation fails, bb_giveup is set and bb_exit_state stays NULL.
+ */
+
+static void
+bb_save_exit_state(void)
+{
+ size_t size;
+ debug_kfree(bb_exit_state);
+ bb_exit_state = NULL;
+ bb_reg_state_canonicalize();
+ size = bb_reg_state_size(bb_reg_state);
+ bb_exit_state = debug_kmalloc(size, GFP_ATOMIC);
+ if (!bb_exit_state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(bb_exit_state, bb_reg_state, size);
+}
+
+static int
+bb_pass2_do_changed_blocks(int allow_missing)
+{ /* Process every block in bb_list whose 'changed' flag is set and which has
+ * at most allow_missing inputs (bb_jmp entries targeting it) without a
+ * saved state.  Each processed block is disassembled instruction by
+ * instruction via kdba_id_printinsn().  Sweeps repeat until one sweep
+ * processes no block, bb_giveup is set, or maxloops is exhausted.
+ *
+ * Returns 1 if any block is still marked changed (the caller should retry
+ * with a larger allow_missing), 0 when all blocks are done.
+ */
+ int i, j, missing, changed, maxloops;
+ unsigned long addr;
+ struct bb_jmp *bb_jmp;
+ KDB_DEBUG_BB("\n %s: allow_missing %d\n", __FUNCTION__, allow_missing);
+ /* Absolute worst case is we have to iterate over all the basic blocks
+ * in an "out of order" state, each iteration losing one register or
+ * memory state. Any more loops than that is a bug. "out of order"
+ * means that the layout of blocks in memory does not match the logic
+ * flow through those blocks so (for example) block 27 comes before
+ * block 2. To allow for out of order blocks, multiply maxloops by the
+ * number of blocks.
+ */
+ maxloops = (KDB_INT_REGISTERS + bb_reg_state_max) * bb_count;
+ changed = 1;
+ do {
+ changed = 0;
+ for (i = 0; i < bb_count; ++i) {
+ bb_curr = bb_list[i];
+ if (!bb_curr->changed)
+ continue;
+ missing = 0;
+ for (j = 0, bb_jmp = bb_jmp_list;
+ j < bb_jmp_count;
+ ++j, ++bb_jmp) {
+ if (bb_jmp->to == bb_curr->start &&
+ !bb_jmp->state)
+ ++missing;
+ }
+ if (missing > allow_missing)
+ continue; /* too many unknown inputs, leave for a later call */
+ bb_curr->changed = 0;
+ changed = 1;
+ KDB_DEBUG_BB("\n bb[%d]\n", i);
+ bb_pass2_start_block(i);
+ for (addr = bb_curr->start;
+ addr <= bb_curr->end; ) {
+ bb_curr_addr = addr;
+ if (addr == bb_exit_addr)
+ bb_save_exit_state();
+ addr += kdba_id_printinsn(addr, &kdb_di);
+ kdb_di.fprintf_func(NULL, "\n");
+ if (bb_giveup)
+ goto done;
+ }
+ if (!bb_exit_state) {
+ /* ATTRIB_NORET functions are a problem with
+ * the current gcc. Allow the trailing address
+ * a bit of leeway.
+ */
+ if (addr == bb_exit_addr ||
+ addr == bb_exit_addr + 1)
+ bb_save_exit_state();
+ }
+ if (bb_curr->drop_through)
+ bb_transfer(bb_curr->end,
+ bb_list[i+1]->start, 1);
+ }
+ if (maxloops-- == 0) {
+ kdb_printf("\n\n%s maxloops reached\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ goto done;
+ }
+ } while(changed);
+done:
+ for (i = 0; i < bb_count; ++i) {
+ bb_curr = bb_list[i];
+ if (bb_curr->changed)
+ return 1; /* more to do, increase allow_missing */
+ }
+ return 0; /* all blocks done */
+}
+
+/* Treat the current function as a "pass through" function, i.e. one that
+ * never looks at its register parameters, and credit it with REGPARM
+ * register parameters.  Functions named sys_* or compat_sys_*, the names
+ * listed below and anything bb_asmlinkage_arch() accepts are excluded and
+ * left untouched.  The long explanation is printed only on the first
+ * assumption made; later ones get a one line message.
+ */
+
+static void
+bb_assume_pass_through(void)
+{
+	static const char * const excluded[] = {
+		"schedule",
+		"do_softirq",
+		"printk",
+		"vprintk",
+		"preempt_schedule",
+		"start_kernel",
+		"csum_partial",
+		"csum_partial_copy_generic",
+		"math_state_restore",
+		"panic",
+		"kdb_printf",
+		"kdb_interrupt",
+	};
+	static int first_time = 1;
+	int n;
+
+	if (strncmp(bb_func_name, "sys_", 4) == 0 ||
+	    strncmp(bb_func_name, "compat_sys_", 11) == 0)
+		return;
+	for (n = 0; n < ARRAY_SIZE(excluded); ++n) {
+		if (strcmp(bb_func_name, excluded[n]) == 0)
+			return;
+	}
+	if (bb_asmlinkage_arch())
+		return;
+
+	bb_reg_params = REGPARM;
+	if (!first_time) {
+		kdb_printf(" Assuming %s is 'pass through' with %d register "
+			   "parameters\n",
+			   bb_func_name, REGPARM);
+		return;
+	}
+	kdb_printf(" %s has memory parameters but no register "
+		   "parameters.\n Assuming it is a 'pass "
+		   "through' function that does not refer to "
+		   "its register\n parameters and setting %d "
+		   "register parameters\n",
+		   bb_func_name, REGPARM);
+	first_time = 0;
+}
+
+static void
+bb_pass2(void)
+{ /* Driver for the second pass.  Installs the pass 2 disassembler callbacks
+ * in kdb_di, allocates bb_reg_state, marks block 0 as changed and calls
+ * bb_pass2_do_changed_blocks() with an increasing allow_missing.  After
+ * that it settles bb_reg_params.  Sets bb_giveup if the allocation fails
+ * or the allow_missing loop runs past bb_jmp_count.
+ */
+ int allow_missing;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: start\n", __FUNCTION__);
+
+ kdb_di.fprintf_func = bb_dis_pass2;
+ kdb_di.print_address_func = bb_printaddr_pass2;
+
+ bb_reg_state = debug_kmalloc(sizeof(*bb_reg_state), GFP_ATOMIC);
+ if (!bb_reg_state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ bb_list[0]->changed = 1;
+
+ /* If a block does not have all its input states available then it is
+ * possible for a register to initially appear to hold a known value,
+ * but when other inputs are available then it becomes a variable
+ * value. The initial false state of "known" can generate false values
+ * for other registers and can even make it look like stack locations
+ * are being changed.
+ *
+ * To avoid these false positives, only process blocks which have all
+ * their inputs defined. That gives a clean depth first traversal of
+ * the tree, except for loops. If there are any loops, then start
+ * processing blocks with one missing input, then two missing inputs
+ * etc.
+ *
+ * Absolute worst case is we have to iterate over all the jmp entries,
+ * each iteration allowing one more missing input. Any more loops than
+ * that is a bug. Watch out for the corner case of 0 jmp entries.
+ */
+ for (allow_missing = 0; allow_missing <= bb_jmp_count; ++allow_missing) {
+ if (!bb_pass2_do_changed_blocks(allow_missing))
+ break;
+ if (bb_giveup)
+ break;
+ }
+ if (allow_missing > bb_jmp_count) { /* loop ran to completion without a break */
+ kdb_printf("\n\n%s maxloops reached\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+
+ if (bb_memory_params && bb_reg_params) /* memory parameters seen: count all REGPARM registers */
+ bb_reg_params = REGPARM;
+ if (REGPARM &&
+ bb_memory_params &&
+ !bb_reg_params)
+ bb_assume_pass_through();
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: end bb_reg_params %d bb_memory_params %d\n",
+ __FUNCTION__, bb_reg_params, bb_memory_params);
+ if (bb_exit_state) {
+ kdb_printf("%s: bb_exit_state at " kdb_bfd_vma_fmt0 "\n",
+ __FUNCTION__, bb_exit_addr);
+ bb_do_reg_state_print(bb_exit_state);
+ }
+ }
+}
+
+/* Release everything the basic block analysis allocated and reset its
+ * global state: all blocks in bb_list, the list itself, the saved state of
+ * every jmp entry (each is reference counted), bb_jmp_list, bb_reg_state
+ * and bb_exit_state.  The parameter counts and bb_giveup are cleared too.
+ */
+static void
+bb_cleanup(void)
+{
+	int i;
+	struct bb_reg_state *state;
+
+	/* Keep deleting the first entry until bb_count reaches zero.
+	 * NOTE(review): relies on bb_delete() decrementing bb_count -- confirm.
+	 */
+	while (bb_count)
+		bb_delete(0);
+	debug_kfree(bb_list);
+	bb_list = NULL;
+	bb_count = bb_max = 0;
+	for (i = 0; i < bb_jmp_count; ++i) {
+		state = bb_jmp_list[i].state;
+		/* a state can be shared by several jmp entries */
+		if (state && --state->ref_count == 0)
+			debug_kfree(state);
+	}
+	debug_kfree(bb_jmp_list);
+	bb_jmp_list = NULL;
+	bb_jmp_count = bb_jmp_max = 0;
+	debug_kfree(bb_reg_state);
+	bb_reg_state = NULL;
+	bb_reg_state_max = 0;
+	debug_kfree(bb_exit_state);
+	bb_exit_state = NULL;
+	bb_reg_params = bb_memory_params = 0;
+	bb_giveup = 0;
+}
+
+/* Return 1 if func_name exactly matches an entry in bb_spurious[], else 0. */
+static int
+bb_spurious_global_label(const char *func_name)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(bb_spurious)) {
+		if (!strcmp(bb_spurious[idx], func_name))
+			return 1;
+		++idx;
+	}
+	return 0;
+}
+
+/* Given the current actual register contents plus the exit state deduced from
+ * a basic block analysis of the current function, rollback the actual register
+ * contents to the values they had on entry to this function.
+ *
+ * ar supplies the stack bounds (ar->stack.logical_start / logical_end) that
+ * a saved-register address must lie within before it is read.  On success
+ * bb_actual is replaced by the rolled back values.  bb_giveup is set, and
+ * bb_actual is not replaced, when there is no bb_exit_state, when RSP has
+ * no valid actual value, or when no valid register is defined relative to
+ * the original stack pointer (osp).
+ */
+
+static void
+bb_actual_rollback(const struct kdb_activation_record *ar)
+{
+ int i, offset_address;
+ struct bb_memory_contains *c;
+ enum bb_reg_code reg;
+ unsigned long address, osp = 0;
+ struct bb_actual new[ARRAY_SIZE(bb_actual)];
+
+
+ if (!bb_exit_state) {
+ kdb_printf("%s: no bb_exit_state, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(bb_reg_state, bb_exit_state, bb_reg_state_size(bb_exit_state));
+ memset(new, 0, sizeof(new));
+
+ /* The most important register for obtaining saved state is rsp so get
+ * its new value first. Prefer rsp if it is valid, then other
+ * registers. Saved values of rsp in memory are unusable without a
+ * register that points to memory.
+ */
+ if (!bb_actual_valid(BBRG_RSP)) {
+ kdb_printf("%s: no starting value for RSP, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: rsp " kdb_bfd_vma_fmt0,
+ __FUNCTION__, bb_actual_value(BBRG_RSP));
+ i = BBRG_RSP;
+ if (!bb_is_osp_defined(i)) {
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ if (bb_is_osp_defined(i) && bb_actual_valid(i))
+ break;
+ }
+ }
+ if (bb_is_osp_defined(i) && bb_actual_valid(i)) {
+ osp = new[BBRG_RSP - BBRG_RAX].value =
+ bb_actual_value(i) - bb_reg_code_offset(i);
+ new[BBRG_RSP - BBRG_RAX].valid = 1;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(" -> osp " kdb_bfd_vma_fmt0 "\n", osp);
+ } else {
+ bb_actual_set_valid(BBRG_RSP, 0);
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(" -> undefined\n");
+ kdb_printf("%s: no ending value for RSP, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+
+ /* Now the other registers. First look at register values that have
+ * been copied to other registers.
+ */
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ reg = bb_reg_code_value(i);
+ if (bb_is_int_reg(reg)) {
+ new[reg - BBRG_RAX] = bb_actual[i - BBRG_RAX];
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: %s is in %s ",
+ __FUNCTION__,
+ bbrg_name[reg],
+ bbrg_name[i]);
+ if (bb_actual_valid(i))
+ kdb_printf(" -> " kdb_bfd_vma_fmt0 "\n",
+ bb_actual_value(i));
+ else
+ kdb_printf("(invalid)\n");
+ }
+ }
+ }
+
+ /* Finally register values that have been saved on stack */
+ for (i = 0, c = bb_reg_state->memory;
+ i < bb_reg_state->mem_count;
+ ++i, ++c) {
+ offset_address = c->offset_address;
+ reg = c->value;
+ if (!bb_is_int_reg(reg))
+ continue;
+ address = osp + offset_address;
+ if (address < ar->stack.logical_start ||
+ address >= ar->stack.logical_end) { /* outside this stack: do not dereference */
+ new[reg - BBRG_RAX].value = 0;
+ new[reg - BBRG_RAX].valid = 0;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: %s -> undefined\n",
+ __FUNCTION__,
+ bbrg_name[reg]);
+ } else {
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: %s -> *(osp",
+ __FUNCTION__,
+ bbrg_name[reg]);
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset_address, "", " ");
+ kdb_printf(kdb_bfd_vma_fmt0, address);
+ }
+ new[reg - BBRG_RAX].value = *(bfd_vma *)address;
+ new[reg - BBRG_RAX].valid = 1;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(") = " kdb_bfd_vma_fmt0 "\n",
+ new[reg - BBRG_RAX].value);
+ }
+ }
+
+ memcpy(bb_actual, new, sizeof(bb_actual));
+}
+
+/* Decide whether the current function is an interrupt handler.  It is one
+ * if bb_func_name appears in bb_hardware_handlers[], or if the instruction
+ * at rip is a jmp (short 0xeb or near 0xe9) whose target is
+ * bb_ret_from_intr, bb_common_interrupt or bb_error_entry.  Returns 0 when
+ * the bytes at rip cannot be read.
+ */
+
+static bool
+bb_interrupt_handler(kdb_machreg_t rip)
+{
+	unsigned long rel8, rel32, dest, insn_addr = (unsigned long)rip;
+	unsigned char insn[5];
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(bb_hardware_handlers); ++n) {
+		if (strcmp(bb_func_name, bb_hardware_handlers[n]) == 0)
+			return 1;
+	}
+
+	/* There are too many interrupt handlers to list, so inspect the
+	 * next instruction instead and see whether it jumps to one of the
+	 * common exit routines.
+	 */
+	if (kdb_getarea(insn, insn_addr) ||
+	    kdb_getword(&rel32, insn_addr+1, 4) ||
+	    kdb_getword(&rel8, insn_addr+1, 1))
+		return 0;	/* not a valid code address */
+
+	switch (insn[0]) {
+	case 0xe9:	/* jmp disp32, 5 byte instruction */
+		dest = insn_addr + (s32) rel32 + 5;
+		break;
+	case 0xeb:	/* jmp disp8, 2 byte instruction */
+		dest = insn_addr + (s8) rel8 + 2;
+		break;
+	default:
+		return 0;
+	}
+
+	if (dest == bb_ret_from_intr ||
+	    dest == bb_common_interrupt ||
+	    dest == bb_error_entry)
+		return 1;
+	return 0;
+}
+
+/* Fill in the argument part of the kdb activation record from the results
+ * of the basic block analysis and rollback.  ar->args becomes the number of
+ * register plus memory parameters.  Register parameters are taken from the
+ * rolled back register values; memory parameters are read from successive
+ * words above the rolled back RSP.  A bit is set in ar->valid for each
+ * argument that could be obtained.
+ */
+
+static void
+bb_arguments(struct kdb_activation_record *ar)
+{
+	int n;
+	kdb_machreg_t slot;
+
+	ar->args = bb_reg_params + bb_memory_params;
+	bitmap_zero(ar->valid.bits, KDBA_MAXARGS);
+
+	for (n = 0; n < bb_reg_params; ++n) {
+		enum bb_reg_code reg = bb_param_reg[n];
+
+		if (!bb_actual_valid(reg))
+			continue;
+		ar->arg[n] = bb_actual_value(reg);
+		set_bit(n, ar->valid.bits);
+	}
+
+	/* Memory parameters cannot be located without a valid RSP */
+	if (!bb_actual_valid(BBRG_RSP))
+		return;
+
+	/* The first memory parameter is one word above RSP */
+	slot = bb_actual_value(BBRG_RSP) + KDB_WORD_SIZE;
+	for (n = bb_reg_params; n < ar->args; ++n, slot += KDB_WORD_SIZE) {
+		if (kdb_getarea(ar->arg[n], slot) == 0)
+			set_bit(n, ar->valid.bits);
+	}
+}
+
+/* Given an exit address from a function, decompose the entire function into
+ * basic blocks and determine the register state at the exit point.
+ *
+ * Resolves 'exit' with kdbnearsym() and records the module name, function
+ * name and start/end addresses in the bb_* globals, widening the range over
+ * labels that bb_spurious_global_label() reports.  Then runs bb_pass1() and,
+ * unless that gave up, bb_pass2().  bb_giveup is set if the address cannot
+ * be resolved; whenever bb_giveup is set at the end a failure message is
+ * printed.
+ */
+
+static void
+kdb_bb(unsigned long exit)
+{
+ kdb_symtab_t symtab;
+ if (!kdbnearsym(exit, &symtab)) {
+ kdb_printf("%s: address " kdb_bfd_vma_fmt0 " not recognised\n",
+ __FUNCTION__, exit);
+ bb_giveup = 1;
+ return;
+ }
+ bb_exit_addr = exit;
+ bb_mod_name = symtab.mod_name;
+ bb_func_name = symtab.sym_name;
+ bb_func_start = symtab.sym_start;
+ bb_func_end = symtab.sym_end;
+ /* Various global labels exist in the middle of assembler code and have
+ * a non-standard state. Ignore these labels and use the start of the
+ * previous label instead.
+ */
+ while (bb_spurious_global_label(symtab.sym_name)) {
+ if (!kdbnearsym(symtab.sym_start - 1, &symtab))
+ break;
+ bb_func_start = symtab.sym_start;
+ }
+ bb_mod_name = symtab.mod_name; /* NOTE(review): uses symtab even if the kdbnearsym() above failed -- confirm it is still valid then */
+ bb_func_name = symtab.sym_name;
+ bb_func_start = symtab.sym_start;
+ /* Ignore spurious labels past this point and use the next non-spurious
+ * label as the end point.
+ */
+ if (kdbnearsym(bb_func_end, &symtab)) {
+ while (bb_spurious_global_label(symtab.sym_name)) {
+ bb_func_end = symtab.sym_end;
+ if (!kdbnearsym(symtab.sym_end + 1, &symtab))
+ break;
+ }
+ }
+ bb_pass1();
+ if (!bb_giveup)
+ bb_pass2();
+ if (bb_giveup)
+ kdb_printf("%s: " kdb_bfd_vma_fmt0
+ " [%s]%s failed at " kdb_bfd_vma_fmt0 "\n\n",
+ __FUNCTION__, exit,
+ bb_mod_name, bb_func_name, bb_curr_addr);
+}
+
+static int
+kdb_bb1(int argc, const char **argv)
+{ /* Run the basic block analysis on the single address given in argv[1],
+ * with the BB debug flag forced on for the duration of the run.
+ * (Presumably registered as a kdb command handler; the registration is
+ * not in this part of the file.)
+ *
+ * Returns KDB_ARGCOUNT unless exactly one argument is given, the error
+ * from kdbgetaddrarg() if the argument cannot be parsed, KDB_BADADDR for
+ * a zero address, otherwise 0.
+ */
+ int diag, nextarg = 1;
+ kdb_machreg_t addr;
+ unsigned long offset;
+
+ bb_cleanup(); /* in case previous command was interrupted */
+ kdba_id_init(&kdb_di);
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ if (!addr)
+ return KDB_BADADDR;
+ kdb_save_flags();
+ kdb_flags |= KDB_DEBUG_FLAG_BB << KDB_DEBUG_FLAG_SHIFT; /* turn on BB debug for this run */
+ kdb_bb(addr);
+ bb_cleanup();
+ kdb_restore_flags();
+ kdbnearsym_cleanup();
+ return 0;
+}
+
+/* Run a basic block analysis on every function in the base kernel. Used as a
+ * global sanity check to find errors in the basic block code.
+ *
+ * Prints the build variables, reports bb_special_cases entries with no
+ * address and bb_spurious labels that kallsyms cannot find, then walks
+ * kallsyms from _stext (or stext) up to _etext calling kdb_bb() on each
+ * symbol that is not on the skip lists below.  The walk is abandoned once
+ * the error budget (max_errors, initially 20) is used up.  argc and argv
+ * are not used.  Always returns 0.
+ */
+
+static int
+kdb_bb_all(int argc, const char **argv)
+{
+ loff_t pos = 0;
+ const char *symname;
+ unsigned long addr;
+ int i, max_errors = 20;
+ struct bb_name_state *r;
+ kdb_printf("%s: build variables:"
+ " CCVERSION \"" __stringify(CCVERSION) "\""
+#ifdef CONFIG_X86_64
+ " CONFIG_X86_64"
+#endif
+#ifdef CONFIG_4KSTACKS
+ " CONFIG_4KSTACKS"
+#endif
+#ifdef CONFIG_PREEMPT
+ " CONFIG_PREEMPT"
+#endif
+#ifdef CONFIG_VM86
+ " CONFIG_VM86"
+#endif
+#ifdef CONFIG_FRAME_POINTER
+ " CONFIG_FRAME_POINTER"
+#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+ " CONFIG_TRACE_IRQFLAGS"
+#endif
+#ifdef CONFIG_HIBERNATION
+ " CONFIG_HIBERNATION"
+#endif
+#ifdef CONFIG_KPROBES
+ " CONFIG_KPROBES"
+#endif
+#ifdef CONFIG_KEXEC
+ " CONFIG_KEXEC"
+#endif
+#ifdef CONFIG_MATH_EMULATION
+ " CONFIG_MATH_EMULATION"
+#endif
- #ifdef CONFIG_PARAVIRT_XEN
++#ifdef CONFIG_XEN
+ " CONFIG_XEN"
+#endif
+#ifdef CONFIG_DEBUG_INFO
+ " CONFIG_DEBUG_INFO"
+#endif
+#ifdef NO_SIBLINGS
+ " NO_SIBLINGS"
+#endif
+ " REGPARM=" __stringify(REGPARM)
+ "\n\n", __FUNCTION__);
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (!r->address)
+ kdb_printf("%s: cannot find special_case name %s\n",
+ __FUNCTION__, r->name);
+ }
+ for (i = 0; i < ARRAY_SIZE(bb_spurious); ++i) {
+ if (!kallsyms_lookup_name(bb_spurious[i]))
+ kdb_printf("%s: cannot find spurious label %s\n",
+ __FUNCTION__, bb_spurious[i]);
+ }
+ while ((symname = kdb_walk_kallsyms(&pos))) { /* advance the walk to the start of text */
+ if (strcmp(symname, "_stext") == 0 ||
+ strcmp(symname, "stext") == 0)
+ break;
+ }
+ if (!symname) {
+ kdb_printf("%s: cannot find _stext\n", __FUNCTION__);
+ return 0;
+ }
+ kdba_id_init(&kdb_di);
+ i = 0;
+ while ((symname = kdb_walk_kallsyms(&pos))) {
+ if (strcmp(symname, "_etext") == 0)
+ break;
+ if (i++ % 100 == 0) /* progress indicator, one dot per 100 symbols */
+ kdb_printf(".");
+ /* x86_64 has some 16 bit functions that appear between stext
+ * and _etext. Skip them.
+ */
+ if (strcmp(symname, "verify_cpu") == 0 ||
+ strcmp(symname, "verify_cpu_noamd") == 0 ||
+ strcmp(symname, "verify_cpu_sse_test") == 0 ||
+ strcmp(symname, "verify_cpu_no_longmode") == 0 ||
+ strcmp(symname, "verify_cpu_sse_ok") == 0 ||
+ strcmp(symname, "mode_seta") == 0 ||
+ strcmp(symname, "bad_address") == 0 ||
+ strcmp(symname, "wakeup_code") == 0 ||
+ strcmp(symname, "wakeup_code_start") == 0 ||
+ strcmp(symname, "wakeup_start") == 0 ||
+ strcmp(symname, "wakeup_32_vector") == 0 ||
+ strcmp(symname, "wakeup_32") == 0 ||
+ strcmp(symname, "wakeup_long64_vector") == 0 ||
+ strcmp(symname, "wakeup_long64") == 0 ||
+ strcmp(symname, "gdta") == 0 ||
+ strcmp(symname, "idt_48a") == 0 ||
+ strcmp(symname, "gdt_48a") == 0 ||
+ strcmp(symname, "bogus_real_magic") == 0 ||
+ strcmp(symname, "bogus_64_magic") == 0 ||
+ strcmp(symname, "no_longmode") == 0 ||
+ strcmp(symname, "mode_set") == 0 ||
+ strcmp(symname, "mode_seta") == 0 || /* NOTE(review): duplicate of the "mode_seta" test above, harmless */
+ strcmp(symname, "setbada") == 0 ||
+ strcmp(symname, "check_vesa") == 0 ||
+ strcmp(symname, "check_vesaa") == 0 ||
+ strcmp(symname, "_setbada") == 0 ||
+ strcmp(symname, "wakeup_stack_begin") == 0 ||
+ strcmp(symname, "wakeup_stack") == 0 ||
+ strcmp(symname, "wakeup_level4_pgt") == 0 ||
+ strcmp(symname, "acpi_copy_wakeup_routine") == 0 ||
+ strcmp(symname, "wakeup_end") == 0 ||
+ strcmp(symname, "do_suspend_lowlevel_s4bios") == 0 ||
+ strcmp(symname, "do_suspend_lowlevel") == 0 ||
+ strcmp(symname, "wakeup_pmode_return") == 0 ||
+ strcmp(symname, "restore_registers") == 0)
+ continue;
+ /* __kprobes_text_end contains branches to the middle of code,
+ * with undefined states.
+ */
+ if (strcmp(symname, "__kprobes_text_end") == 0)
+ continue;
+ /* Data in the middle of the text segment :( */
+ if (strcmp(symname, "level2_kernel_pgt") == 0 ||
+ strcmp(symname, "level3_kernel_pgt") == 0)
+ continue;
+ if (bb_spurious_global_label(symname))
+ continue;
+ if ((addr = kallsyms_lookup_name(symname)) == 0)
+ continue;
+ // kdb_printf("BB " kdb_bfd_vma_fmt0 " %s\n", addr, symname);
+ bb_cleanup(); /* in case previous command was interrupted */
+ kdbnearsym_cleanup();
+ kdb_bb(addr);
+ touch_nmi_watchdog();
+ if (bb_giveup) {
+ if (max_errors-- == 0) {
+ kdb_printf("%s: max_errors reached, giving up\n",
+ __FUNCTION__);
+ break;
+ } else {
+ bb_giveup = 0; /* clear the failure and carry on with the next symbol */
+ }
+ }
+ }
+ kdb_printf("\n");
+ bb_cleanup();
+ kdbnearsym_cleanup();
+ return 0;
+}
+
+/*
+ *=============================================================================
+ *
+ * Everything above this line is doing basic block analysis, function by
+ * function. Everything below this line uses the basic block data to do a
+ * complete backtrace over all functions that are used by a process.
+ *
+ *=============================================================================
+ */
+
+
+/*============================================================================*/
+/* */
+/* Most of the backtrace code and data is common to x86_64 and i386. This */
+/* large ifdef contains all of the differences between the two architectures. */
+/* */
+/* Make sure you update the correct section of this ifdef. */
+/* */
+/*============================================================================*/
+#define XCS "cs" /* XCS/RSP/RIP: register names as strings, e.g. RSP is pasted into kdb_printf() formats */
+#define RSP "sp"
+#define RIP "ip"
+#define ARCH_RSP sp /* NOTE(review): ARCH_RSP/ARCH_RIP are presumably register field names (e.g. in pt_regs) -- confirm against users */
+#define ARCH_RIP ip
+
+#ifdef CONFIG_X86_64
+
+#define ARCH_NORMAL_PADDING (16 * 8)
+
+/* x86_64 has multiple alternate stacks, with different sizes and different
- * offsets to get the link from one stack to the next. Some of the stacks are
- * referenced via cpu_pda, some via per_cpu orig_ist. Debug events can even
- * have multiple nested stacks within the single physical stack, each nested
- * stack has its own link and some of those links are wrong.
++ * offsets to get the link from one stack to the next. All of the stacks are
++ * in the per_cpu area: either in the orig_ist or irq_stack_ptr. Debug events
++ * can even have multiple nested stacks within the single physical stack,
++ * each nested stack has its own link and some of those links are wrong.
+ *
+ * Consistent it's not!
+ *
+ * Do not assume that these stacks are aligned on their size.
+ */
+#define INTERRUPT_STACK (N_EXCEPTION_STACKS + 1) /* 1-based slot of the irq stack in stack_data[] below */
+void
+kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
+ struct kdb_activation_record *ar)
+{ /* If addr lies within one of the x86_64 alternate stacks (the exception
+ * stacks from per_cpu orig_ist, or the irq stack ending at per_cpu
+ * irq_stack_ptr), fill in ar->stack with that stack's bounds, the link to
+ * the next stack and an id string.  With cpu < 0 every online cpu is
+ * searched, otherwise only the given cpu.  ar is left untouched when addr
+ * is not in any alternate stack.
+ */
+ static struct {
+ const char *id;
+ unsigned int total_size;
+ unsigned int nested_size;
+ unsigned int next;
+ } *sdp, stack_data[] = {
+ [STACKFAULT_STACK - 1] = { "stackfault", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [DOUBLEFAULT_STACK - 1] = { "doublefault", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [NMI_STACK - 1] = { "nmi", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [DEBUG_STACK - 1] = { "debug", DEBUG_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [MCE_STACK - 1] = { "machine check", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
- [INTERRUPT_STACK - 1] = { "interrupt", IRQ_STACK_SIZE, IRQ_STACK_SIZE, IRQ_STACK_SIZE - sizeof(void *) },
++ [INTERRUPT_STACK - 1] = { "interrupt", IRQ_STACK_SIZE, IRQ_STACK_SIZE, IRQ_STACK_SIZE - sizeof(void *) },
+ };
+ unsigned long total_start = 0, total_size, total_end;
+ int sd, found = 0;
+ extern unsigned long kdba_orig_ist(int, int);
+
+ for (sd = 0, sdp = stack_data;
+ sd < ARRAY_SIZE(stack_data);
+ ++sd, ++sdp) {
+ total_size = sdp->total_size;
+ if (!total_size)
+ continue; /* in case stack_data[] has any holes */
+ if (cpu < 0) {
+ /* Arbitrary address which can be on any cpu, see if it
+ * falls within any of the alternate stacks
+ */
+ int c;
+ for_each_online_cpu(c) {
+ if (sd == INTERRUPT_STACK - 1)
- total_end = (unsigned long) per_cpu(irq_stack_ptr, c);
++ total_end = (unsigned long)per_cpu(irq_stack_ptr, c);
+ else
+ total_end = per_cpu(orig_ist, c).ist[sd];
+ total_start = total_end - total_size;
+ if (addr >= total_start && addr < total_end) {
+ found = 1;
+ cpu = c;
+ break;
+ }
+ }
+ if (!found)
+ continue;
+ }
+ /* Only check the supplied or found cpu */
+ if (sd == INTERRUPT_STACK - 1)
- total_end = (unsigned long) per_cpu(irq_stack_ptr, cpu);
++ total_end = (unsigned long)per_cpu(irq_stack_ptr, cpu);
+ else
+ total_end = per_cpu(orig_ist, cpu).ist[sd];
+ total_start = total_end - total_size;
+ if (addr >= total_start && addr < total_end) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ /* find which nested stack the address is in */
+ while (addr > total_start + sdp->nested_size) /* NOTE(review): '>' keeps addr == boundary in the lower nested stack -- confirm intended */
+ total_start += sdp->nested_size;
+ ar->stack.physical_start = total_start;
+ ar->stack.physical_end = total_start + sdp->nested_size;
+ ar->stack.logical_start = total_start;
+ ar->stack.logical_end = total_start + sdp->next;
+ ar->stack.next = *(unsigned long *)ar->stack.logical_end; /* link word stored at the logical end */
+ ar->stack.id = sdp->id;
+
+ /* Nasty: when switching to the interrupt stack, the stack state of the
+ * caller is split over two stacks, the original stack and the
+ * interrupt stack. One word (the previous frame pointer) is stored on
+ * the interrupt stack, the rest of the interrupt data is in the old
+ * frame. To make the interrupted stack state look as though it is
+ * contiguous, copy the missing word from the interrupt stack to the
+ * original stack and adjust the new stack pointer accordingly.
+ */
+
+ if (sd == INTERRUPT_STACK - 1) {
+ *(unsigned long *)(ar->stack.next - KDB_WORD_SIZE) =
+ ar->stack.next;
+ ar->stack.next -= KDB_WORD_SIZE;
+ }
+}
+
+/* rip is not in the thread struct for x86_64. We know that the stack value
+ * was saved in schedule near the label thread_return. Setting rip to
+ * thread_return lets the stack trace find that we are in schedule and
+ * correctly decode its prologue.
+ *
+ * The task argument p is not used here; bb_thread_return is returned for
+ * every task.
+ */
+
+static kdb_machreg_t
+kdba_bt_stack_rip(const struct task_struct *p)
+{
+ return bb_thread_return;
+}
+
+#else /* !CONFIG_X86_64 */
+
+#define ARCH_NORMAL_PADDING (19 * 4)
+
+#ifdef CONFIG_4KSTACKS
+static struct thread_info **kdba_hardirq_ctx, **kdba_softirq_ctx; /* indexed by cpu below; not assigned in this part of the file */
+#endif /* CONFIG_4KSTACKS */
+
+/* On a 4K stack kernel, hardirq_ctx and softirq_ctx are [NR_CPUS] arrays. The
+ * first element of each per-cpu stack is a struct thread_info.
+ *
+ * If addr, rounded down to a THREAD_SIZE boundary, is the hardirq or softirq
+ * context of the given cpu (or of any online cpu when cpu < 0), fill in
+ * ar->stack with that stack's bounds, the previous stack pointer from
+ * thread_info and an id string.  Otherwise ar is left untouched.  Without
+ * CONFIG_4KSTACKS this function does nothing.
+ */
+void
+kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
+ struct kdb_activation_record *ar)
+{
+#ifdef CONFIG_4KSTACKS
+ struct thread_info *tinfo;
+ tinfo = (struct thread_info *)(addr & -THREAD_SIZE);
+ if (cpu < 0) {
+ /* Arbitrary address, see if it falls within any of the irq
+ * stacks
+ */
+ int found = 0;
+ for_each_online_cpu(cpu) {
+ if (tinfo == kdba_hardirq_ctx[cpu] ||
+ tinfo == kdba_softirq_ctx[cpu]) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ }
+ if (tinfo == kdba_hardirq_ctx[cpu] ||
+ tinfo == kdba_softirq_ctx[cpu]) {
+ ar->stack.physical_start = (kdb_machreg_t)tinfo;
+ ar->stack.physical_end = ar->stack.physical_start + THREAD_SIZE;
+ ar->stack.logical_start = ar->stack.physical_start +
+ sizeof(struct thread_info); /* usable stack starts after the thread_info */
+ ar->stack.logical_end = ar->stack.physical_end;
+ ar->stack.next = tinfo->previous_esp;
+ if (tinfo == kdba_hardirq_ctx[cpu])
+ ar->stack.id = "hardirq_ctx";
+ else
+ ar->stack.id = "softirq_ctx";
+ }
+#endif /* CONFIG_4KSTACKS */
+}
+
+/* i386: the saved instruction pointer is in the thread struct, p->thread.ip */
+
+static kdb_machreg_t
+kdba_bt_stack_rip(const struct task_struct *p)
+{
+ return p->thread.ip;
+}
+
+#endif /* CONFIG_X86_64 */
+
+/* Given an address which claims to be on a stack, an optional cpu number and
+ * an optional task address, get information about the stack.
+ *
+ * t == NULL, cpu < 0 indicates an arbitrary stack address with no associated
+ * struct task, the address can be in an alternate stack or any task's normal
+ * stack.
+ *
+ * t != NULL, cpu >= 0 indicates a running task, the address can be in an
+ * alternate stack or that task's normal stack.
+ *
+ * t != NULL, cpu < 0 indicates a blocked task, the address can only be in that
+ * task's normal stack.
+ *
+ * t == NULL, cpu >= 0 is not a valid combination.
+ */
+
+static void
+kdba_get_stack_info(kdb_machreg_t rsp, int cpu,
+ struct kdb_activation_record *ar,
+ const struct task_struct *t)
+{
+ struct thread_info *tinfo;
+ struct task_struct *g, *p;
+ memset(&ar->stack, 0, sizeof(ar->stack)); /* callers treat physical_start == 0 as "no stack found" */
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: " RSP "=0x%lx cpu=%d task=%p\n",
+ __FUNCTION__, rsp, cpu, t);
+ if (t == NULL || cpu >= 0) { /* arbitrary address or running task: try the alternate stacks first */
+ kdba_get_stack_info_alternate(rsp, cpu, ar);
+ if (ar->stack.logical_start)
+ goto out;
+ }
+ rsp &= -THREAD_SIZE; /* round down to the base of the THREAD_SIZE aligned stack */
+ tinfo = (struct thread_info *)rsp;
+ if (t == NULL) {
+ /* Arbitrary stack address without an associated task, see if
+ * it falls within any normal process stack, including the idle
+ * tasks.
+ */
+ kdb_do_each_thread(g, p) {
+ if (tinfo == task_thread_info(p)) {
+ t = p;
+ goto found;
+ }
+ } kdb_while_each_thread(g, p);
+ for_each_online_cpu(cpu) { /* reuses the cpu parameter as the loop variable */
+ p = idle_task(cpu);
+ if (tinfo == task_thread_info(p)) {
+ t = p;
+ goto found;
+ }
+ }
+ found: /* also reached by falling through with t still NULL when nothing matched */
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: found task %p\n", __FUNCTION__, t);
+ } else if (cpu >= 0) {
+ /* running task */
+ struct kdb_running_process *krp = kdb_running_process + cpu;
+ if (krp->p != t || tinfo != task_thread_info(t))
+ t = NULL; /* rsp is not on the stack of the task recorded for this cpu */
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: running task %p\n", __FUNCTION__, t);
+ } else {
+ /* blocked task */
+ if (tinfo != task_thread_info(t))
+ t = NULL;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: blocked task %p\n", __FUNCTION__, t);
+ }
+ if (t) {
+ ar->stack.physical_start = rsp;
+ ar->stack.physical_end = rsp + THREAD_SIZE;
+ ar->stack.logical_start = rsp + sizeof(struct thread_info);
+ ar->stack.logical_end = ar->stack.physical_end - ARCH_NORMAL_PADDING;
+ ar->stack.next = 0; /* a normal stack has no successor stack */
+ ar->stack.id = "normal";
+ }
+out:
+ if (ar->stack.physical_start && KDB_DEBUG(ARA)) {
+ kdb_printf("%s: ar->stack\n", __FUNCTION__);
+ kdb_printf(" physical_start=0x%lx\n", ar->stack.physical_start);
+ kdb_printf(" physical_end=0x%lx\n", ar->stack.physical_end);
+ kdb_printf(" logical_start=0x%lx\n", ar->stack.logical_start);
+ kdb_printf(" logical_end=0x%lx\n", ar->stack.logical_end);
+ kdb_printf(" next=0x%lx\n", ar->stack.next);
+ kdb_printf(" id=%s\n", ar->stack.id);
+ kdb_printf(" set MDCOUNT %ld\n", /* prints the stack size in words in the form of a "set MDCOUNT" line */
+ (ar->stack.physical_end - ar->stack.physical_start) /
+ KDB_WORD_SIZE);
+ kdb_printf(" mds " kdb_machreg_fmt0 "\n",
+ ar->stack.physical_start);
+ }
+}
+
+/* Print one backtrace entry: the stack pointer, the symbolic form of rip and,
+ * when argcount is non-zero and arguments were decoded, the argument values.
+ * The NOSECT and BTSYMARG kdb environment variables control the optional
+ * module/section line and the per-argument symbol lines.
+ */
+static void
+bt_print_one(kdb_machreg_t rip, kdb_machreg_t rsp,
+	     const struct kdb_activation_record *ar,
+	     const kdb_symtab_t *symtab, int argcount)
+{
+	int show_arg_syms = 0;		/* BTSYMARG */
+	int hide_sections = 0;		/* NOSECT */
+	int have_args, n, limit;
+
+	kdbgetintenv("BTSYMARG", &show_arg_syms);
+	kdbgetintenv("NOSECT", &hide_sections);
+	have_args = argcount && ar->args;
+
+	/* First line: rsp, rip as a symbol, then "(arg, arg, ...)" */
+	kdb_printf(kdb_machreg_fmt0, rsp);
+	kdb_symbol_print(rip, symtab,
+			 KDB_SP_SPACEB|KDB_SP_VALUE);
+	if (have_args) {
+		/* never print more arguments than the caller asked for */
+		limit = ar->args;
+		if (limit > argcount)
+			limit = argcount;
+		kdb_printf(" (");
+		for (n = 0; n < limit; n++) {
+			if (n > 0)
+				kdb_printf(", ");
+			if (!test_bit(n, ar->valid.bits))
+				kdb_printf("invalid");
+			else
+				kdb_printf("0x%lx", ar->arg[n]);
+		}
+		kdb_printf(")");
+	}
+	kdb_printf("\n");
+
+	/* Optional second line: module name, section and symbol bounds */
+	if (symtab->sym_name && !hide_sections) {
+		kdb_printf(" %s",
+			   symtab->mod_name);
+		if (symtab->sec_name && symtab->sec_start)
+			kdb_printf(" 0x%lx 0x%lx",
+				   symtab->sec_start, symtab->sec_end);
+		kdb_printf(" 0x%lx 0x%lx\n",
+			   symtab->sym_start, symtab->sym_end);
+	}
+
+	if (!have_args || !show_arg_syms)
+		return;
+
+	/* Optional: one line per valid argument that resolves to a symbol.
+	 * This walks all decoded arguments, it is not clamped to argcount.
+	 */
+	limit = ar->args;
+	for (n = 0; n < limit; n++) {
+		kdb_symtab_t arg_sym;
+		kdb_machreg_t value = ar->arg[n];
+
+		if (!test_bit(n, ar->valid.bits) ||
+		    !kdbnearsym(value, &arg_sym))
+			continue;
+		kdb_printf(" ARG %2d ", n);
+		kdb_symbol_print(value, &arg_sym,
+				 KDB_SP_DEFAULT|KDB_SP_NEWLINE);
+	}
+}
+
+/* Move the backtrace from the current stack to the stack recorded in
+ * ar->stack.next, updating *rsp, the basic block RSP value and ar->stack.
+ */
+static void
+kdba_bt_new_stack(struct kdb_activation_record *ar, kdb_machreg_t *rsp,
+		  int *count, int *suppress)
+{
+	/* Nasty: save_args builds only a partial pt_regs, r15 through rbx are
+	 * not filled in.  It hands struct pt_regs* to do_IRQ (in rdi) without
+	 * adjusting the stack pointer for r15 through rbx.  Consequences :-
+	 *
+	 * (1) On an external interrupt, struct pt_regs overlaps the local
+	 *     stack area used by do_IRQ.  r15-rbx are undefined and the area
+	 *     that claims to hold them can change while the irq is handled.
+	 *
+	 * (2) The back stack pointer saved for the new frame does not point
+	 *     at pt_regs, it points at rbx inside the pt_regs passed to
+	 *     do_IRQ.
+	 *
+	 * Nothing can be done about (1), but (2) has to be handled because
+	 * kdb backtrace looks for the "start" address of pt_regs while it
+	 * walks back through the stacks.  So when leaving the interrupt stack
+	 * for another stack, assume that pt_regs has been seen and turn off
+	 * backtrace suppression.
+	 *
+	 * The stack id must be sampled now, kdba_get_stack_info() below
+	 * overwrites ar->stack.
+	 */
+	const int leaving_interrupt_stack =
+		!strcmp(ar->stack.id, "interrupt");
+	const kdb_machreg_t next_rsp = ar->stack.next;
+
+	*rsp = next_rsp;
+	if (KDB_DEBUG(ARA))
+		kdb_printf("new " RSP "=" kdb_machreg_fmt0 "\n", next_rsp);
+	bb_actual_set_value(BBRG_RSP, next_rsp);
+	kdba_get_stack_info(next_rsp, -1, ar, NULL);
+
+	if (ar->stack.physical_start) {
+		/* the separator counts as a printed line unless suppressed */
+		if (!*suppress) {
+			kdb_printf(" ======================= <%s>\n",
+				   ar->stack.id);
+			++*count;
+		}
+	} else {
+		kdb_printf("+++ Cannot resolve next stack\n");
+	}
+
+	if (leaving_interrupt_stack)
+		*suppress = 0;
+}
+
+/*
+ * kdba_bt_stack
+ *
+ * Inputs:
+ * addr Address provided to 'bt' command, if any.
+ * argcount
+ * p Pointer to task for 'btp' command.
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Ultimately all the bt* commands come through this routine. If
+ * old_style is 0 then it uses the basic block analysis to get an accurate
+ * backtrace with arguments, otherwise it falls back to the old method of
+ * printing anything on stack that looks like a kernel address.
+ *
+ * Allowing for the stack data pushed by the hardware is tricky. We
+ * deduce the presence of hardware pushed data by looking for interrupt
+ * handlers, either by name or by the code that they contain. This
+ * information must be applied to the next function up the stack, because
+ * the hardware data is above the saved rip for the interrupted (next)
+ * function.
+ *
+ * To make things worse, the amount of data pushed is arch specific and
+ * may depend on the rsp for the next function, not the current function.
+ * The number of bytes pushed by hardware cannot be calculated until we
+ * are actually processing the stack for the interrupted function and have
+ * its rsp.
+ *
+ * It is also possible for an interrupt to occur in user space and for the
+ * interrupt handler to also be interrupted. Check the code selector
+ * whenever the previous function is an interrupt handler and stop
+ * backtracing if the interrupt was not in kernel space.
+ */
+
+static int
+kdba_bt_stack(kdb_machreg_t addr, int argcount, const struct task_struct *p,
+ int old_style)
+{
+ struct kdb_activation_record ar;
+ kdb_machreg_t rip = 0, rsp = 0, prev_rsp, cs;
+ kdb_symtab_t symtab;
+ int rip_at_rsp = 0, count = 0, btsp = 0, suppress,
+ interrupt_handler = 0, prev_interrupt_handler = 0, hardware_pushed,
+ prev_noret = 0;
+ struct pt_regs *regs = NULL;
+
+ kdbgetintenv("BTSP", &btsp);
+ suppress = !btsp; /* without BTSP, output stays suppressed until one of the paths below clears it */
+ memset(&ar, 0, sizeof(ar));
+ if (old_style)
+ kdb_printf("Using old style backtrace, unreliable with no arguments\n");
+
+ /*
+ * The caller may have supplied an address at which the stack traceback
+ * operation should begin. This address is assumed by this code to
+ * point to a return address on the stack to be traced back.
+ *
+ * Warning: type in the wrong address and you will get garbage in the
+ * backtrace.
+ */
+ if (addr) {
+ rsp = addr;
+ kdb_getword(&rip, rsp, sizeof(rip));
+ rip_at_rsp = 1;
+ suppress = 0;
+ kdba_get_stack_info(rsp, -1, &ar, NULL);
+ } else {
+ if (task_curr(p)) {
+ struct kdb_running_process *krp =
+ kdb_running_process + task_cpu(p);
+ kdb_machreg_t cs; /* NOTE(review): shadows the function-scope cs */
+ regs = krp->regs;
+ if (krp->seqno &&
+ krp->p == p &&
+ krp->seqno >= kdb_seqno - 1 &&
+ !KDB_NULL_REGS(regs)) {
+ /* valid saved state, continue processing */
+ } else {
+ kdb_printf
+ ("Process did not save state, cannot backtrace\n");
+ kdb_ps1(p);
+ return 0;
+ }
+ kdba_getregcontents(XCS, regs, &cs);
+ if ((cs & 0xffff) != __KERNEL_CS) {
+ kdb_printf("Stack is not in kernel space, backtrace not available\n");
+ return 0;
+ }
+ rip = krp->arch.ARCH_RIP;
+ rsp = krp->arch.ARCH_RSP;
+ kdba_get_stack_info(rsp, kdb_process_cpu(p), &ar, p);
+ } else {
+ /* Not on cpu, assume blocked. Blocked tasks do not
+ * have pt_regs. p->thread contains some data, alas
+ * what it contains differs between i386 and x86_64.
+ */
+ rip = kdba_bt_stack_rip(p);
+ rsp = p->thread.sp;
+ suppress = 0;
+ kdba_get_stack_info(rsp, -1, &ar, p);
+ }
+ }
+ if (!ar.stack.physical_start) {
+ kdb_printf(RSP "=0x%lx is not in a valid kernel stack, backtrace not available\n",
+ rsp);
+ return 0;
+ }
+ memset(&bb_actual, 0, sizeof(bb_actual)); /* the starting rsp is the only register value known at this point */
+ bb_actual_set_value(BBRG_RSP, rsp);
+ bb_actual_set_valid(BBRG_RSP, 1);
+
+ kdb_printf(RSP "%*s" RIP "%*sFunction (args)\n",
+ 2*KDB_WORD_SIZE, " ",
+ 2*KDB_WORD_SIZE, " ");
+ if (ar.stack.next && !suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+
+ bb_cleanup();
+ /* Run through all the stacks */
+ while (ar.stack.physical_start) {
+ if (rip_at_rsp) {
+ rip = *(kdb_machreg_t *)rsp; /* direct read, unlike the kdb_getword() used for the initial addr */
+ /* I wish that gcc was fixed to include a nop
+ * instruction after ATTRIB_NORET functions. The lack
+ * of a nop means that the return address points to the
+ * start of next function, so fudge it to point to one
+ * byte previous.
+ *
+ * No, we cannot just decrement all rip values.
+ * Sometimes an rip legally points to the start of a
+ * function, e.g. interrupted code or hand crafted
+ * assembler.
+ */
+ if (prev_noret) {
+ kdbnearsym(rip, &symtab);
+ if (rip == symtab.sym_start) {
+ --rip;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("\tprev_noret, " RIP
+ "=0x%lx\n", rip);
+ }
+ }
+ }
+ kdbnearsym(rip, &symtab);
+ if (old_style) {
+ if (__kernel_text_address(rip) && !suppress) { /* old style: print every stack word that looks like kernel text */
+ bt_print_one(rip, rsp, &ar, &symtab, 0);
+ ++count;
+ }
+ if (rsp == (unsigned long)regs) { /* reached the saved pt_regs: stop suppressing output */
+ if (ar.stack.next && suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+ ++count;
+ suppress = 0;
+ }
+ rsp += sizeof(rip);
+ rip_at_rsp = 1;
+ if (rsp >= ar.stack.logical_end) {
+ if (!ar.stack.next)
+ break;
+ kdba_bt_new_stack(&ar, &rsp, &count, &suppress);
+ rip_at_rsp = 0;
+ continue;
+ }
+ } else {
+ /* Start each analysis with no dynamic data from the
+ * previous kdb_bb() run.
+ */
+ bb_cleanup();
+ kdb_bb(rip);
+ if (bb_giveup)
+ break;
+ prev_interrupt_handler = interrupt_handler;
+ interrupt_handler = bb_interrupt_handler(rip);
+ prev_rsp = rsp;
+ if (rip_at_rsp) {
+ if (prev_interrupt_handler) {
+ cs = *((kdb_machreg_t *)rsp + 1) & 0xffff; /* word above the return address, taken as the saved code selector */
+ hardware_pushed =
+ bb_hardware_pushed_arch(rsp, &ar);
+ } else {
+ cs = __KERNEL_CS;
+ hardware_pushed = 0;
+ }
+ rsp += sizeof(rip) + hardware_pushed; /* step over the return address and any hardware pushed data */
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: " RSP " "
+ kdb_machreg_fmt0
+ " -> " kdb_machreg_fmt0
+ " hardware_pushed %d"
+ " prev_interrupt_handler %d"
+ " cs 0x%lx\n",
+ __FUNCTION__,
+ prev_rsp,
+ rsp,
+ hardware_pushed,
+ prev_interrupt_handler,
+ cs);
+ if (rsp >= ar.stack.logical_end &&
+ ar.stack.next) {
+ kdba_bt_new_stack(&ar, &rsp, &count,
+ &suppress);
+ rip_at_rsp = 0;
+ continue;
+ }
+ bb_actual_set_value(BBRG_RSP, rsp);
+ } else {
+ cs = __KERNEL_CS;
+ }
+ rip_at_rsp = 1;
+ bb_actual_rollback(&ar);
+ if (bb_giveup)
+ break;
+ if (bb_actual_value(BBRG_RSP) < rsp) { /* rolled back rsp below the current one: treat the analysis as failed */
+ kdb_printf("%s: " RSP " is going backwards, "
+ kdb_machreg_fmt0 " -> "
+ kdb_machreg_fmt0 "\n",
+ __FUNCTION__,
+ rsp,
+ bb_actual_value(BBRG_RSP));
+ bb_giveup = 1;
+ break;
+ }
+ bb_arguments(&ar);
+ if (!suppress) {
+ bt_print_one(rip, prev_rsp, &ar, &symtab, argcount);
+ ++count;
+ }
+ /* Functions that terminate the backtrace */
+ if (strcmp(bb_func_name, "cpu_idle") == 0 ||
+ strcmp(bb_func_name, "child_rip") == 0)
+ break;
+ if (rsp >= ar.stack.logical_end &&
+ !ar.stack.next)
+ break;
+ if (rsp <= (unsigned long)regs &&
+ bb_actual_value(BBRG_RSP) > (unsigned long)regs) { /* this frame spans the saved pt_regs: stop suppressing output */
+ if (ar.stack.next && suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+ ++count;
+ suppress = 0;
+ }
+ if (cs != __KERNEL_CS) {
+ kdb_printf("Reached user space\n");
+ break;
+ }
+ rsp = bb_actual_value(BBRG_RSP);
+ }
+ prev_noret = bb_noret(bb_func_name);
+ if (count > 200) /* hard cap on the number of lines, reported as truncation below */
+ break;
+ }
+ if (bb_giveup) /* NOTE(review): returns without bb_cleanup()/kdbnearsym_cleanup(); the callers in this file retry in old style, which calls bb_cleanup() before its loop */
+ return 1;
+ bb_cleanup();
+ kdbnearsym_cleanup();
+
+ if (count > 200) {
+ kdb_printf("bt truncated, count limit reached\n");
+ return 1;
+ } else if (suppress) {
+ kdb_printf
+ ("bt did not find pt_regs - no trace produced. Suggest 'set BTSP 1'\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * kdba_bt_address
+ *
+ * Do a backtrace starting at a specified stack address. Use this if the
+ * heuristics get the stack decode wrong.
+ *
+ * Inputs:
+ * addr Address provided to 'bt' command.
+ * argcount
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * mds %rsp comes in handy when examining the stack to do a manual
+ * traceback.
+ */
+
+int kdba_bt_address(kdb_machreg_t addr, int argcount)
+{
+	int status;
+
+	/* kdb_bb needs this done once */
+	kdba_id_init(&kdb_di);
+
+	/* Basic block analysis first; a result of 1 means it gave up or
+	 * printed nothing usable, so fall back to the old style scan.
+	 */
+	status = kdba_bt_stack(addr, argcount, NULL, 0);
+	if (status != 1)
+		return status;
+	return kdba_bt_stack(addr, argcount, NULL, 1);
+}
+
+/*
+ * kdba_bt_process
+ *
+ * Do a backtrace for a specified process.
+ *
+ * Inputs:
+ * p Struct task pointer extracted by 'bt' command.
+ * argcount
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ */
+
+int kdba_bt_process(const struct task_struct *p, int argcount)
+{
+	int status;
+
+	/* kdb_bb needs this done once */
+	kdba_id_init(&kdb_di);
+
+	/* Basic block analysis first; a result of 1 means it gave up or
+	 * printed nothing usable, so fall back to the old style scan.
+	 */
+	status = kdba_bt_stack(0, argcount, p, 0);
+	if (status != 1)
+		return status;
+	return kdba_bt_stack(0, argcount, p, 1);
+}
+
+static int __init kdba_bt_x86_init(void)
+{
+ int i, c, cp = -1;
+ struct bb_name_state *r;
+
+ kdb_register_repeat("bb1", kdb_bb1, "<vaddr>", "Analyse one basic block", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bb_all", kdb_bb_all, "", "Backtrace check on all built in functions", 0, KDB_REPEAT_NONE);
+
+ /* Split the opcode usage table by the first letter of each set of
+ * opcodes, for faster mapping of opcode to its operand usage.
+ */
+ for (i = 0; i < ARRAY_SIZE(bb_opcode_usage_all); ++i) {
+ c = bb_opcode_usage_all[i].opcode[0] - 'a'; /* bucket index, 'a' == 0; NOTE(review): not range checked */
+ if (c != cp) { /* first entry with this letter; presumably the table is grouped by first letter -- TODO confirm */
+ cp = c;
+ bb_opcode_usage[c].opcode = bb_opcode_usage_all + i;
+ }
+ ++bb_opcode_usage[c].size;
+ }
+
+ bb_common_interrupt = kallsyms_lookup_name("common_interrupt"); /* cache the addresses of the special symbols by name */
+ bb_error_entry = kallsyms_lookup_name("error_entry");
+ bb_ret_from_intr = kallsyms_lookup_name("ret_from_intr");
+ bb_thread_return = kallsyms_lookup_name("thread_return");
+ bb_sync_regs = kallsyms_lookup_name("sync_regs");
+ bb_save_v86_state = kallsyms_lookup_name("save_v86_state");
+ bb__sched_text_start = kallsyms_lookup_name("__sched_text_start");
+ bb__sched_text_end = kallsyms_lookup_name("__sched_text_end");
+ bb_save_args = kallsyms_lookup_name("save_args");
++ bb_save_rest = kallsyms_lookup_name("save_rest");
++ bb_save_paranoid = kallsyms_lookup_name("save_paranoid");
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ r->address = kallsyms_lookup_name(r->name); /* resolve each special case name to its address */
+ }
+
+#ifdef CONFIG_4KSTACKS
+ kdba_hardirq_ctx = (struct thread_info **)kallsyms_lookup_name("hardirq_ctx"); /* used by kdba_get_stack_info_alternate(); lookup results are not checked */
+ kdba_softirq_ctx = (struct thread_info **)kallsyms_lookup_name("softirq_ctx");
+#endif /* CONFIG_4KSTACKS */
+
+ return 0;
+}
+
+static void __exit kdba_bt_x86_exit(void)
+{
+ kdb_unregister("bb1"); /* drop the two commands registered by kdba_bt_x86_init() */
+ kdb_unregister("bb_all");
+}
+
+module_init(kdba_bt_x86_init)
+module_exit(kdba_bt_x86_exit)
--- /dev/null
+/*
+ * Kernel Debugger Architecture Dependent Console I/O handler
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <asm/io.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+#include <linux/ctype.h>
+#include <linux/keyboard.h>
+#include <linux/serial.h>
+#include <linux/serial_reg.h>
+
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
- #include <pc_keyb.h>
++#include "pc_keyb.h"
+
+#ifdef CONFIG_VT_CONSOLE
+#define KDB_BLINK_LED 1
+#else
+#undef KDB_BLINK_LED
+#endif
+
+#ifdef CONFIG_KDB_USB
+
- /* support up to 8 USB keyboards (probably excessive, but...) */
- #define KDB_USB_NUM_KEYBOARDS 8
+struct kdb_usb_kbd_info kdb_usb_kbds[KDB_USB_NUM_KEYBOARDS];
++EXPORT_SYMBOL(kdb_usb_kbds);
+
+extern int kdb_no_usb;
+
+static unsigned char kdb_usb_keycode[256] = { /* translation table indexed by the key byte read in get_usb_char(); the initializer list is shorter than 256, so the tail is implicitly 0 */
+ 0, 0, 0, 0, 30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38,
+ 50, 49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 28, 1, 14, 15, 57, 12, 13, 26,
+ 27, 43, 84, 39, 40, 41, 51, 52, 53, 58, 59, 60, 61, 62, 63, 64,
+ 65, 66, 67, 68, 87, 88, 99, 70,119,110,102,104,111,107,109,106,
+ 105,108,103, 69, 98, 55, 74, 78, 96, 79, 80, 81, 75, 76, 77, 71,
+ 72, 73, 82, 83, 86,127,116,117, 85, 89, 90, 91, 92, 93, 94, 95,
+ 120,121,122,123,134,138,130,132,128,129,131,137,133,135,136,113,
+ 115,114, 0, 0, 0,124, 0,181,182,183,184,185,186,187,188,189,
+ 190,191,192,193,194,195,196,197,198, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 29, 42, 56,125, 97, 54,100,126,164,166,165,163,161,115,114,113,
+ 150,158,159,128,136,177,178,176,142,152,173,140
+};
+
+/*
+ * kdb_usb_keyboard_attach()
+ * Attach a USB keyboard to kdb.
+ */
+int
 - kdb_usb_keyboard_attach(struct urb *urb, unsigned char *buffer, void *poll_func)
++kdb_usb_keyboard_attach(struct urb *urb, unsigned char *buffer,
++ void *poll_func, void *compl_func,
++ kdb_hc_keyboard_attach_t kdb_hc_keyboard_attach,
++ kdb_hc_keyboard_detach_t kdb_hc_keyboard_detach,
++ unsigned int bufsize,
++ struct urb *hid_urb) /* NOTE(review): hid_urb is never stored by this function, yet detach matches on .hid_urb -- confirm it is set elsewhere */
+{
+ int i;
+ int rc = -1; /* returned unchanged when every slot is already in use */
+
+ if (kdb_no_usb)
+ return 0; /* USB keyboards disabled: report success without attaching */
+
+ /*
+ * Search through the array of KDB USB keyboards (kdb_usb_kbds)
+ * looking for a free index. If found, assign the keyboard to
+ * the array index.
+ */
+
+ for (i = 0; i < KDB_USB_NUM_KEYBOARDS; i++) {
+ if (kdb_usb_kbds[i].urb) /* index is already assigned */
+ continue;
+
+ /* found a free array index */
+ kdb_usb_kbds[i].urb = urb;
+ kdb_usb_kbds[i].buffer = buffer;
+ kdb_usb_kbds[i].poll_func = poll_func;
+
++ kdb_usb_kbds[i].kdb_hc_urb_complete = compl_func;
++ kdb_usb_kbds[i].kdb_hc_keyboard_attach = kdb_hc_keyboard_attach;
++ kdb_usb_kbds[i].kdb_hc_keyboard_detach = kdb_hc_keyboard_detach;
++
++ /* USB Host Controller specific Keyboard attach callback.
++ * Currently only UHCI has this callback.
++ */
++ if (kdb_usb_kbds[i].kdb_hc_keyboard_attach)
++ kdb_usb_kbds[i].kdb_hc_keyboard_attach(i, bufsize);
++
+ rc = 0; /* success */
+
+ break;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL (kdb_usb_keyboard_attach);
+
+/*
+ * kdb_usb_keyboard_detach()
+ * Detach a USB keyboard from kdb.
+ */
+int
+kdb_usb_keyboard_detach(struct urb *urb)
+{
+ int i;
+ int rc = -1; /* returned unchanged when no slot matches urb */
+
+ if (kdb_no_usb)
+ return 0; /* USB keyboards disabled: nothing was attached, report success */
+
+ /*
+ * Search through the array of KDB USB keyboards (kdb_usb_kbds)
+ * looking for the index with the matching URB. If found,
+ * clear the array index.
+ */
+
+ for (i = 0; i < KDB_USB_NUM_KEYBOARDS; i++) {
 - if (kdb_usb_kbds[i].urb != urb)
++ if ((kdb_usb_kbds[i].urb != urb) &&
++ (kdb_usb_kbds[i].hid_urb != urb))
+ continue;
+
+ /* found it, clear the index */
++
++ /* USB Host Controller specific Keyboard detach callback.
++ * Currently only UHCI has this callback.
++ */
++ if (kdb_usb_kbds[i].kdb_hc_keyboard_detach)
++ kdb_usb_kbds[i].kdb_hc_keyboard_detach(urb, i);
++
+ kdb_usb_kbds[i].urb = NULL; /* a NULL urb is what marks the slot as free for attach */
+ kdb_usb_kbds[i].buffer = NULL;
+ kdb_usb_kbds[i].poll_func = NULL;
+ kdb_usb_kbds[i].caps_lock = 0;
++ kdb_usb_kbds[i].hid_urb = NULL;
+
+ rc = 0; /* success */
+
+ break;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL (kdb_usb_keyboard_detach);
+
+/*
+ * get_usb_char
+ * This function drives the USB attached keyboards.
+ * Fetch the USB scancode and decode it.
+ */
+static int
+get_usb_char(void)
+{
+ int i;
 - int ret;
+ unsigned char keycode, spec;
+ extern u_short plain_map[], shift_map[], ctrl_map[];
++ int ret = 1;
++ int ret_key = -1, j, max; /* NOTE(review): max is assigned below but never read */
+
+ if (kdb_no_usb)
+ return -1;
+
+ /*
+ * Loop through all the USB keyboard(s) and return
+ * the first character obtained from them.
+ */
+
+ for (i = 0; i < KDB_USB_NUM_KEYBOARDS; i++) {
+ /* skip uninitialized keyboard array entries */
+ if (!kdb_usb_kbds[i].urb || !kdb_usb_kbds[i].buffer ||
+ !kdb_usb_kbds[i].poll_func)
+ continue;
+
+ /* Transfer char */
+ ret = (*kdb_usb_kbds[i].poll_func)(kdb_usb_kbds[i].urb);
+ if (ret == -EBUSY && kdb_usb_kbds[i].poll_ret != -EBUSY) /* print the notice only on the transition into -EBUSY */
+ kdb_printf("NOTICE: USB HD driver BUSY. "
+ "USB keyboard has been disabled.\n");
+
+ kdb_usb_kbds[i].poll_ret = ret;
+
+ if (ret < 0) /* error or no characters, try the next kbd */
+ continue;
+
++ /* If 2 keys was pressed simultaneously,
++ * both keycodes will be in buffer.
++ * Last pressed key will be last non
++ * zero byte.
++ */
++ for (j=0; j<4; j++){
++ if (!kdb_usb_kbds[i].buffer[2+j])
++ break;
++ }
++ /* Last pressed key */
++ max = j + 1;
++
+ spec = kdb_usb_kbds[i].buffer[0]; /* byte 0 is used as the modifier (shift/ctrl/alt) mask below */
+ keycode = kdb_usb_kbds[i].buffer[2]; /* only the first key byte is decoded, whatever the scan above found */
+ kdb_usb_kbds[i].buffer[0] = (char)0;
+ kdb_usb_kbds[i].buffer[2] = (char)0;
+
 - if(kdb_usb_kbds[i].buffer[3]) {
 - kdb_usb_kbds[i].buffer[3] = (char)0;
 - continue;
 - }
++ ret_key = -1;
+
+ /* A normal key is pressed, decode it */
+ if(keycode)
+ keycode = kdb_usb_keycode[keycode];
+
+ /* 2 Keys pressed at one time ? */
+ if (spec && keycode) {
+ switch(spec)
+ {
+ case 0x2:
+ case 0x20: /* Shift */
 - return shift_map[keycode];
++ ret_key = shift_map[keycode];
++ break;
+ case 0x1:
+ case 0x10: /* Ctrl */
 - return ctrl_map[keycode];
++ ret_key = ctrl_map[keycode];
++ break;
+ case 0x4:
+ case 0x40: /* Alt */
+ break;
+ }
+ } else if (keycode) { /* If only one key pressed */
+ switch(keycode)
+ {
+ case 0x1C: /* Enter */
 - return 13;
++ ret_key = 13;
++ break;
+
+ case 0x3A: /* Capslock */
+ kdb_usb_kbds[i].caps_lock = !(kdb_usb_kbds[i].caps_lock);
+ break;
+ case 0x0E: /* Backspace */
 - return 8;
++ ret_key = 8;
++ break;
+ case 0x0F: /* TAB */
 - return 9;
++ ret_key = 9;
++ break;
+ case 0x77: /* Pause */
+ break ;
+ default:
+ if(!kdb_usb_kbds[i].caps_lock) {
 - return plain_map[keycode];
++ ret_key = plain_map[keycode];
+ }
+ else {
 - return shift_map[keycode];
++ ret_key = shift_map[keycode];
+ }
+ }
+ }
++
++ if (ret_key != 1) { /* NOTE(review): ret_key starts at -1, so this is also true when nothing was decoded; confirm whether != -1 was intended */
++ /* Key was pressed, return keycode */
++
++ /* Clear buffer before urb resending */
++ if (kdb_usb_kbds[i].buffer)
++ for(j=0; j<8; j++)
++ kdb_usb_kbds[i].buffer[j] = (char)0;
++
++ /* USB Host Controller specific Urb complete callback.
++ * Currently only UHCI has this callback.
++ */
++ if (kdb_usb_kbds[i].kdb_hc_urb_complete)
++ (*kdb_usb_kbds[i].kdb_hc_urb_complete)((struct urb *)kdb_usb_kbds[i].urb);
++
++ return ret_key;
++ }
+ }
+
++
++
+ /* no chars were returned from any of the USB keyboards */
+
+ return -1;
+}
+#endif /* CONFIG_KDB_USB */
+
+/*
+ * This module contains code to read characters from the keyboard or a serial
+ * port.
+ *
+ * It is used by the kernel debugger, and is polled, not interrupt driven.
+ *
+ */
+
+#ifdef KDB_BLINK_LED
+/*
+ * send: Send a byte to the keyboard controller. Used primarily to
+ * alter LED settings.
+ */
+
+static void
+kdb_kbdsend(unsigned char byte)
+{
+	int budget;
+
+	/* Bounded wait for the controller's input buffer to drain */
+	budget = 200 * 1000;
+	while (budget && (inb(KBD_STATUS_REG) & KBD_STAT_IBF))
+		budget--;
+	outb(byte, KBD_DATA_REG);
+	udelay(40);
+
+	/* Bounded wait for the reply byte, then read and discard it */
+	budget = 200 * 1000;
+	while (budget && (~inb(KBD_STATUS_REG) & KBD_STAT_OBF))
+		budget--;
+	inb(KBD_DATA_REG);
+	udelay(40);
+}
+
+/* Flip the LED bits given in led and send the resulting LED state to the
+ * keyboard.  The current state is remembered across calls.
+ */
+static void
+kdb_toggleled(int led)
+{
+	static int led_state;
+
+	led_state = led_state ^ led;
+	kdb_kbdsend(KBD_CMD_SET_LEDS);
+	kdb_kbdsend((unsigned char)led_state);
+}
+#endif /* KDB_BLINK_LED */
+
+#if defined(CONFIG_SERIAL_8250_CONSOLE) || defined(CONFIG_SERIAL_CORE_CONSOLE)
+#define CONFIG_SERIAL_CONSOLE
+#endif
+
+#if defined(CONFIG_SERIAL_CONSOLE)
+
+struct kdb_serial kdb_serial;
+
+/* Read one UART register.  offset is the register number, it is scaled by
+ * the port's ioreg_shift and read through MMIO for SERIAL_IO_MEM ports or
+ * through port I/O for everything else.
+ */
+static unsigned int
+serial_inp(struct kdb_serial *kdb_serial, unsigned long offset)
+{
+	unsigned long reg = offset << kdb_serial->ioreg_shift;
+
+	if (kdb_serial->io_type == SERIAL_IO_MEM)
+		return readb((void __iomem *)(kdb_serial->iobase + reg));
+	return inb(kdb_serial->iobase + reg);
+}
+
+/* Check if there is a byte ready at the serial port */
+static int get_serial_char(void)
+{
+	unsigned char ch;
+
+	/* no serial console has been configured */
+	if (kdb_serial.iobase == 0)
+		return -1;
+
+	/* nothing waiting in the receiver */
+	if (!(serial_inp(&kdb_serial, UART_LSR) & UART_LSR_DR))
+		return -1;
+
+	ch = serial_inp(&kdb_serial, UART_RX);
+	/* map DEL (0x7f) to backspace */
+	return ch == 0x7f ? 8 : ch;
+}
+#endif /* CONFIG_SERIAL_CONSOLE */
+
+#ifdef CONFIG_VT_CONSOLE
+
+static int kbd_exists;
+
+/*
+ * Check if the keyboard controller has a keypress for us.
+ * Some parts (Enter Release, LED change) are still blocking polled here,
+ * but hopefully they are all short.
+ */
+static int get_kbd_char(void)
+{
+ int scancode, scanstatus;
+ static int shift_lock; /* CAPS LOCK state (0-off, 1-on) */
+ static int shift_key; /* Shift next keypress */
+ static int ctrl_key; /* left ctrl currently held (1) or not (0) */
+ u_short keychar;
+ extern u_short plain_map[], shift_map[], ctrl_map[];
+
+ if (KDB_FLAG(NO_I8042) || KDB_FLAG(NO_VT_CONSOLE) ||
+ (inb(KBD_STATUS_REG) == 0xff && inb(KBD_DATA_REG) == 0xff)) { /* both ports reading 0xff is treated as "no keyboard" */
+ kbd_exists = 0;
+ return -1;
+ }
+ kbd_exists = 1;
+
+ if ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
+ return -1; /* no byte waiting */
+
+ /*
+ * Fetch the scancode
+ */
+ scancode = inb(KBD_DATA_REG);
+ scanstatus = inb(KBD_STATUS_REG);
+
+ /*
+ * Ignore mouse events.
+ */
+ if (scanstatus & KBD_STAT_MOUSE_OBF)
+ return -1;
+
+ /*
+ * Ignore release, trigger on make
+ * (except for shift keys, where we want to
+ * keep the shift state so long as the key is
+ * held down).
+ */
+
+ if (((scancode&0x7f) == 0x2a) || ((scancode&0x7f) == 0x36)) {
+ /*
+ * Next key may use shift table
+ */
+ if ((scancode & 0x80) == 0) {
+ shift_key=1;
+ } else {
+ shift_key=0;
+ }
+ return -1;
+ }
+
+ if ((scancode&0x7f) == 0x1d) {
+ /*
+ * Left ctrl key
+ */
+ if ((scancode & 0x80) == 0) {
+ ctrl_key = 1;
+ } else {
+ ctrl_key = 0;
+ }
+ return -1;
+ }
+
+ if ((scancode & 0x80) != 0)
+ return -1; /* release of any other key is ignored */
+
+ scancode &= 0x7f;
+
+ /*
+ * Translate scancode
+ */
+
+ if (scancode == 0x3a) {
+ /*
+ * Toggle caps lock
+ */
+ shift_lock ^= 1;
+
+#ifdef KDB_BLINK_LED
+ kdb_toggleled(0x4);
+#endif
+ return -1;
+ }
+
+ if (scancode == 0x0e) {
+ /*
+ * Backspace
+ */
+ return 8;
+ }
+
+ /* Special Key */
+ switch (scancode) { /* the values returned here are control characters (1..16), not printable ones */
+ case 0xF: /* Tab */
+ return 9;
+ case 0x53: /* Del */
+ return 4;
+ case 0x47: /* Home */
+ return 1;
+ case 0x4F: /* End */
+ return 5;
+ case 0x4B: /* Left */
+ return 2;
+ case 0x48: /* Up */
+ return 16;
+ case 0x50: /* Down */
+ return 14;
+ case 0x4D: /* Right */
+ return 6;
+ }
+
+ if (scancode == 0xe0) { /* NOTE(review): scancode was masked with 0x7f above, so this can never be true */
+ return -1;
+ }
+
+ /*
+ * For Japanese 86/106 keyboards
+ * See comment in drivers/char/pc_keyb.c.
+ * - Masahiro Adegawa
+ */
+ if (scancode == 0x73) {
+ scancode = 0x59;
+ } else if (scancode == 0x7d) {
+ scancode = 0x7c;
+ }
+
+ if (!shift_lock && !shift_key && !ctrl_key) {
+ keychar = plain_map[scancode];
+ } else if (shift_lock || shift_key) { /* shift takes precedence over ctrl */
+ keychar = shift_map[scancode];
+ } else if (ctrl_key) {
+ keychar = ctrl_map[scancode];
+ } else { /* NOTE(review): unreachable, the three tests above cover every combination */
+ keychar = 0x0020;
+ kdb_printf("Unknown state/scancode (%d)\n", scancode);
+ }
+ keychar &= 0x0fff;
+ if (keychar == '\t')
+ keychar = ' ';
+ switch (KTYP(keychar)) {
+ case KT_LETTER:
+ case KT_LATIN:
+ if (isprint(keychar))
+ break; /* printable characters */
+ /* drop through */
+ case KT_SPEC:
+ if (keychar == K_ENTER)
+ break;
+ /* drop through */
+ default:
+ return(-1); /* ignore unprintables */
+ }
+
+ if ((scancode & 0x7f) == 0x1c) {
+ /*
+ * enter key. All done. Absorb the release scancode.
+ */
+ while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) /* NOTE(review): busy-wait with no timeout */
+ ;
+
+ /*
+ * Fetch the scancode
+ */
+ scancode = inb(KBD_DATA_REG);
+ scanstatus = inb(KBD_STATUS_REG);
+
+ while (scanstatus & KBD_STAT_MOUSE_OBF) { /* skip over any mouse bytes that arrive first */
+ scancode = inb(KBD_DATA_REG);
+ scanstatus = inb(KBD_STATUS_REG);
+ }
+
+ if (scancode != 0x9c) {
+ /*
+ * Wasn't an enter-release, why not?
+ */
+ kdb_printf("kdb: expected enter got 0x%x status 0x%x\n",
+ scancode, scanstatus);
+ }
+
+ kdb_printf("\n");
+ return 13;
+ }
+
+ return keychar & 0xff;
+}
+#endif /* CONFIG_VT_CONSOLE */
+
+#ifdef KDB_BLINK_LED
+
+/* Leave numlock alone, setting it messes up laptop keyboards with the keypad
+ * mapped over normal keys.
+ */
+static int kdba_blink_mask = 0x1 | 0x4;
+
+#define BOGOMIPS (boot_cpu_data.loops_per_jiffy/(500000/HZ))
+/* Poll function that never returns a character (always -1).  Roughly once
+ * per second of polling it toggles the LEDs selected by kdba_blink_mask.
+ */
+static int blink_led(void)
+{
+	static long countdown;
+
+	if (!kbd_exists)
+		return -1;
+
+	if (--countdown >= 0)
+		return -1;
+
+	/* reload the countdown, then flip the LEDs */
+	if (BOGOMIPS == 0)	/* early kdb */
+		countdown = 150000000/1000;	/* arbitrary bogomips */
+	else
+		countdown = 150000000/BOGOMIPS;	/* Roughly 1 second when polling */
+	kdb_toggleled(kdba_blink_mask);
+	return -1;
+}
+#endif
+
+get_char_func poll_funcs[] = { /* NULL terminated list of the polled input functions defined in this file, one per enabled option */
+#if defined(CONFIG_VT_CONSOLE)
+ get_kbd_char,
+#endif
+#if defined(CONFIG_SERIAL_CONSOLE)
+ get_serial_char,
+#endif
+#ifdef KDB_BLINK_LED
+ blink_led, /* never yields a character, it only toggles the LEDs */
+#endif
+#ifdef CONFIG_KDB_USB
+ get_usb_char,
+#endif
+ NULL
+};
+
+/*
+ * On some Compaq Deskpro's, there is a keyboard freeze many times after
+ * exiting from the kdb. As kdb's keyboard handler is not interrupt-driven and
+ * uses a polled interface, it makes more sense to disable motherboard keyboard
+ * controller's OBF interrupts during kdb's polling. If interrupts remain
+ * enabled during kdb's polling, keypresses may raise unnecessary
+ * interrupts, which are also sometimes seen
+ * as spurious interrupts after exiting from kdb. This hack to disable OBF
+ * interrupts before entry to kdb and re-enabling them at kdb exit point also
+ * solves the keyboard freeze issue. These functions are called from
+ * kdb_local(), hence these are arch. specific setup and cleanup functions
+ * executing only on the local processor - ashishk@sco.com
+ */
+
+/* Set (enable != 0) or clear (enable == 0) the keyboard interrupt bit,
+ * KBD_MODE_KBD_INT, in the keyboard controller's mode byte.
+ *
+ * The mode byte is read with KBD_CCMD_READ_MODE, modified and written back
+ * with KBD_CCMD_WRITE_MODE.  Nothing is done when kdb has been told not to
+ * use the i8042 or the VT console; this matches the check in get_kbd_char()
+ * and keeps the IBF polls below, which have no timeout, away from hardware
+ * that kdb must not touch.
+ */
+static void kdba_kbd_set_int(int enable)
+{
+#ifdef CONFIG_VT_CONSOLE
+	int timeout;
+	unsigned char c;
+
+	if (KDB_FLAG(NO_I8042) || KDB_FLAG(NO_VT_CONSOLE))
+		return;
+
+	/* ask the controller for its current mode byte */
+	while (kbd_read_status() & KBD_STAT_IBF);
+	kbd_write_command(KBD_CCMD_READ_MODE);
+	mdelay(1);
+	while (kbd_read_status() & KBD_STAT_IBF);
+	/* bounded wait for the mode byte to show up in the output buffer */
+	for (timeout = 200 * 1000; timeout &&
+	     (!(kbd_read_status() & KBD_STAT_OBF)); timeout--);
+	c = kbd_read_input();
+
+	if (enable)
+		c |= KBD_MODE_KBD_INT;
+	else
+		c &= ~KBD_MODE_KBD_INT;
+
+	/* write the modified mode byte back */
+	while (kbd_read_status() & KBD_STAT_IBF);
+	kbd_write_command(KBD_CCMD_WRITE_MODE);
+	mdelay(1);
+	while (kbd_read_status() & KBD_STAT_IBF);
+	kbd_write_output(c);
+	mdelay(1);
+	while (kbd_read_status() & KBD_STAT_IBF);
+	mdelay(1);
+#endif /* CONFIG_VT_CONSOLE */
+}
+
+/* Arch specific setup on entry to kdb on the local processor: turn the
+ * keyboard controller's interrupt off while kdb polls the keyboard.
+ */
+void kdba_local_arch_setup(void)
+{
+	kdba_kbd_set_int(0);
+}
+
+/* Arch specific cleanup on exit from kdb on the local processor: turn the
+ * keyboard controller's interrupt back on.
+ */
+void kdba_local_arch_cleanup(void)
+{
+	kdba_kbd_set_int(1);
+}
--- /dev/null
+/*
+ * Kernel Debugger Architecture Independent Support Functions
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/ptrace.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
+#include <linux/cpumask.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/desc.h>
- #include <asm/tlbflush.h>
+
+/*
+ * Read control register 'regnum' on the executing cpu. Only cr0, cr2, cr3
+ * and cr4 are actually read; cr1 and any other number yield 0.
+ */
+static kdb_machreg_t
+kdba_getcr(int regnum)
+{
+	kdb_machreg_t val = 0;
+
+	if (regnum == 0)
+		__asm__ (_ASM_MOV " %%cr0,%0\n\t":"=r"(val));
+	else if (regnum == 2)
+		__asm__ (_ASM_MOV " %%cr2,%0\n\t":"=r"(val));
+	else if (regnum == 3)
+		__asm__ (_ASM_MOV " %%cr3,%0\n\t":"=r"(val));
+	else if (regnum == 4)
+		__asm__ (_ASM_MOV " %%cr4,%0\n\t":"=r"(val));
+	/* regnum 1 and everything else: nothing to read, val stays 0 */
+
+	return val;
+}
+
+/*
+ * Load 'contents' into debug register 'regnum' on the executing cpu.
+ * Only dr0-dr3, dr6 and dr7 are written; dr4, dr5 and any other number
+ * are silently ignored.
+ */
+void
+kdba_putdr(int regnum, kdb_machreg_t contents)
+{
+	if (regnum == 0)
+		__asm__ (_ASM_MOV " %0,%%db0\n\t"::"r"(contents));
+	else if (regnum == 1)
+		__asm__ (_ASM_MOV " %0,%%db1\n\t"::"r"(contents));
+	else if (regnum == 2)
+		__asm__ (_ASM_MOV " %0,%%db2\n\t"::"r"(contents));
+	else if (regnum == 3)
+		__asm__ (_ASM_MOV " %0,%%db3\n\t"::"r"(contents));
+	else if (regnum == 6)
+		__asm__ (_ASM_MOV " %0,%%db6\n\t"::"r"(contents));
+	else if (regnum == 7)
+		__asm__ (_ASM_MOV " %0,%%db7\n\t"::"r"(contents));
+	/* 4, 5 and out-of-range numbers fall through untouched */
+}
+
+/*
+ * Read debug register 'regnum' on the executing cpu. Only dr0-dr3, dr6
+ * and dr7 are read; dr4, dr5 and any other number yield 0.
+ */
+kdb_machreg_t
+kdba_getdr(int regnum)
+{
+	kdb_machreg_t val = 0;
+
+	if (regnum == 0)
+		__asm__ (_ASM_MOV " %%db0,%0\n\t":"=r"(val));
+	else if (regnum == 1)
+		__asm__ (_ASM_MOV " %%db1,%0\n\t":"=r"(val));
+	else if (regnum == 2)
+		__asm__ (_ASM_MOV " %%db2,%0\n\t":"=r"(val));
+	else if (regnum == 3)
+		__asm__ (_ASM_MOV " %%db3,%0\n\t":"=r"(val));
+	else if (regnum == 6)
+		__asm__ (_ASM_MOV " %%db6,%0\n\t":"=r"(val));
+	else if (regnum == 7)
+		__asm__ (_ASM_MOV " %%db7,%0\n\t":"=r"(val));
+	/* 4, 5 and out-of-range numbers: val stays 0 */
+
+	return val;
+}
+
+/* Convenience wrapper: return what kdba_getdr() reports for register 6. */
+kdb_machreg_t
+kdba_getdr6(void)
+{
+ return kdba_getdr(6);
+}
+
+/* Convenience wrapper: return what kdba_getdr() reports for register 7. */
+kdb_machreg_t
+kdba_getdr7(void)
+{
+ return kdba_getdr(7);
+}
+
+/* Convenience wrapper: hand 'contents' to kdba_putdr() for register 6. */
+void
+kdba_putdr6(kdb_machreg_t contents)
+{
+ kdba_putdr(6, contents);
+}
+
+/* File-local wrapper: hand 'contents' to kdba_putdr() for register 7. */
+static void
+kdba_putdr7(kdb_machreg_t contents)
+{
+ kdba_putdr(7, contents);
+}
+
+/*
+ * Arm the hardware breakpoint described by 'bp' on the calling cpu: the
+ * breakpoint address is written to the data register recorded in
+ * bp->bp_hard[cpu], then dr7 is rewritten with DR7_GE plus the mode,
+ * length and global-enable settings for that register. A register number
+ * outside 0-3 is reported with kdb_printf; dr7 is still written back.
+ */
+void
+kdba_installdbreg(kdb_bp_t *bp)
+{
+	const int this_cpu = smp_processor_id();
+	kdb_machreg_t dr7 = kdba_getdr7();
+
+	kdba_putdr(bp->bp_hard[this_cpu]->bph_reg, bp->bp_addr);
+
+	dr7 |= DR7_GE;
+	if (cpu_has_de)
+		set_in_cr4(X86_CR4_DE);
+
+	switch (bp->bp_hard[this_cpu]->bph_reg) {
+	case 0:
+		DR7_RW0SET(dr7, bp->bp_hard[this_cpu]->bph_mode);
+		DR7_LEN0SET(dr7, bp->bp_hard[this_cpu]->bph_length);
+		DR7_G0SET(dr7);
+		break;
+	case 1:
+		DR7_RW1SET(dr7, bp->bp_hard[this_cpu]->bph_mode);
+		DR7_LEN1SET(dr7, bp->bp_hard[this_cpu]->bph_length);
+		DR7_G1SET(dr7);
+		break;
+	case 2:
+		DR7_RW2SET(dr7, bp->bp_hard[this_cpu]->bph_mode);
+		DR7_LEN2SET(dr7, bp->bp_hard[this_cpu]->bph_length);
+		DR7_G2SET(dr7);
+		break;
+	case 3:
+		DR7_RW3SET(dr7, bp->bp_hard[this_cpu]->bph_mode);
+		DR7_LEN3SET(dr7, bp->bp_hard[this_cpu]->bph_length);
+		DR7_G3SET(dr7);
+		break;
+	default:
+		kdb_printf("kdb: Bad debug register!! %ld\n",
+			   bp->bp_hard[this_cpu]->bph_reg);
+		break;
+	}
+
+	kdba_putdr7(dr7);
+}
+
+/*
+ * Disarm the hardware breakpoint described by 'bp' on the calling cpu.
+ * Does nothing when bp->bp_hard[cpu] is NULL. Otherwise the data register
+ * is zeroed and the matching global/local enable bits are cleared in dr7.
+ * A register number outside 0-3 is reported with kdb_printf; dr7 is still
+ * written back.
+ */
+void
+kdba_removedbreg(kdb_bp_t *bp)
+{
+	const int this_cpu = smp_processor_id();
+	kdb_machreg_t dr7;
+	int reg;
+
+	if (bp->bp_hard[this_cpu] == NULL)
+		return;
+
+	reg = bp->bp_hard[this_cpu]->bph_reg;
+	dr7 = kdba_getdr7();
+	kdba_putdr(reg, 0);
+
+	switch (reg) {
+	case 0:
+		DR7_G0CLR(dr7);
+		DR7_L0CLR(dr7);
+		break;
+	case 1:
+		DR7_G1CLR(dr7);
+		DR7_L1CLR(dr7);
+		break;
+	case 2:
+		DR7_G2CLR(dr7);
+		DR7_L2CLR(dr7);
+		break;
+	case 3:
+		DR7_G3CLR(dr7);
+		DR7_L3CLR(dr7);
+		break;
+	default:
+		kdb_printf("kdb: Bad debug register!! %d\n", reg);
+		break;
+	}
+
+	kdba_putdr7(dr7);
+}
+
+/*
+ * One entry of a register lookup table. reg_offset holds a byte offset
+ * into struct pt_regs in the kdbreglist tables and a debug register number
+ * in dbreglist.
+ */
+struct kdbregs {
+ char *reg_name;
+ size_t reg_offset;
+};
+
+/*
+ * Debug registers addressable by name; reg_offset is the register number
+ * passed to kdba_getdr()/kdba_putdr(). dr4 and dr5 are not listed.
+ */
+static struct kdbregs dbreglist[] = {
+ { "dr0", 0 },
+ { "dr1", 1 },
+ { "dr2", 2 },
+ { "dr3", 3 },
+ { "dr6", 6 },
+ { "dr7", 7 },
+};
+
+/* Number of entries in dbreglist. */
+static const int ndbreglist = sizeof(dbreglist) / sizeof(struct kdbregs);
+
+#ifdef CONFIG_X86_32
+/*
+ * 32-bit general register names and their byte offsets within
+ * struct pt_regs. kdba_dumpregs() prints the entries in this order.
+ */
+static struct kdbregs kdbreglist[] = {
+ { "ax", offsetof(struct pt_regs, ax) },
+ { "bx", offsetof(struct pt_regs, bx) },
+ { "cx", offsetof(struct pt_regs, cx) },
+ { "dx", offsetof(struct pt_regs, dx) },
+
+ { "si", offsetof(struct pt_regs, si) },
+ { "di", offsetof(struct pt_regs, di) },
+ { "sp", offsetof(struct pt_regs, sp) },
+ { "ip", offsetof(struct pt_regs, ip) },
+
+ { "bp", offsetof(struct pt_regs, bp) },
+ { "ss", offsetof(struct pt_regs, ss) },
+ { "cs", offsetof(struct pt_regs, cs) },
+ { "flags", offsetof(struct pt_regs, flags) },
+
+ { "ds", offsetof(struct pt_regs, ds) },
+ { "es", offsetof(struct pt_regs, es) },
+ { "origax", offsetof(struct pt_regs, orig_ax) },
+
+};
+
+/* Number of entries in kdbreglist. */
+static const int nkdbreglist = sizeof(kdbreglist) / sizeof(struct kdbregs);
+
+
+/*
+ * kdba_getregcontents
+ *
+ *	Return the contents of the register specified by the
+ *	input string argument.  Return an error if the string
+ *	does not match a machine register.
+ *
+ *	The following pseudo register names are supported:
+ *	   &regs	 - Prints address of exception frame
+ *	   kesp		 - Prints kernel stack pointer at time of fault
+ *	   cesp		 - Prints current kernel stack pointer, inside kdb
+ *	   ceflags	 - Prints current flags, inside kdb
+ *	   %<regname>	 - Uses the value of the registers at the
+ *			   last time the user process entered kernel
+ *			   mode, instead of the registers at the time
+ *			   kdb was entered.
+ *
+ * Parameters:
+ *	regname		Pointer to string naming register
+ *	regs		Pointer to structure containing registers.
+ * Outputs:
+ *	*contents	Pointer to unsigned long to receive register contents
+ * Returns:
+ *	0		Success
+ *	KDB_BADREG	Invalid register name, or regs is NULL for a name
+ *			that needs the exception frame
+ * Locking:
+ *	None.
+ * Remarks:
+ *	If kdb was entered via an interrupt from the kernel itself then
+ *	ss and sp are *not* on the stack.
+ */
+
+int
+kdba_getregcontents(const char *regname,
+		    struct pt_regs *regs,
+		    kdb_machreg_t *contents)
+{
+	int i;
+
+	/* Pseudo registers that do not need an exception frame. */
+	if (strcmp(regname, "cesp") == 0) {
+		asm volatile("movl %%esp,%0":"=m" (*contents));
+		return 0;
+	}
+
+	if (strcmp(regname, "ceflags") == 0) {
+		unsigned long flags;
+		local_save_flags(flags);
+		*contents = flags;
+		return 0;
+	}
+
+	if (regname[0] == '%') {
+		/* User registers:  %%e[a-c]x, etc */
+		regname++;
+		regs = (struct pt_regs *)
+			(kdb_current_task->thread.sp0 - sizeof(struct pt_regs));
+	}
+
+	/*
+	 * Debug registers.  The loop stops at the first entry that regname
+	 * is a case-insensitive prefix of; the length check then demands
+	 * an exact-length match.
+	 */
+	for (i=0; i<ndbreglist; i++) {
+		if (strnicmp(dbreglist[i].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+	}
+
+	if ((i < ndbreglist)
+	 && (strlen(dbreglist[i].reg_name) == strlen(regname))) {
+		*contents = kdba_getdr(dbreglist[i].reg_offset);
+		return 0;
+	}
+
+	/* Everything below reads from the exception frame. */
+	if (!regs) {
+		kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+		return KDB_BADREG;
+	}
+
+	if (strcmp(regname, "&regs") == 0) {
+		*contents = (unsigned long)regs;
+		return 0;
+	}
+
+	if (strcmp(regname, "kesp") == 0) {
+		*contents = (unsigned long)regs + sizeof(struct pt_regs);
+		if ((regs->cs & 0xffff) == __KERNEL_CS) {
+			/* sp and ss are not on stack */
+			*contents -= 2*4;
+		}
+		return 0;
+	}
+
+	for (i=0; i<nkdbreglist; i++) {
+		if (strnicmp(kdbreglist[i].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+	}
+
+	if ((i < nkdbreglist)
+	 && (strlen(kdbreglist[i].reg_name) == strlen(regname))) {
+		if ((regs->cs & 0xffff) == __KERNEL_CS) {
+			/* No cpl switch, sp and ss are not on stack */
+			if (strcmp(kdbreglist[i].reg_name, "sp") == 0) {
+				*contents = (kdb_machreg_t)regs +
+					sizeof(struct pt_regs) - 2*4;
+				return(0);
+			}
+			/*
+			 * The table entry is named "ss"; read the live
+			 * segment register instead of the slot beyond the
+			 * frame.
+			 */
+			if (strcmp(kdbreglist[i].reg_name, "ss") == 0) {
+				asm volatile(
+					"pushl %%ss\n"
+					"popl %0\n"
+					:"=m" (*contents));
+				return(0);
+			}
+		}
+		*contents = *(unsigned long *)((unsigned long)regs +
+				kdbreglist[i].reg_offset);
+		return(0);
+	}
+
+	return KDB_BADREG;
+}
+
+/*
+ * kdba_setregcontents
+ *
+ * Set the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * Supports modification of user-mode registers via
+ * %<register-name>
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * contents Unsigned long containing new register contents
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdba_setregcontents(const char *regname,
+		    struct pt_regs *regs,
+		    unsigned long contents)
+{
+	int idx;
+
+	/* A leading '%' selects the frame saved at the last user->kernel entry. */
+	if (regname[0] == '%') {
+		regname++;
+		regs = (struct pt_regs *)
+			(kdb_current_task->thread.sp0 - sizeof(struct pt_regs));
+	}
+
+	/* Debug registers first: they need no exception frame. */
+	idx = 0;
+	while (idx < ndbreglist) {
+		if (strnicmp(dbreglist[idx].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+		idx++;
+	}
+	if ((idx < ndbreglist)
+	 && (strlen(dbreglist[idx].reg_name) == strlen(regname))) {
+		kdba_putdr(dbreglist[idx].reg_offset, contents);
+		return 0;
+	}
+
+	if (!regs) {
+		kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+		return KDB_BADREG;
+	}
+
+	/* General registers are stored straight into the pt_regs slot. */
+	idx = 0;
+	while (idx < nkdbreglist) {
+		if (strnicmp(kdbreglist[idx].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+		idx++;
+	}
+	if ((idx < nkdbreglist)
+	 && (strlen(kdbreglist[idx].reg_name) == strlen(regname))) {
+		*(unsigned long *)((unsigned long)regs
+				   + kdbreglist[idx].reg_offset) = contents;
+		return 0;
+	}
+
+	return KDB_BADREG;
+}
+
+/*
+ * kdba_pt_regs
+ *
+ * Format a struct pt_regs
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If no address is supplied, it uses the last irq pt_regs.
+ */
+
+static int
+kdba_pt_regs(int argc, const char **argv)
+{
+	static const char *fmt = " %-11.11s 0x%lx\n";
+	struct pt_regs *frame;
+	kdb_machreg_t addr;
+	long offset = 0;
+	int nextarg;
+	int diag;
+
+	switch (argc) {
+	case 0:
+		/* No argument: fall back to the last irq pt_regs. */
+		addr = (kdb_machreg_t) get_irq_regs();
+		break;
+	case 1:
+		nextarg = 1;
+		diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+		if (diag)
+			return diag;
+		break;
+	default:
+		return KDB_ARGCOUNT;
+	}
+
+	frame = (struct pt_regs *) addr;
+	kdb_printf("struct pt_regs 0x%p-0x%p\n", frame, (unsigned char *)frame + sizeof(*frame) - 1);
+	kdb_print_nameval("bx", frame->bx);
+	kdb_print_nameval("cx", frame->cx);
+	kdb_print_nameval("dx", frame->dx);
+	kdb_print_nameval("si", frame->si);
+	kdb_print_nameval("di", frame->di);
+	kdb_print_nameval("bp", frame->bp);
+	kdb_print_nameval("ax", frame->ax);
+	kdb_printf(fmt, "ds", frame->ds);
+	kdb_printf(fmt, "es", frame->es);
+	kdb_print_nameval("orig_ax", frame->orig_ax);
+	kdb_print_nameval("ip", frame->ip);
+	kdb_printf(fmt, "cs", frame->cs);
+	kdb_printf(fmt, "flags", frame->flags);
+	kdb_printf(fmt, "sp", frame->sp);
+	kdb_printf(fmt, "ss", frame->ss);
+	return 0;
+}
+
+#else /* CONFIG_X86_32 */
+
+/*
+ * 64-bit general register names and their byte offsets within
+ * struct pt_regs. kdba_dumpregs() prints the entries in this order.
+ */
+static struct kdbregs kdbreglist[] = {
+ { "r15", offsetof(struct pt_regs, r15) },
+ { "r14", offsetof(struct pt_regs, r14) },
+ { "r13", offsetof(struct pt_regs, r13) },
+ { "r12", offsetof(struct pt_regs, r12) },
+ { "bp", offsetof(struct pt_regs, bp) },
+ { "bx", offsetof(struct pt_regs, bx) },
+ { "r11", offsetof(struct pt_regs, r11) },
+ { "r10", offsetof(struct pt_regs, r10) },
+ { "r9", offsetof(struct pt_regs, r9) },
+ { "r8", offsetof(struct pt_regs, r8) },
+ { "ax", offsetof(struct pt_regs, ax) },
+ { "cx", offsetof(struct pt_regs, cx) },
+ { "dx", offsetof(struct pt_regs, dx) },
+ { "si", offsetof(struct pt_regs, si) },
+ { "di", offsetof(struct pt_regs, di) },
+ { "orig_ax", offsetof(struct pt_regs, orig_ax) },
+ { "ip", offsetof(struct pt_regs, ip) },
+ { "cs", offsetof(struct pt_regs, cs) },
+ { "flags", offsetof(struct pt_regs, flags) },
+ { "sp", offsetof(struct pt_regs, sp) },
+ { "ss", offsetof(struct pt_regs, ss) },
+};
+
+/* Number of entries in kdbreglist. */
+static const int nkdbreglist = sizeof(kdbreglist) / sizeof(struct kdbregs);
+
+
+/*
+ * kdba_getregcontents
+ *
+ *	Return the contents of the register specified by the
+ *	input string argument.  Return an error if the string
+ *	does not match a machine register.
+ *
+ *	The following pseudo register names are supported:
+ *	   &regs	 - Prints address of exception frame
+ *	   krsp		 - Prints kernel stack pointer at time of fault
+ *	   crsp		 - Prints current kernel stack pointer, inside kdb
+ *	   ceflags	 - Prints current flags, inside kdb
+ *	   %<regname>	 - Uses the value of the registers at the
+ *			   last time the user process entered kernel
+ *			   mode, instead of the registers at the time
+ *			   kdb was entered.
+ *
+ * Parameters:
+ *	regname		Pointer to string naming register
+ *	regs		Pointer to structure containing registers.
+ * Outputs:
+ *	*contents	Pointer to unsigned long to receive register contents
+ * Returns:
+ *	0		Success
+ *	KDB_BADREG	Invalid register name, or regs is NULL for a name
+ *			that needs the exception frame
+ * Locking:
+ *	None.
+ * Remarks:
+ *	If kdb was entered via an interrupt from the kernel itself then
+ *	ss and sp are *not* on the stack.
+ */
+int
+kdba_getregcontents(const char *regname,
+		    struct pt_regs *regs,
+		    kdb_machreg_t *contents)
+{
+	int i;
+
+	if (strcmp(regname, "&regs") == 0) {
+		*contents = (unsigned long)regs;
+		return 0;
+	}
+
+	if (strcmp(regname, "krsp") == 0) {
+		/* regs is dereferenced below, so it must be valid here. */
+		if (!regs) {
+			kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+			return KDB_BADREG;
+		}
+		*contents = (unsigned long)regs + sizeof(struct pt_regs);
+		if ((regs->cs & 0xffff) == __KERNEL_CS) {
+			/*
+			 * sp and ss are not on stack.  Slots are 8 bytes
+			 * wide here, matching the "sp" case further down.
+			 * NOTE(review): confirm that this adjustment is
+			 * wanted at all for x86_64 kernel-mode frames.
+			 */
+			*contents -= 2*8;
+		}
+		return 0;
+	}
+
+	if (strcmp(regname, "crsp") == 0) {
+		asm volatile("movq %%rsp,%0":"=m" (*contents));
+		return 0;
+	}
+
+	if (strcmp(regname, "ceflags") == 0) {
+		unsigned long flags;
+		local_save_flags(flags);
+		*contents = flags;
+		return 0;
+	}
+
+	if (regname[0] == '%') {
+		/* User registers:  %%r[a-c]x, etc */
+		regname++;
+		regs = (struct pt_regs *)
+			(current->thread.sp0 - sizeof(struct pt_regs));
+	}
+
+	/*
+	 * The loop stops at the first entry that regname is a
+	 * case-insensitive prefix of; the length check then demands an
+	 * exact-length match.
+	 */
+	for (i=0; i<nkdbreglist; i++) {
+		if (strnicmp(kdbreglist[i].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+	}
+
+	if ((i < nkdbreglist)
+	 && (strlen(kdbreglist[i].reg_name) == strlen(regname))) {
+		/* A general register can only be read from a valid frame. */
+		if (!regs) {
+			kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+			return KDB_BADREG;
+		}
+		if ((regs->cs & 0xffff) == __KERNEL_CS) {
+			/* No cpl switch, sp is not on stack */
+			if (strcmp(kdbreglist[i].reg_name, "sp") == 0) {
+				*contents = (kdb_machreg_t)regs +
+					sizeof(struct pt_regs) - 2*8;
+				return(0);
+			}
+#if 0	/* FIXME */
+			if (strcmp(kdbreglist[i].reg_name, "ss") == 0) {
+				kdb_machreg_t r;
+
+				r = (kdb_machreg_t)regs +
+					sizeof(struct pt_regs) - 2*8;
+				*contents = (kdb_machreg_t)SS(r);	/* XXX */
+				return(0);
+			}
+#endif
+		}
+		*contents = *(unsigned long *)((unsigned long)regs +
+				kdbreglist[i].reg_offset);
+		return(0);
+	}
+
+	/* Debug registers need no exception frame. */
+	for (i=0; i<ndbreglist; i++) {
+		if (strnicmp(dbreglist[i].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+	}
+
+	if ((i < ndbreglist)
+	 && (strlen(dbreglist[i].reg_name) == strlen(regname))) {
+		*contents = kdba_getdr(dbreglist[i].reg_offset);
+		return 0;
+	}
+	return KDB_BADREG;
+}
+
+/*
+ * kdba_setregcontents
+ *
+ * Set the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * Supports modification of user-mode registers via
+ * %<register-name>
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * contents Unsigned long containing new register contents
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdba_setregcontents(const char *regname,
+		    struct pt_regs *regs,
+		    unsigned long contents)
+{
+	int i;
+
+	/* A leading '%' selects the frame saved at the last user->kernel entry. */
+	if (regname[0] == '%') {
+		regname++;
+		regs = (struct pt_regs *)
+			(current->thread.sp0 - sizeof(struct pt_regs));
+	}
+
+	/*
+	 * The loop stops at the first entry that regname is a
+	 * case-insensitive prefix of; the length check then demands an
+	 * exact-length match.
+	 */
+	for (i=0; i<nkdbreglist; i++) {
+		if (strnicmp(kdbreglist[i].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+	}
+
+	if ((i < nkdbreglist)
+	 && (strlen(kdbreglist[i].reg_name) == strlen(regname))) {
+		/* Refuse to store through a missing exception frame. */
+		if (!regs) {
+			kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+			return KDB_BADREG;
+		}
+		*(unsigned long *)((unsigned long)regs
+				   + kdbreglist[i].reg_offset) = contents;
+		return 0;
+	}
+
+	/* Debug registers need no exception frame. */
+	for (i=0; i<ndbreglist; i++) {
+		if (strnicmp(dbreglist[i].reg_name,
+			     regname,
+			     strlen(regname)) == 0)
+			break;
+	}
+
+	if ((i < ndbreglist)
+	 && (strlen(dbreglist[i].reg_name) == strlen(regname))) {
+		kdba_putdr(dbreglist[i].reg_offset, contents);
+		return 0;
+	}
+
+	return KDB_BADREG;
+}
+
+/*
+ * kdba_pt_regs
+ *
+ * Format a struct pt_regs
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If no address is supplied, it uses the last irq pt_regs.
+ */
+
+static int
+kdba_pt_regs(int argc, const char **argv)
+{
+	static const char *fmt = " %-11.11s 0x%lx\n";
+	static int first_time = 1;
+	struct pt_regs *frame;
+	kdb_machreg_t addr;
+	long offset = 0;
+	int nextarg;
+	int diag;
+
+	switch (argc) {
+	case 0:
+		/* No argument: fall back to the last irq pt_regs. */
+		addr = (kdb_machreg_t) get_irq_regs();
+		break;
+	case 1:
+		nextarg = 1;
+		diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+		if (diag)
+			return diag;
+		break;
+	default:
+		return KDB_ARGCOUNT;
+	}
+
+	frame = (struct pt_regs *) addr;
+
+	/* The warning is shown only on the first successful invocation. */
+	if (first_time) {
+		first_time = 0;
+		kdb_printf("\n+++ Warning: x86_64 pt_regs are not always "
+			   "completely defined, r15-bx may be invalid\n\n");
+	}
+
+	kdb_printf("struct pt_regs 0x%p-0x%p\n", frame, (unsigned char *)frame + sizeof(*frame) - 1);
+	kdb_print_nameval("r15", frame->r15);
+	kdb_print_nameval("r14", frame->r14);
+	kdb_print_nameval("r13", frame->r13);
+	kdb_print_nameval("r12", frame->r12);
+	kdb_print_nameval("bp", frame->bp);
+	kdb_print_nameval("bx", frame->bx);
+	kdb_print_nameval("r11", frame->r11);
+	kdb_print_nameval("r10", frame->r10);
+	kdb_print_nameval("r9", frame->r9);
+	kdb_print_nameval("r8", frame->r8);
+	kdb_print_nameval("ax", frame->ax);
+	kdb_print_nameval("cx", frame->cx);
+	kdb_print_nameval("dx", frame->dx);
+	kdb_print_nameval("si", frame->si);
+	kdb_print_nameval("di", frame->di);
+	kdb_print_nameval("orig_ax", frame->orig_ax);
+	kdb_print_nameval("ip", frame->ip);
+	kdb_printf(fmt, "cs", frame->cs);
+	kdb_printf(fmt, "flags", frame->flags);
+	kdb_printf(fmt, "sp", frame->sp);
+	kdb_printf(fmt, "ss", frame->ss);
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * kdba_dumpregs
+ *
+ * Dump the specified register set to the display.
+ *
+ * Parameters:
+ * regs Pointer to structure containing registers.
+ * type Character string identifying register set to dump
+ * extra string further identifying register (optional)
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * Locking:
+ * None.
+ * Remarks:
+ * This function will dump the general register set if the type
+ * argument is NULL (struct pt_regs). The alternate register
+ * set types supported by this function:
+ *
+ * d Debug registers
+ * c Control registers
+ * u User registers at most recent entry to kernel
+ * for the process currently selected with "pid" command.
+ * Following not yet implemented:
+ * r Memory Type Range Registers (extra defines register)
+ *
+ * MSR on i386/x86_64 are handled by rdmsr/wrmsr commands.
+ */
+
+int
+kdba_dumpregs(struct pt_regs *regs,
+	    const char *type,
+	    const char *extra)
+{
+	int i;
+	int count = 0;
+
+	/* 'u': dump the general set from the frame saved at user->kernel entry. */
+	if (type
+	 && (type[0] == 'u')) {
+		type = NULL;
+		regs = (struct pt_regs *)
+			(kdb_current_task->thread.sp0 - sizeof(struct pt_regs));
+	}
+
+	if (type == NULL) {
+		struct kdbregs *rlp;
+		kdb_machreg_t contents;
+
+		if (!regs) {
+			kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+			return KDB_BADREG;
+		}
+
+		/* Four registers per line on 32-bit, two per line on 64-bit. */
+#ifdef CONFIG_X86_32
+		for (i=0, rlp=kdbreglist; i<nkdbreglist; i++,rlp++) {
+			kdb_printf("%s = ", rlp->reg_name);
+			kdba_getregcontents(rlp->reg_name, regs, &contents);
+			kdb_printf("0x%08lx ", contents);
+			if ((++count % 4) == 0)
+				kdb_printf("\n");
+		}
+#else
+		for (i=0, rlp=kdbreglist; i<nkdbreglist; i++,rlp++) {
+			kdb_printf("%8s = ", rlp->reg_name);
+			kdba_getregcontents(rlp->reg_name, regs, &contents);
+			kdb_printf("0x%016lx ", contents);
+			if ((++count % 2) == 0)
+				kdb_printf("\n");
+		}
+#endif
+
+		kdb_printf("&regs = 0x%p\n", regs);
+
+		return 0;
+	}
+
+	switch (type[0]) {
+	case 'd':
+	{
+		unsigned long dr[8];
+
+		/* dr4 and dr5 are neither read nor printed. */
+		for(i=0; i<8; i++) {
+			if ((i == 4) || (i == 5)) continue;
+			dr[i] = kdba_getdr(i);
+		}
+		kdb_printf("dr0 = 0x%08lx dr1 = 0x%08lx dr2 = 0x%08lx dr3 = 0x%08lx\n",
+			   dr[0], dr[1], dr[2], dr[3]);
+		kdb_printf("dr6 = 0x%08lx dr7 = 0x%08lx\n",
+			   dr[6], dr[7]);
+		return 0;
+	}
+	case 'c':
+	{
+		unsigned long cr[5];
+
+		for (i=0; i<5; i++) {
+			cr[i] = kdba_getcr(i);
+		}
+		kdb_printf("cr0 = 0x%08lx cr1 = 0x%08lx cr2 = 0x%08lx cr3 = 0x%08lx\ncr4 = 0x%08lx\n",
+			   cr[0], cr[1], cr[2], cr[3], cr[4]);
+		return 0;
+	}
+	case 'r':
+		/* Not implemented: prints nothing and reports success below. */
+		break;
+	default:
+		return KDB_BADREG;
+	}
+
+	/* Only the unimplemented 'r' type gets here. */
+	return 0;
+}
+EXPORT_SYMBOL(kdba_dumpregs);
+
+/* Return the saved instruction pointer from 'regs', or 0 if regs is NULL. */
+kdb_machreg_t
+kdba_getpc(struct pt_regs *regs)
+{
+	if (!regs)
+		return 0;
+	return regs->ip;
+}
+
+/*
+ * Store 'newpc' as the saved instruction pointer in 'regs' and set the
+ * IP_ADJUSTED kdb state.  Returns 0 on success, or KDB_BADREG when
+ * KDB_NULL_REGS() rejects regs.
+ */
+int
+kdba_setpc(struct pt_regs *regs, kdb_machreg_t newpc)
+{
+	if (!KDB_NULL_REGS(regs)) {
+		regs->ip = newpc;
+		KDB_STATE_SET(IP_ADJUSTED);
+		return 0;
+	}
+	return KDB_BADREG;
+}
+
+/*
+ * kdba_main_loop
+ *
+ * Do any architecture specific set up before entering the main kdb loop.
+ * The primary function of this routine is to make all processes look the
+ * same to kdb, kdb must be able to list a process without worrying if the
+ * process is running or blocked, so make all process look as though they
+ * are blocked.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * reason2 kdb's current reason code. Initially reason but can change
+ * according to kdb state.
+ * error The hardware-defined error code
+ * db_result Result from break or debug point.
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it should
+ * always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event which it wasn't responsible
+ * 1 KDB handled the event for which it was invoked.
+ * Outputs:
+ * Sets ip and sp in current->thread.
+ * Locking:
+ * None.
+ * Remarks:
+ * none.
+ */
+
+int
+kdba_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+ kdb_dbtrap_t db_result, struct pt_regs *regs)
+{
+ int ret;
+
+#ifdef CONFIG_X86_64
+ /* Record the "sp" value derived from regs in current->thread.sp. */
+ if (regs)
+ kdba_getregcontents("sp", regs, &(current->thread.sp));
+#endif
+ /* The result of kdb_save_running() is what this function returns. */
+ ret = kdb_save_running(regs, reason, reason2, error, db_result);
+ kdb_unsave_running(regs);
+ return ret;
+}
+
+/*
+ * Disable local interrupts via local_irq_save() and stash the previous
+ * flags word in *state (accessed as an unsigned long).
+ */
+void
+kdba_disableint(kdb_intstate_t *state)
+{
+	unsigned long saved;
+
+	local_irq_save(saved);
+	*(unsigned long *)state = saved;
+}
+
+/*
+ * Hand the flags word stored in *state (accessed as an unsigned long)
+ * to local_irq_restore().
+ */
+void
+kdba_restoreint(kdb_intstate_t *state)
+{
+	unsigned long saved;
+
+	saved = *(unsigned long *)state;
+	local_irq_restore(saved);
+}
+
+/*
+ * Prepare 'regs' for single stepping: remember the current IF setting in
+ * the A_IF kdb state, then set TF and clear IF in the saved flags.
+ * Does nothing when KDB_NULL_REGS() rejects regs.
+ */
+void
+kdba_setsinglestep(struct pt_regs *regs)
+{
+	if (KDB_NULL_REGS(regs))
+		return;
+
+	if (regs->flags & X86_EFLAGS_IF)
+		KDB_STATE_SET(A_IF);
+	else
+		KDB_STATE_CLEAR(A_IF);
+
+	regs->flags |= X86_EFLAGS_TF;
+	regs->flags &= ~X86_EFLAGS_IF;
+}
+
+/*
+ * Put the IF bit of the saved flags back to what the A_IF kdb state
+ * recorded.  TF is not touched here.  Does nothing when KDB_NULL_REGS()
+ * rejects regs.
+ */
+void
+kdba_clearsinglestep(struct pt_regs *regs)
+{
+	if (KDB_NULL_REGS(regs))
+		return;
+
+	if (!KDB_STATE(A_IF)) {
+		regs->flags &= ~X86_EFLAGS_IF;
+		return;
+	}
+	regs->flags |= X86_EFLAGS_IF;
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * Record execution state in the buffer addressed by 'jb' with hand-written
+ * assembly, then return 0. As written below, the buffer receives (byte
+ * offsets): 0 ebx, 4 esi, 8 edi, 12 a frame-pointer value, 16 a stack
+ * address, 20 the word that kdba_longjmp() later jumps to.
+ * NOTE(review): the %esp-relative offsets presumably assume the compiler's
+ * prologue for each CONFIG_FRAME_POINTER setting - confirm before changing.
+ */
+int asmlinkage
+kdba_setjmp(kdb_jmp_buf *jb)
+{
+#ifdef CONFIG_FRAME_POINTER
+ /* Here slot 12 is filled from the word at (%esp) rather than from %ebp. */
+ __asm__ ("movl 8(%esp), %eax\n\t"
+ "movl %ebx, 0(%eax)\n\t"
+ "movl %esi, 4(%eax)\n\t"
+ "movl %edi, 8(%eax)\n\t"
+ "movl (%esp), %ecx\n\t"
+ "movl %ecx, 12(%eax)\n\t"
+ "leal 8(%esp), %ecx\n\t"
+ "movl %ecx, 16(%eax)\n\t"
+ "movl 4(%esp), %ecx\n\t"
+ "movl %ecx, 20(%eax)\n\t");
+#else /* CONFIG_FRAME_POINTER */
+ /* Same layout, with every %esp offset 4 bytes smaller and %ebp stored directly. */
+ __asm__ ("movl 4(%esp), %eax\n\t"
+ "movl %ebx, 0(%eax)\n\t"
+ "movl %esi, 4(%eax)\n\t"
+ "movl %edi, 8(%eax)\n\t"
+ "movl %ebp, 12(%eax)\n\t"
+ "leal 4(%esp), %ecx\n\t"
+ "movl %ecx, 16(%eax)\n\t"
+ "movl 0(%esp), %ecx\n\t"
+ "movl %ecx, 20(%eax)\n\t");
+#endif /* CONFIG_FRAME_POINTER */
+ return 0;
+}
+
+/*
+ * Reload the state held in the buffer addressed by 'jb' and jump to the
+ * address stored at byte offset 20; control does not return to the caller.
+ * ebx, esi, edi, ebp and esp are loaded from offsets 0, 4, 8, 12 and 16,
+ * and 'reason' is loaded into %eax before the jump.
+ * NOTE(review): the %esp-relative argument offsets presumably assume the
+ * compiler's prologue for each CONFIG_FRAME_POINTER setting - confirm.
+ */
+void asmlinkage
+kdba_longjmp(kdb_jmp_buf *jb, int reason)
+{
+#ifdef CONFIG_FRAME_POINTER
+ __asm__("movl 8(%esp), %ecx\n\t"
+ "movl 12(%esp), %eax\n\t"
+ "movl 20(%ecx), %edx\n\t"
+ "movl 0(%ecx), %ebx\n\t"
+ "movl 4(%ecx), %esi\n\t"
+ "movl 8(%ecx), %edi\n\t"
+ "movl 12(%ecx), %ebp\n\t"
+ "movl 16(%ecx), %esp\n\t"
+ "jmp *%edx\n");
+#else /* CONFIG_FRAME_POINTER */
+ /* Identical restore sequence; only the argument offsets differ. */
+ __asm__("movl 4(%esp), %ecx\n\t"
+ "movl 8(%esp), %eax\n\t"
+ "movl 20(%ecx), %edx\n\t"
+ "movl 0(%ecx), %ebx\n\t"
+ "movl 4(%ecx), %esi\n\t"
+ "movl 8(%ecx), %edi\n\t"
+ "movl 12(%ecx), %ebp\n\t"
+ "movl 16(%ecx), %esp\n\t"
+ "jmp *%edx\n");
+#endif /* CONFIG_FRAME_POINTER */
+}
+
+#else /* CONFIG_X86_32 */
+
+/*
+ * Record execution state in an array of 8-byte slots addressed through
+ * %rdi, then return 0. Slots as written below: 0 rbx, 1 a frame-pointer
+ * value, 2-5 r12-r15, 6 a stack address computed from %rsp, 7 the value of
+ * __builtin_return_address(0) (passed in via %rax).
+ * NOTE(review): the asm uses %rdi directly and presumably relies on 'jb'
+ * still being in that register; %rdx is overwritten without being listed
+ * as a clobber - confirm before changing.
+ */
+int asmlinkage
+kdba_setjmp(kdb_jmp_buf *jb)
+{
+#ifdef CONFIG_FRAME_POINTER
+ /* Slot 1 comes from __builtin_frame_address(1), passed in via %rcx. */
+ __asm__ __volatile__
+ ("movq %%rbx, (0*8)(%%rdi);"
+ "movq %%rcx, (1*8)(%%rdi);"
+ "movq %%r12, (2*8)(%%rdi);"
+ "movq %%r13, (3*8)(%%rdi);"
+ "movq %%r14, (4*8)(%%rdi);"
+ "movq %%r15, (5*8)(%%rdi);"
+ "leaq 16(%%rsp), %%rdx;"
+ "movq %%rdx, (6*8)(%%rdi);"
+ "movq %%rax, (7*8)(%%rdi)"
+ :
+ : "a" (__builtin_return_address(0)),
+ "c" (__builtin_frame_address(1))
+ );
+#else /* !CONFIG_FRAME_POINTER */
+ /* Slot 1 is %rbp itself and the stack address is %rsp + 8. */
+ __asm__ __volatile__
+ ("movq %%rbx, (0*8)(%%rdi);"
+ "movq %%rbp, (1*8)(%%rdi);"
+ "movq %%r12, (2*8)(%%rdi);"
+ "movq %%r13, (3*8)(%%rdi);"
+ "movq %%r14, (4*8)(%%rdi);"
+ "movq %%r15, (5*8)(%%rdi);"
+ "leaq 8(%%rsp), %%rdx;"
+ "movq %%rdx, (6*8)(%%rdi);"
+ "movq %%rax, (7*8)(%%rdi)"
+ :
+ : "a" (__builtin_return_address(0))
+ );
+#endif /* CONFIG_FRAME_POINTER */
+ return 0;
+}
+
+/*
+ * Reload the state held in the 8-byte slots addressed through %rdi and jump
+ * to the address in slot 7; control does not return to the caller. rbx, rbp
+ * and r12-r15 come from slots 0-5, rsp from slot 6, and %rsi is copied into
+ * %rax before the jump.
+ * NOTE(review): the asm presumably relies on 'jb' and 'reason' still being
+ * in %rdi and %rsi - confirm before changing.
+ */
+void asmlinkage
+kdba_longjmp(kdb_jmp_buf *jb, int reason)
+{
+ __asm__("movq (0*8)(%rdi),%rbx;"
+ "movq (1*8)(%rdi),%rbp;"
+ "movq (2*8)(%rdi),%r12;"
+ "movq (3*8)(%rdi),%r13;"
+ "movq (4*8)(%rdi),%r14;"
+ "movq (5*8)(%rdi),%r15;"
+ "movq (7*8)(%rdi),%rdx;"
+ "movq (6*8)(%rdi),%rsp;"
+ "mov %rsi, %rax;"
+ "jmpq *%rdx");
+}
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_32
+/*
+ * kdba_stackdepth
+ *
+ * Print processes that are using more than a specific percentage of their
+ * stack.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If no percentage is supplied, it uses 60.
+ */
+
+/*
+ * Print the kdb_ps1() line for task 'p', then one line per stack found by
+ * starting at 'sp' and following thread_info->previous_esp until it is
+ * zero.  For tasks that are on a cpu the stack label comes from
+ * kdba_get_stack_info_alternate(); otherwise it is "process".
+ */
+static void
+kdba_stackdepth1(struct task_struct *p, unsigned long sp)
+{
+	kdb_ps1(p);
+	do {
+		struct thread_info *ti = (struct thread_info *)(sp & -THREAD_SIZE);
+		int bytes = sizeof(*ti) + THREAD_SIZE - (sp & (THREAD_SIZE-1));
+		const char *label = NULL;
+
+		if (kdb_task_has_cpu(p)) {
+			struct kdb_activation_record ar;
+
+			memset(&ar, 0, sizeof(ar));
+			kdba_get_stack_info_alternate(sp, -1, &ar);
+			label = ar.stack.id;
+		}
+		if (label == NULL)
+			label = "process";
+		kdb_printf(" %s stack %p sp %lx used %d\n", label, ti, sp, bytes);
+		sp = ti->previous_esp;
+	} while (sp);
+}
+
+/*
+ * kdb command handler: list tasks whose stack usage is at or above a
+ * percentage of THREAD_SIZE.  The percentage defaults to 60 and is
+ * clamped to 1..100.  Returns 0, KDB_ARGCOUNT for too many arguments, or
+ * the diagnostic from kdbgetaddrarg().
+ */
+static int
+kdba_stackdepth(int argc, const char **argv)
+{
+	int diag, cpu, threshold, used, over;
+	unsigned long percentage;
+	unsigned long esp;
+	long offset = 0;
+	int nextarg;
+	struct task_struct *p, *g;
+	struct kdb_running_process *krp;
+	struct thread_info *tinfo;
+
+	switch (argc) {
+	case 0:
+		percentage = 60;
+		break;
+	case 1:
+		nextarg = 1;
+		diag = kdbgetaddrarg(argc, argv, &nextarg, &percentage, &offset, NULL);
+		if (diag)
+			return diag;
+		break;
+	default:
+		return KDB_ARGCOUNT;
+	}
+
+	percentage = max_t(int, percentage, 1);
+	percentage = min_t(int, percentage, 100);
+	/* Rounded byte count corresponding to the percentage. */
+	threshold = ((2 * THREAD_SIZE * percentage) / 100 + 1) >> 1;
+	kdb_printf("stackdepth: processes using more than %ld%% (%d bytes) of stack\n",
+		percentage, threshold);
+
+	/* Run the active tasks first, they can have multiple stacks */
+	for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_online(cpu))
+			continue;
+		krp = kdb_running_process + cpu;
+		p = krp->p;
+		over = 0;
+		/* Flag the task if any stack in its chain is over the limit. */
+		esp = krp->arch.sp;
+		do {
+			tinfo = (struct thread_info *)(esp & -THREAD_SIZE);
+			used = sizeof(*tinfo) + THREAD_SIZE - (esp & (THREAD_SIZE-1));
+			if (used >= threshold)
+				over = 1;
+			esp = tinfo->previous_esp;
+		} while (esp);
+		if (over)
+			kdba_stackdepth1(p, krp->arch.sp);
+	}
+	/* Now the tasks that are not on cpus */
+	kdb_do_each_thread(g, p) {
+		if (!kdb_task_has_cpu(p)) {
+			esp = p->thread.sp;
+			used = sizeof(*tinfo) + THREAD_SIZE - (esp & (THREAD_SIZE-1));
+			over = used >= threshold;
+			if (over)
+				kdba_stackdepth1(p, esp);
+		}
+	} kdb_while_each_thread(g, p);
+
+	return 0;
+}
+#else /* CONFIG_X86_32 */
+
- /*
- * kdba_cpu_pda
- *
- * Format a struct cpu_pda
- *
- * Inputs:
- * argc argument count
- * argv argument vector
- * Outputs:
- * None.
- * Returns:
- * zero for success, a kdb diagnostic if error
- * Locking:
- * none.
- * Remarks:
- * If no cpu is supplied, it prints the current cpu. If the cpu is '*'
- * then it prints all cpus.
- */
-
- static int
- kdba_cpu_data(int argc, const char **argv)
- {
- int diag, nextarg, all_cpus = 0;
- long offset = 0;
- unsigned long cpu;
- static const char *fmtl = " %-17.17s 0x%lx\n";
- static const char *fmtd = " %-17.17s %d\n";
- static const char *fmtp = " %-17.17s 0x%p\n";
-
- if (argc == 0) {
- cpu = smp_processor_id();
- } else if (argc == 1) {
- if (strcmp(argv[1], "*") == 0) {
- all_cpus = 1;
- cpu = 0;
- } else {
- nextarg = 1;
- diag = kdbgetaddrarg(argc, argv, &nextarg, &cpu, &offset, NULL);
- if (diag)
- return diag;
- }
- } else {
- return KDB_ARGCOUNT;
- }
-
- for (; cpu < NR_CPUS; ++cpu) {
- if (cpu_online(cpu)) {
- irq_cpustat_t *irq_stats = &per_cpu(irq_stat, cpu);
- kdb_printf(fmtp, "current_task", per_cpu(current_task, cpu));
- kdb_printf(fmtl, "offset", per_cpu(this_cpu_off, cpu));
- kdb_printf(fmtl, "kernel_stack", per_cpu(kernel_stack, cpu));
- kdb_printf(fmtl, "old_rsp", per_cpu(old_rsp, cpu));
- kdb_printf(fmtd, "irq_count", per_cpu(irq_count, cpu));
- kdb_printf(fmtd, "cpu_number", per_cpu(cpu_number, cpu));
- kdb_printf(fmtp, "irq_stack_ptr", per_cpu(irq_stack_ptr, cpu));
- kdb_printf(fmtp, "node_number", cpu_to_node(cpu));
- kdb_printf(fmtd, "__softirq_pending", irq_stats->__softirq_pending);
- kdb_printf(fmtd, "__nmi_count", irq_stats->__nmi_count);
- kdb_printf(fmtd, "mmu_state", per_cpu(cpu_tlbstate.state, cpu));
- kdb_printf(fmtp, "active_mm", per_cpu(cpu_tlbstate.active_mm, cpu));
- kdb_printf(fmtd, "apic_timer_irqs", irq_stats->apic_timer_irqs);
- }
- if (!all_cpus)
- break;
- }
- return 0;
- }
+
+/*
+ * kdba_entry
+ *
+ * This is the interface routine between
+ * the notifier die_chain and kdb
+ */
+/*
+ * Die-chain callback.  'val' is the die event and 'v' points to the
+ * struct die_args for it.  Events kdb cares about are forwarded to kdb()
+ * (or kdb_ipi() for DIE_NMI_IPI on SMP); all others are ignored.
+ * Returns NOTIFY_STOP when the handler returned non-zero, otherwise
+ * NOTIFY_DONE.
+ */
+static int kdba_entry( struct notifier_block *b, unsigned long val, void *v)
+{
+	struct die_args *args = v;
+	struct pt_regs *regs = args->regs;
+	int err = args->err;
+	int ret = 0;
+
+	switch (val){
+#ifdef CONFIG_SMP
+	case DIE_NMI_IPI:
+		ret = kdb_ipi(regs, NULL);
+		break;
+#endif /* CONFIG_SMP */
+	case DIE_OOPS:
+		ret = kdb(KDB_REASON_OOPS, err, regs);
+		break;
+	case DIE_CALL:
+		ret = kdb(KDB_REASON_ENTER, err, regs);
+		break;
+	case DIE_DEBUG:
+		ret = kdb(KDB_REASON_DEBUG, err, regs);
+		break;
+	case DIE_NMIWATCHDOG:
+		ret = kdb(KDB_REASON_NMI, err, regs);
+		break;
+	case DIE_INT3:
+		ret = kdb(KDB_REASON_BREAK, err, regs);
+		break;
+	default:
+		break;
+	}
+	return (ret ? NOTIFY_STOP : NOTIFY_DONE);
+}
+
+/*
+ * Notifier block for kdb entry; its callback is kdba_entry(). It is passed
+ * to register_die_notifier() in kdba_init().
+ */
+static struct notifier_block kdba_notifier = {
+ .notifier_call = kdba_entry
+};
+#endif /* CONFIG_X86_32 */
+
+asmlinkage int kdb_call(void);
+
+/*
+ * Executed once on each cpu at startup.  The body is empty, so nothing
+ * is done here.
+ */
+void
+kdba_cpu_up(void)
+{
+}
+
+/*
+ * Install kdb_call as the interrupt gate for KDBENTER_VECTOR.
+ * This is registered as an arch_initcall below and is also called
+ * directly from kdba_init(), so the gate can be set more than once.
+ * Always returns 0.
+ */
+static int __init
+kdba_arch_init(void)
+{
+ set_intr_gate(KDBENTER_VECTOR, kdb_call);
+ return 0;
+}
+
+arch_initcall(kdba_arch_init);
+
+/*
+ * kdba_init
+ *
+ * Architecture specific initialization.  Calls kdba_arch_init() to set
+ * the KDBENTER_VECTOR gate, registers the "pt_regs" kdb command and, on
+ * CONFIG_X86_32, the "stackdepth" command.  On other builds it registers
+ * kdba_notifier with register_die_notifier() instead.
+ *
+ * Parameters:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ * None.
+ */
+
+void __init
+kdba_init(void)
+{
+ kdba_arch_init(); /* Need to register KDBENTER_VECTOR early */
+ kdb_register("pt_regs", kdba_pt_regs, "address", "Format struct pt_regs", 0);
+#ifdef CONFIG_X86_32
+ kdb_register("stackdepth", kdba_stackdepth, "[percentage]", "Print processes using >= stack percentage", 0);
+#else
- kdb_register("cpu_data", kdba_cpu_data, "<cpu>", "Format per-process data", 0);
+ register_die_notifier(&kdba_notifier);
+#endif
+ return;
+}
+
+/*
+ * kdba_adjust_ip
+ *
+ *	Hook for adjusting the instruction pointer before leaving kdb.
+ *	The body is empty: this is a noop on ix86.
+ *
+ * Parameters:
+ *	reason	The reason KDB was invoked.
+ *	error	The hardware-defined error code.
+ *	regs	The exception frame at time of fault/breakpoint.  NULL when
+ *		reason is SILENT or CPU_UP, otherwise it should always be
+ *		valid.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ */
+
+void
+kdba_adjust_ip(kdb_reason_t reason, int error, struct pt_regs *regs)
+{
+	/* noop on ix86 */
+}
+
+/*
+ * Make p the task kdb treats as current.  kdb_current_regs is taken from
+ * the kdb_running_process slot of the task's cpu when the task has a cpu,
+ * and is set to NULL otherwise.
+ */
+void
+kdba_set_current_task(const struct task_struct *p)
+{
+	kdb_current_task = p;
+
+	if (!kdb_task_has_cpu(p)) {
+		kdb_current_regs = NULL;
+		return;
+	}
+
+	kdb_current_regs = kdb_running_process[kdb_process_cpu(p)].regs;
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * asm-i386 uaccess.h supplies __copy_to_user which relies on the MMU to
+ * trap invalid addresses in the _xxx fields.  The other address of each
+ * pair is verified by accessing its first and last byte ourselves, so any
+ * access violation should only be caused by the _xxx address.
+ */
+
+/*
+ * kdba_putarea_size
+ *
+ *	Write size bytes from the buffer from to the address to_xxx.
+ *	Addresses below PAGE_OFFSET are passed on to kdb_putuserarea_size();
+ *	all others are copied with __copy_to_user_inatomic() while
+ *	set_fs(KERNEL_DS) is in effect, restoring the previous value after.
+ *
+ * Returns:
+ *	The value returned by whichever copy routine was used.
+ */
+int
+kdba_putarea_size(unsigned long to_xxx, void *from, size_t size)
+{
+	mm_segment_t saved_fs = get_fs();
+	volatile char *src = from;
+	char probe;
+	int rc;
+
+	/* Verify the source by reading its first and last byte. */
+	probe = src[0];
+	probe = src[size - 1];
+
+	if (to_xxx < PAGE_OFFSET)
+		return kdb_putuserarea_size(to_xxx, from, size);
+
+	set_fs(KERNEL_DS);
+	rc = __copy_to_user_inatomic((void __user *)to_xxx, from, size);
+	set_fs(saved_fs);
+	return rc;
+}
+
+/*
+ * kdba_getarea_size
+ *
+ * Read size bytes from the address from_xxx into the buffer to.
+ * Addresses below PAGE_OFFSET are passed on to kdb_getuserarea_size();
+ * all others are copied with __copy_to_user_inatomic() while
+ * set_fs(KERNEL_DS) is in effect, restoring the previous value after.
+ *
+ * Returns the value returned by whichever copy routine was used.
+ */
+int
+kdba_getarea_size(void *to, unsigned long from_xxx, size_t size)
+{
+ mm_segment_t oldfs = get_fs();
+ int r;
+ /* Verify the destination by writing its first and last byte. */
+ *((volatile char *)to) = '\0';
+ *((volatile char *)to + size - 1) = '\0';
+
+ if (from_xxx < PAGE_OFFSET) {
+ return kdb_getuserarea_size(to, from_xxx, size);
+ }
+
+ set_fs(KERNEL_DS);
+ /*
+  * NOTE(review): every case makes the same call and differs only in
+  * passing the size as a literal for 1/2/4/8 -- presumably so the copy
+  * helper sees a compile-time constant size; confirm before collapsing
+  * the cases into one call.
+  */
+ switch (size) {
+ case 1:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 1);
+ break;
+ case 2:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 2);
+ break;
+ case 4:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 4);
+ break;
+ case 8:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 8);
+ break;
+ default:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, size);
+ break;
+ }
+ set_fs(oldfs);
+ return r;
+}
+
+/*
+ * Check that size bytes at addr can be both read and written, by reading
+ * them into a scratch buffer with kdba_getarea_size() and writing the
+ * same bytes back with kdba_putarea_size().
+ * Returns 0 when both calls return 0, 1 otherwise.
+ */
+int
+kdba_verify_rw(unsigned long addr, size_t size)
+{
+	unsigned char scratch[size];
+
+	if (kdba_getarea_size(scratch, addr, size))
+		return 1;
+	return kdba_putarea_size(addr, scratch, size) ? 1 : 0;
+}
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_SMP
+
+#include <asm/ipi.h>
+
+gate_desc save_idt[NR_VECTORS];
+
+/*
+ * Save the current IDT entry for vector in save_idt[] and install
+ * kdb_interrupt as the interrupt gate for that same vector.  The saved
+ * entry is what kdba_giveback_vector() writes back.
+ */
+void kdba_takeover_vector(int vector)
+{
+	memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
+	/* Install on the vector that was just saved, not a fixed KDB_VECTOR. */
+	set_intr_gate(vector, kdb_interrupt);
+}
+
+/*
+ * Write the IDT entry saved in save_idt[] for vector back into idt_table.
+ */
+void kdba_giveback_vector(int vector)
+{
+	native_write_idt_entry(idt_table, vector, &save_idt[vector]);
+}
+
+/*
+ * When first entering KDB, try a normal IPI.  That reduces backtrace
+ * problems on the other cpus.  Nothing is sent when the NOIPI flag is set.
+ */
+void
+smp_kdb_stop(void)
+{
+	if (KDB_FLAG(NOIPI))
+		return;
+
+	kdba_takeover_vector(KDB_VECTOR);
+	apic->send_IPI_allbutself(KDB_VECTOR);
+}
+
+/*
+ * The normal KDB IPI handler.  Acknowledges the APIC interrupt and runs
+ * kdb_ipi() between irq_enter() and irq_exit().  regs is installed with
+ * set_irq_regs() for the duration and the previous value is restored
+ * before returning.
+ */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void
+smp_kdb_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ ack_APIC_irq();
+ irq_enter();
+ kdb_ipi(regs, NULL);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+/*
+ * Invoked once from kdb_wait_for_cpus when waiting for cpus.  Each online
+ * cpu whose seqno is still below kdb_seqno - 1, i.e. one that has not
+ * responded to the normal KDB interrupt yet, is hit with an NMI event.
+ * Does nothing when the CATASTROPHIC flag is set.
+ */
+void
+kdba_wait_for_cpus(void)
+{
+	int cpu;
+
+	if (KDB_FLAG(CATASTROPHIC))
+		return;
+
+	kdb_printf(" Sending NMI to non-responding cpus: ");
+	for_each_online_cpu(cpu) {
+		if (kdb_running_process[cpu].seqno >= kdb_seqno - 1)
+			continue;
+		kdb_printf(" %d", cpu);
+		apic->send_IPI_mask(cpumask_of(cpu), NMI_VECTOR);
+	}
+	kdb_printf(".\n");
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_KDB_KDUMP
+/*
+ * Prepare for a kdump from kdb: set the KEXEC state on every online cpu
+ * numbered 1 and up, then call machine_crash_shutdown().
+ *
+ * regs	Register frame passed to machine_crash_shutdown().  May be NULL,
+ *	in which case a zeroed local frame is passed instead.
+ */
+void kdba_kdump_prepare(struct pt_regs *regs)
+{
+	int i;
+	struct pt_regs r;
+
+	if (regs == NULL) {
+		/* Never hand uninitialised stack contents to the crash code. */
+		memset(&r, 0, sizeof(r));
+		regs = &r;
+	}
+
+	/*
+	 * NOTE(review): the scan starts at cpu 1, so cpu 0 is never flagged
+	 * -- presumably it is assumed to be the cpu running this code;
+	 * confirm against the callers.
+	 */
+	for (i = 1; i < NR_CPUS; ++i) {
+		if (!cpu_online(i))
+			continue;
+
+		KDB_STATE_SET_CPU(KEXEC, i);
+	}
+
+	machine_crash_shutdown(regs);
+}
+
+extern void halt_current_cpu(struct pt_regs *);
+
+/*
+ * Halt the current cpu with halt_current_cpu(regs).  The call is compiled
+ * out when CONFIG_XEN is defined, leaving the function empty.
+ */
+void kdba_kdump_shutdown_slave(struct pt_regs *regs)
+{
- #ifndef CONFIG_PARAVIRT_XEN
++#ifndef CONFIG_XEN
+ halt_current_cpu(regs);
+#endif /* CONFIG_XEN */
+}
+
+#endif /* CONFIG_KDB_KDUMP */
--- /dev/null
+/* Print i386 instructions for GDB, the GNU debugger.
+ Copyright 1988, 1989, 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Extracted from binutils 2.16.91.0.2 (OpenSUSE 10.0) and modified for kdb use.
+ * Run through col -b to remove trailing whitespace and various #ifdef/ifndef
+ * __KERNEL__ added.
+ * Keith Owens <kaos@sgi.com> 15 May 2006
+ */
+
+/* 80386 instruction printer by Pace Willisson (pace@prep.ai.mit.edu)
+ July 1988
+ modified by John Hassey (hassey@dg-rtp.dg.com)
+ x86-64 support added by Jan Hubicka (jh@suse.cz)
+ VIA PadLock support by Michal Ludvig (mludvig@suse.cz). */
+
+/* The main tables describing the instructions are essentially a copy
+ of the "Opcode Map" chapter (Appendix A) of the Intel 80386
+ Programmers Manual. Usually, there is a capital letter, followed
+ by a small letter. The capital letter tells the addressing mode,
+ and the small letter tells about the operand size. Refer to
+ the Intel manual for details. */
+
+#ifdef __KERNEL__
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/dis-asm.h>
+#include <linux/kdb.h>
+#define abort() BUG()
+#else /* __KERNEL__ */
+#include "dis-asm.h"
+#include "sysdep.h"
+#include "opintl.h"
+#endif /* __KERNEL__ */
+
+#define MAXLEN 20
+
+#ifndef __KERNEL__
+#include <setjmp.h>
+#endif /* __KERNEL__ */
+
+#ifndef UNIXWARE_COMPAT
+/* Set non-zero for broken, compatible instructions. Set to zero for
+ non-broken opcodes. */
+#define UNIXWARE_COMPAT 1
+#endif
+
+static int fetch_data (struct disassemble_info *, bfd_byte *);
+static void ckprefix (void);
+static const char *prefix_name (int, int);
+static int print_insn (bfd_vma, disassemble_info *);
+static void dofloat (int);
+static void OP_ST (int, int);
+static void OP_STi (int, int);
+static int putop (const char *, int);
+static void oappend (const char *);
+static void append_seg (void);
+static void OP_indirE (int, int);
+static void print_operand_value (char *, int, bfd_vma);
+static void OP_E (int, int);
+static void OP_G (int, int);
+static bfd_vma get64 (void);
+static bfd_signed_vma get32 (void);
+static bfd_signed_vma get32s (void);
+static int get16 (void);
+static void set_op (bfd_vma, int);
+static void OP_REG (int, int);
+static void OP_IMREG (int, int);
+static void OP_I (int, int);
+static void OP_I64 (int, int);
+static void OP_sI (int, int);
+static void OP_J (int, int);
+static void OP_SEG (int, int);
+static void OP_DIR (int, int);
+static void OP_OFF (int, int);
+static void OP_OFF64 (int, int);
+static void ptr_reg (int, int);
+static void OP_ESreg (int, int);
+static void OP_DSreg (int, int);
+static void OP_C (int, int);
+static void OP_D (int, int);
+static void OP_T (int, int);
+static void OP_Rd (int, int);
+static void OP_MMX (int, int);
+static void OP_XMM (int, int);
+static void OP_EM (int, int);
+static void OP_EX (int, int);
+static void OP_MS (int, int);
+static void OP_XS (int, int);
+static void OP_M (int, int);
+static void OP_VMX (int, int);
+static void OP_0fae (int, int);
+static void OP_0f07 (int, int);
+static void NOP_Fixup (int, int);
+static void OP_3DNowSuffix (int, int);
+static void OP_SIMD_Suffix (int, int);
+static void SIMD_Fixup (int, int);
+static void PNI_Fixup (int, int);
+static void SVME_Fixup (int, int);
+static void INVLPG_Fixup (int, int);
+static void BadOp (void);
+static void SEG_Fixup (int, int);
+static void VMX_Fixup (int, int);
+
+/* Per-instruction fetch state, reached through info->private_data. */
+struct dis_private {
+ /* Points to first byte not fetched. */
+ bfd_byte *max_fetched;
+ /* Instruction bytes fetched so far; holds at most MAXLEN bytes. */
+ bfd_byte the_buffer[MAXLEN];
+ /* Address of the instruction; the_buffer[0] corresponds to it. */
+ bfd_vma insn_start;
+ int orig_sizeflag;
+#ifndef __KERNEL__
+ /* longjmp target used by fetch_data on a read error. */
+ jmp_buf bailout;
+#endif /* __KERNEL__ */
+};
+
+/* The opcode for the fwait instruction, which we treat as a prefix
+ when we can. */
+#define FWAIT_OPCODE (0x9b)
+
+/* Set to 1 for 64bit mode disassembly. */
+static int mode_64bit;
+
+/* Flags for the prefixes for the current instruction. See below. */
+static int prefixes;
+
+/* REX prefix the current instruction. See below. */
+static int rex;
+/* Bits of REX we've already used. */
+static int rex_used;
+#define REX_MODE64 8
+#define REX_EXTX 4
+#define REX_EXTY 2
+#define REX_EXTZ 1
+/* Mark parts used in the REX prefix. When we are testing for
+ empty prefix (for 8bit register REX extension), value is zero and
+ only the 0x40 marker is recorded in rex_used. Otherwise the tested
+ bits justify the existence of the REX prefix only when they are
+ actually set in rex; then value and the 0x40 marker are recorded. */
+#define USED_REX(value) \
+ { \
+ if (value) \
+ rex_used |= (rex & value) ? (value) | 0x40 : 0; \
+ else \
+ rex_used |= 0x40; \
+ }
+
+/* Flags for prefixes which we somehow handled when printing the
+ current instruction. */
+static int used_prefixes;
+
+/* Flags stored in PREFIXES. */
+#define PREFIX_REPZ 1
+#define PREFIX_REPNZ 2
+#define PREFIX_LOCK 4
+#define PREFIX_CS 8
+#define PREFIX_SS 0x10
+#define PREFIX_DS 0x20
+#define PREFIX_ES 0x40
+#define PREFIX_FS 0x80
+#define PREFIX_GS 0x100
+#define PREFIX_DATA 0x200
+#define PREFIX_ADDR 0x400
+#define PREFIX_FWAIT 0x800
+
+/* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
+ to ADDR (exclusive) are valid. Returns 1 for success, longjmps
+ on error. */
+#define FETCH_DATA(info, addr) \
+ ((addr) <= ((struct dis_private *) (info->private_data))->max_fetched \
+ ? 1 : fetch_data ((info), (addr)))
+
+/* Read the bytes from priv->max_fetched up to (but not including) ADDR
+   into the private buffer by calling INFO->read_memory_func.  On success
+   max_fetched is advanced to ADDR.  On a read error memory_error_func is
+   called if no byte has been fetched yet; non-kernel builds then longjmp
+   to priv->bailout, while kernel builds only print a message with
+   kdb_printf.  Whenever the function returns, it returns 1.  */
+static int
+fetch_data (struct disassemble_info *info, bfd_byte *addr)
+{
+  struct dis_private *priv = (struct dis_private *) info->private_data;
+  bfd_vma start = priv->insn_start + (priv->max_fetched - priv->the_buffer);
+  int status = (*info->read_memory_func) (start,
+					  priv->max_fetched,
+					  addr - priv->max_fetched,
+					  info);
+
+  if (status == 0)
+    {
+      priv->max_fetched = addr;
+      return 1;
+    }
+
+  /* If we did manage to read at least one byte, then
+     print_insn_i386 will do something sensible.  Otherwise, print
+     an error.  We do that here because this is where we know
+     STATUS.  */
+  if (priv->max_fetched == priv->the_buffer)
+    (*info->memory_error_func) (status, start, info);
+#ifndef __KERNEL__
+  longjmp (priv->bailout, 1);
+#else /* __KERNEL__ */
+  /* XXX - what to do? */
+  kdb_printf("Hmm. longjmp.\n");
+#endif /* __KERNEL__ */
+  return 1;
+}
+
+/* Operand descriptors.  Each macro expands to a routine and bytemode
+   pair, filling one op/bytemode slot of a struct dis386 entry.  XX is
+   the empty slot (NULL routine, bytemode 0).  */
+#define XX NULL, 0
+
+#define Eb OP_E, b_mode
+#define Ev OP_E, v_mode
+#define Ed OP_E, d_mode
+#define Eq OP_E, q_mode
+#define Edq OP_E, dq_mode
+#define Edqw OP_E, dqw_mode
+#define indirEv OP_indirE, branch_v_mode
+#define indirEp OP_indirE, f_mode
+#define Em OP_E, m_mode
+#define Ew OP_E, w_mode
+#define Ma OP_E, v_mode
+#define M OP_M, 0 /* lea, lgdt, etc. */
+#define Mp OP_M, f_mode /* 32 or 48 bit memory operand for LDS, LES etc */
+#define Gb OP_G, b_mode
+#define Gv OP_G, v_mode
+#define Gd OP_G, d_mode
+#define Gdq OP_G, dq_mode
+#define Gm OP_G, m_mode
+#define Gw OP_G, w_mode
+#define Rd OP_Rd, d_mode
+#define Rm OP_Rd, m_mode
+#define Ib OP_I, b_mode
+#define sIb OP_sI, b_mode /* sign extended byte */
+#define Iv OP_I, v_mode
+#define Iq OP_I, q_mode
+#define Iv64 OP_I64, v_mode
+#define Iw OP_I, w_mode
+#define I1 OP_I, const_1_mode
+#define Jb OP_J, b_mode
+#define Jv OP_J, v_mode
+#define Cm OP_C, m_mode
+#define Dm OP_D, m_mode
+#define Td OP_T, d_mode
+#define Sv SEG_Fixup, v_mode
+
+#define RMeAX OP_REG, eAX_reg
+#define RMeBX OP_REG, eBX_reg
+#define RMeCX OP_REG, eCX_reg
+#define RMeDX OP_REG, eDX_reg
+#define RMeSP OP_REG, eSP_reg
+#define RMeBP OP_REG, eBP_reg
+#define RMeSI OP_REG, eSI_reg
+#define RMeDI OP_REG, eDI_reg
+#define RMrAX OP_REG, rAX_reg
+#define RMrBX OP_REG, rBX_reg
+#define RMrCX OP_REG, rCX_reg
+#define RMrDX OP_REG, rDX_reg
+#define RMrSP OP_REG, rSP_reg
+#define RMrBP OP_REG, rBP_reg
+#define RMrSI OP_REG, rSI_reg
+#define RMrDI OP_REG, rDI_reg
+#define RMAL OP_REG, al_reg
+#define RMAL OP_REG, al_reg
+#define RMCL OP_REG, cl_reg
+#define RMDL OP_REG, dl_reg
+#define RMBL OP_REG, bl_reg
+#define RMAH OP_REG, ah_reg
+#define RMCH OP_REG, ch_reg
+#define RMDH OP_REG, dh_reg
+#define RMBH OP_REG, bh_reg
+#define RMAX OP_REG, ax_reg
+#define RMDX OP_REG, dx_reg
+
+#define eAX OP_IMREG, eAX_reg
+#define eBX OP_IMREG, eBX_reg
+#define eCX OP_IMREG, eCX_reg
+#define eDX OP_IMREG, eDX_reg
+#define eSP OP_IMREG, eSP_reg
+#define eBP OP_IMREG, eBP_reg
+#define eSI OP_IMREG, eSI_reg
+#define eDI OP_IMREG, eDI_reg
+#define AL OP_IMREG, al_reg
+#define AL OP_IMREG, al_reg
+#define CL OP_IMREG, cl_reg
+#define DL OP_IMREG, dl_reg
+#define BL OP_IMREG, bl_reg
+#define AH OP_IMREG, ah_reg
+#define CH OP_IMREG, ch_reg
+#define DH OP_IMREG, dh_reg
+#define BH OP_IMREG, bh_reg
+#define AX OP_IMREG, ax_reg
+#define DX OP_IMREG, dx_reg
+#define indirDX OP_IMREG, indir_dx_reg
+
+#define Sw OP_SEG, w_mode
+#define Ap OP_DIR, 0
+#define Ob OP_OFF, b_mode
+#define Ob64 OP_OFF64, b_mode
+#define Ov OP_OFF, v_mode
+#define Ov64 OP_OFF64, v_mode
+#define Xb OP_DSreg, eSI_reg
+#define Xv OP_DSreg, eSI_reg
+#define Yb OP_ESreg, eDI_reg
+#define Yv OP_ESreg, eDI_reg
+#define DSBX OP_DSreg, eBX_reg
+
+#define es OP_REG, es_reg
+#define ss OP_REG, ss_reg
+#define cs OP_REG, cs_reg
+#define ds OP_REG, ds_reg
+#define fs OP_REG, fs_reg
+#define gs OP_REG, gs_reg
+
+#define MX OP_MMX, 0
+#define XM OP_XMM, 0
+#define EM OP_EM, v_mode
+#define EX OP_EX, v_mode
+#define MS OP_MS, v_mode
+#define XS OP_XS, v_mode
+#define VM OP_VMX, q_mode
+#define OPSUF OP_3DNowSuffix, 0
+#define OPSIMD OP_SIMD_Suffix, 0
+
+#define cond_jump_flag NULL, cond_jump_mode
+#define loop_jcxz_flag NULL, loop_jcxz_mode
+
+/* bits in sizeflag */
+#define SUFFIX_ALWAYS 4
+#define AFLAG 2
+#define DFLAG 1
+
+#define b_mode 1 /* byte operand */
+#define v_mode 2 /* operand size depends on prefixes */
+#define w_mode 3 /* word operand */
+#define d_mode 4 /* double word operand */
+#define q_mode 5 /* quad word operand */
+#define t_mode 6 /* ten-byte operand */
+#define x_mode 7 /* 16-byte XMM operand */
+#define m_mode 8 /* d_mode in 32bit, q_mode in 64bit mode. */
+#define cond_jump_mode 9
+#define loop_jcxz_mode 10
+#define dq_mode 11 /* operand size depends on REX prefixes. */
+#define dqw_mode 12 /* registers like dq_mode, memory like w_mode. */
+#define f_mode 13 /* 4- or 6-byte pointer operand */
+#define const_1_mode 14
+#define branch_v_mode 15 /* v_mode for branch. */
+
+#define es_reg 100
+#define cs_reg 101
+#define ss_reg 102
+#define ds_reg 103
+#define fs_reg 104
+#define gs_reg 105
+
+#define eAX_reg 108
+#define eCX_reg 109
+#define eDX_reg 110
+#define eBX_reg 111
+#define eSP_reg 112
+#define eBP_reg 113
+#define eSI_reg 114
+#define eDI_reg 115
+
+#define al_reg 116
+#define cl_reg 117
+#define dl_reg 118
+#define bl_reg 119
+#define ah_reg 120
+#define ch_reg 121
+#define dh_reg 122
+#define bh_reg 123
+
+#define ax_reg 124
+#define cx_reg 125
+#define dx_reg 126
+#define bx_reg 127
+#define sp_reg 128
+#define bp_reg 129
+#define si_reg 130
+#define di_reg 131
+
+#define rAX_reg 132
+#define rCX_reg 133
+#define rDX_reg 134
+#define rBX_reg 135
+#define rSP_reg 136
+#define rBP_reg 137
+#define rSI_reg 138
+#define rDI_reg 139
+
+#define indir_dx_reg 150
+
+#define FLOATCODE 1
+#define USE_GROUPS 2
+#define USE_PREFIX_USER_TABLE 3
+#define X86_64_SPECIAL 4
+
+#define FLOAT NULL, NULL, FLOATCODE, NULL, 0, NULL, 0
+
+#define GRP1b NULL, NULL, USE_GROUPS, NULL, 0, NULL, 0
+#define GRP1S NULL, NULL, USE_GROUPS, NULL, 1, NULL, 0
+#define GRP1Ss NULL, NULL, USE_GROUPS, NULL, 2, NULL, 0
+#define GRP2b NULL, NULL, USE_GROUPS, NULL, 3, NULL, 0
+#define GRP2S NULL, NULL, USE_GROUPS, NULL, 4, NULL, 0
+#define GRP2b_one NULL, NULL, USE_GROUPS, NULL, 5, NULL, 0
+#define GRP2S_one NULL, NULL, USE_GROUPS, NULL, 6, NULL, 0
+#define GRP2b_cl NULL, NULL, USE_GROUPS, NULL, 7, NULL, 0
+#define GRP2S_cl NULL, NULL, USE_GROUPS, NULL, 8, NULL, 0
+#define GRP3b NULL, NULL, USE_GROUPS, NULL, 9, NULL, 0
+#define GRP3S NULL, NULL, USE_GROUPS, NULL, 10, NULL, 0
+#define GRP4 NULL, NULL, USE_GROUPS, NULL, 11, NULL, 0
+#define GRP5 NULL, NULL, USE_GROUPS, NULL, 12, NULL, 0
+#define GRP6 NULL, NULL, USE_GROUPS, NULL, 13, NULL, 0
+#define GRP7 NULL, NULL, USE_GROUPS, NULL, 14, NULL, 0
+#define GRP8 NULL, NULL, USE_GROUPS, NULL, 15, NULL, 0
+#define GRP9 NULL, NULL, USE_GROUPS, NULL, 16, NULL, 0
+#define GRP10 NULL, NULL, USE_GROUPS, NULL, 17, NULL, 0
+#define GRP11 NULL, NULL, USE_GROUPS, NULL, 18, NULL, 0
+#define GRP12 NULL, NULL, USE_GROUPS, NULL, 19, NULL, 0
+#define GRP13 NULL, NULL, USE_GROUPS, NULL, 20, NULL, 0
+#define GRP14 NULL, NULL, USE_GROUPS, NULL, 21, NULL, 0
+#define GRPAMD NULL, NULL, USE_GROUPS, NULL, 22, NULL, 0
+#define GRPPADLCK1 NULL, NULL, USE_GROUPS, NULL, 23, NULL, 0
+#define GRPPADLCK2 NULL, NULL, USE_GROUPS, NULL, 24, NULL, 0
+
+#define PREGRP0 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 0, NULL, 0
+#define PREGRP1 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 1, NULL, 0
+#define PREGRP2 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 2, NULL, 0
+#define PREGRP3 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 3, NULL, 0
+#define PREGRP4 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 4, NULL, 0
+#define PREGRP5 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 5, NULL, 0
+#define PREGRP6 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 6, NULL, 0
+#define PREGRP7 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 7, NULL, 0
+#define PREGRP8 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 8, NULL, 0
+#define PREGRP9 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 9, NULL, 0
+#define PREGRP10 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 10, NULL, 0
+#define PREGRP11 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 11, NULL, 0
+#define PREGRP12 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 12, NULL, 0
+#define PREGRP13 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 13, NULL, 0
+#define PREGRP14 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 14, NULL, 0
+#define PREGRP15 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 15, NULL, 0
+#define PREGRP16 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 16, NULL, 0
+#define PREGRP17 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 17, NULL, 0
+#define PREGRP18 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 18, NULL, 0
+#define PREGRP19 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 19, NULL, 0
+#define PREGRP20 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 20, NULL, 0
+#define PREGRP21 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 21, NULL, 0
+#define PREGRP22 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 22, NULL, 0
+#define PREGRP23 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 23, NULL, 0
+#define PREGRP24 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 24, NULL, 0
+#define PREGRP25 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 25, NULL, 0
+#define PREGRP26 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 26, NULL, 0
+#define PREGRP27 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 27, NULL, 0
+#define PREGRP28 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 28, NULL, 0
+#define PREGRP29 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 29, NULL, 0
+#define PREGRP30 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 30, NULL, 0
+#define PREGRP31 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 31, NULL, 0
+#define PREGRP32 NULL, NULL, USE_PREFIX_USER_TABLE, NULL, 32, NULL, 0
+
+#define X86_64_0 NULL, NULL, X86_64_SPECIAL, NULL, 0, NULL, 0
+
+/* Operand routine: takes the operand's bytemode and the sizeflag. */
+typedef void (*op_rtn) (int bytemode, int sizeflag);
+
+/* One opcode table entry: a mnemonic template followed by up to three
+   operand slots, each a routine plus the bytemode passed to it. */
+struct dis386 {
+ const char *name;
+ op_rtn op1;
+ int bytemode1;
+ op_rtn op2;
+ int bytemode2;
+ op_rtn op3;
+ int bytemode3;
+};
+
+/* Upper case letters in the instruction names here are macros.
+ 'A' => print 'b' if no register operands or suffix_always is true
+ 'B' => print 'b' if suffix_always is true
+ 'C' => print 's' or 'l' ('w' or 'd' in Intel mode) depending on operand
+ . size prefix
+ 'E' => print 'e' if 32-bit form of jcxz
+ 'F' => print 'w' or 'l' depending on address size prefix (loop insns)
+ 'H' => print ",pt" or ",pn" branch hint
+ 'I' => honor following macro letter even in Intel mode (implemented only
+ . for some of the macro letters)
+ 'J' => print 'l'
+ 'L' => print 'l' if suffix_always is true
+ 'N' => print 'n' if instruction has no wait "prefix"
+ 'O' => print 'd', or 'o'
+ 'P' => print 'w', 'l' or 'q' if instruction has an operand size prefix,
+ . or suffix_always is true. print 'q' if rex prefix is present.
+ 'Q' => print 'w', 'l' or 'q' if no register operands or suffix_always
+ . is true
+ 'R' => print 'w', 'l' or 'q' ("wd" or "dq" in intel mode)
+ 'S' => print 'w', 'l' or 'q' if suffix_always is true
+ 'T' => print 'q' in 64bit mode and behave as 'P' otherwise
+ 'U' => print 'q' in 64bit mode and behave as 'Q' otherwise
+ 'W' => print 'b' or 'w' ("w" or "de" in intel mode)
+ 'X' => print 's', 'd' depending on data16 prefix (for XMM)
+ 'Y' => 'q' if instruction has an REX 64bit overwrite prefix
+
+ Many of the above letters print nothing in Intel mode. See "putop"
+ for the details.
+
+ Braces '{' and '}', and vertical bars '|', indicate alternative
+ mnemonic strings for AT&T, Intel, X86_64 AT&T, and X86_64 Intel
+ modes. In cases where there are only two alternatives, the X86_64
+ instruction is reserved, and "(bad)" is printed.
+*/
+
+/* One-byte opcode map.  The numbered comments give the opcode byte of
+   the first entry in each group of eight.  */
+static const struct dis386 dis386[] = {
+ /* 00 */
+ { "addB", Eb, Gb, XX },
+ { "addS", Ev, Gv, XX },
+ { "addB", Gb, Eb, XX },
+ { "addS", Gv, Ev, XX },
+ { "addB", AL, Ib, XX },
+ { "addS", eAX, Iv, XX },
+ { "push{T|}", es, XX, XX },
+ { "pop{T|}", es, XX, XX },
+ /* 08 */
+ { "orB", Eb, Gb, XX },
+ { "orS", Ev, Gv, XX },
+ { "orB", Gb, Eb, XX },
+ { "orS", Gv, Ev, XX },
+ { "orB", AL, Ib, XX },
+ { "orS", eAX, Iv, XX },
+ { "push{T|}", cs, XX, XX },
+ { "(bad)", XX, XX, XX }, /* 0x0f extended opcode escape */
+ /* 10 */
+ { "adcB", Eb, Gb, XX },
+ { "adcS", Ev, Gv, XX },
+ { "adcB", Gb, Eb, XX },
+ { "adcS", Gv, Ev, XX },
+ { "adcB", AL, Ib, XX },
+ { "adcS", eAX, Iv, XX },
+ { "push{T|}", ss, XX, XX },
+ { "pop{T|}", ss, XX, XX },
+ /* 18 */
+ { "sbbB", Eb, Gb, XX },
+ { "sbbS", Ev, Gv, XX },
+ { "sbbB", Gb, Eb, XX },
+ { "sbbS", Gv, Ev, XX },
+ { "sbbB", AL, Ib, XX },
+ { "sbbS", eAX, Iv, XX },
+ { "push{T|}", ds, XX, XX },
+ { "pop{T|}", ds, XX, XX },
+ /* 20 */
+ { "andB", Eb, Gb, XX },
+ { "andS", Ev, Gv, XX },
+ { "andB", Gb, Eb, XX },
+ { "andS", Gv, Ev, XX },
+ { "andB", AL, Ib, XX },
+ { "andS", eAX, Iv, XX },
+ { "(bad)", XX, XX, XX }, /* SEG ES prefix */
+ { "daa{|}", XX, XX, XX },
+ /* 28 */
+ { "subB", Eb, Gb, XX },
+ { "subS", Ev, Gv, XX },
+ { "subB", Gb, Eb, XX },
+ { "subS", Gv, Ev, XX },
+ { "subB", AL, Ib, XX },
+ { "subS", eAX, Iv, XX },
+ { "(bad)", XX, XX, XX }, /* SEG CS prefix */
+ { "das{|}", XX, XX, XX },
+ /* 30 */
+ { "xorB", Eb, Gb, XX },
+ { "xorS", Ev, Gv, XX },
+ { "xorB", Gb, Eb, XX },
+ { "xorS", Gv, Ev, XX },
+ { "xorB", AL, Ib, XX },
+ { "xorS", eAX, Iv, XX },
+ { "(bad)", XX, XX, XX }, /* SEG SS prefix */
+ { "aaa{|}", XX, XX, XX },
+ /* 38 */
+ { "cmpB", Eb, Gb, XX },
+ { "cmpS", Ev, Gv, XX },
+ { "cmpB", Gb, Eb, XX },
+ { "cmpS", Gv, Ev, XX },
+ { "cmpB", AL, Ib, XX },
+ { "cmpS", eAX, Iv, XX },
+ { "(bad)", XX, XX, XX }, /* SEG DS prefix */
+ { "aas{|}", XX, XX, XX },
+ /* 40 */
+ { "inc{S|}", RMeAX, XX, XX },
+ { "inc{S|}", RMeCX, XX, XX },
+ { "inc{S|}", RMeDX, XX, XX },
+ { "inc{S|}", RMeBX, XX, XX },
+ { "inc{S|}", RMeSP, XX, XX },
+ { "inc{S|}", RMeBP, XX, XX },
+ { "inc{S|}", RMeSI, XX, XX },
+ { "inc{S|}", RMeDI, XX, XX },
+ /* 48 */
+ { "dec{S|}", RMeAX, XX, XX },
+ { "dec{S|}", RMeCX, XX, XX },
+ { "dec{S|}", RMeDX, XX, XX },
+ { "dec{S|}", RMeBX, XX, XX },
+ { "dec{S|}", RMeSP, XX, XX },
+ { "dec{S|}", RMeBP, XX, XX },
+ { "dec{S|}", RMeSI, XX, XX },
+ { "dec{S|}", RMeDI, XX, XX },
+ /* 50 */
+ { "pushS", RMrAX, XX, XX },
+ { "pushS", RMrCX, XX, XX },
+ { "pushS", RMrDX, XX, XX },
+ { "pushS", RMrBX, XX, XX },
+ { "pushS", RMrSP, XX, XX },
+ { "pushS", RMrBP, XX, XX },
+ { "pushS", RMrSI, XX, XX },
+ { "pushS", RMrDI, XX, XX },
+ /* 58 */
+ { "popS", RMrAX, XX, XX },
+ { "popS", RMrCX, XX, XX },
+ { "popS", RMrDX, XX, XX },
+ { "popS", RMrBX, XX, XX },
+ { "popS", RMrSP, XX, XX },
+ { "popS", RMrBP, XX, XX },
+ { "popS", RMrSI, XX, XX },
+ { "popS", RMrDI, XX, XX },
+ /* 60 */
+ { "pusha{P|}", XX, XX, XX },
+ { "popa{P|}", XX, XX, XX },
+ { "bound{S|}", Gv, Ma, XX },
+ { X86_64_0 },
+ { "(bad)", XX, XX, XX }, /* seg fs */
+ { "(bad)", XX, XX, XX }, /* seg gs */
+ { "(bad)", XX, XX, XX }, /* op size prefix */
+ { "(bad)", XX, XX, XX }, /* adr size prefix */
+ /* 68 */
+ { "pushT", Iq, XX, XX },
+ { "imulS", Gv, Ev, Iv },
+ { "pushT", sIb, XX, XX },
+ { "imulS", Gv, Ev, sIb },
+ { "ins{b||b|}", Yb, indirDX, XX },
+ { "ins{R||R|}", Yv, indirDX, XX },
+ { "outs{b||b|}", indirDX, Xb, XX },
+ { "outs{R||R|}", indirDX, Xv, XX },
+ /* 70 */
+ { "joH", Jb, XX, cond_jump_flag },
+ { "jnoH", Jb, XX, cond_jump_flag },
+ { "jbH", Jb, XX, cond_jump_flag },
+ { "jaeH", Jb, XX, cond_jump_flag },
+ { "jeH", Jb, XX, cond_jump_flag },
+ { "jneH", Jb, XX, cond_jump_flag },
+ { "jbeH", Jb, XX, cond_jump_flag },
+ { "jaH", Jb, XX, cond_jump_flag },
+ /* 78 */
+ { "jsH", Jb, XX, cond_jump_flag },
+ { "jnsH", Jb, XX, cond_jump_flag },
+ { "jpH", Jb, XX, cond_jump_flag },
+ { "jnpH", Jb, XX, cond_jump_flag },
+ { "jlH", Jb, XX, cond_jump_flag },
+ { "jgeH", Jb, XX, cond_jump_flag },
+ { "jleH", Jb, XX, cond_jump_flag },
+ { "jgH", Jb, XX, cond_jump_flag },
+ /* 80 */
+ { GRP1b },
+ { GRP1S },
+ { "(bad)", XX, XX, XX },
+ { GRP1Ss },
+ { "testB", Eb, Gb, XX },
+ { "testS", Ev, Gv, XX },
+ { "xchgB", Eb, Gb, XX },
+ { "xchgS", Ev, Gv, XX },
+ /* 88 */
+ { "movB", Eb, Gb, XX },
+ { "movS", Ev, Gv, XX },
+ { "movB", Gb, Eb, XX },
+ { "movS", Gv, Ev, XX },
+ { "movQ", Sv, Sw, XX },
+ { "leaS", Gv, M, XX },
+ { "movQ", Sw, Sv, XX },
+ { "popU", Ev, XX, XX },
+ /* 90 */
+ { "nop", NOP_Fixup, 0, XX, XX },
+ { "xchgS", RMeCX, eAX, XX },
+ { "xchgS", RMeDX, eAX, XX },
+ { "xchgS", RMeBX, eAX, XX },
+ { "xchgS", RMeSP, eAX, XX },
+ { "xchgS", RMeBP, eAX, XX },
+ { "xchgS", RMeSI, eAX, XX },
+ { "xchgS", RMeDI, eAX, XX },
+ /* 98 */
+ { "cW{tR||tR|}", XX, XX, XX },
+ { "cR{tO||tO|}", XX, XX, XX },
+ { "Jcall{T|}", Ap, XX, XX },
+ { "(bad)", XX, XX, XX }, /* fwait */
+ { "pushfT", XX, XX, XX },
+ { "popfT", XX, XX, XX },
+ { "sahf{|}", XX, XX, XX },
+ { "lahf{|}", XX, XX, XX },
+ /* a0 */
+ { "movB", AL, Ob64, XX },
+ { "movS", eAX, Ov64, XX },
+ { "movB", Ob64, AL, XX },
+ { "movS", Ov64, eAX, XX },
+ { "movs{b||b|}", Yb, Xb, XX },
+ { "movs{R||R|}", Yv, Xv, XX },
+ { "cmps{b||b|}", Xb, Yb, XX },
+ { "cmps{R||R|}", Xv, Yv, XX },
+ /* a8 */
+ { "testB", AL, Ib, XX },
+ { "testS", eAX, Iv, XX },
+ { "stosB", Yb, AL, XX },
+ { "stosS", Yv, eAX, XX },
+ { "lodsB", AL, Xb, XX },
+ { "lodsS", eAX, Xv, XX },
+ { "scasB", AL, Yb, XX },
+ { "scasS", eAX, Yv, XX },
+ /* b0 */
+ { "movB", RMAL, Ib, XX },
+ { "movB", RMCL, Ib, XX },
+ { "movB", RMDL, Ib, XX },
+ { "movB", RMBL, Ib, XX },
+ { "movB", RMAH, Ib, XX },
+ { "movB", RMCH, Ib, XX },
+ { "movB", RMDH, Ib, XX },
+ { "movB", RMBH, Ib, XX },
+ /* b8 */
+ { "movS", RMeAX, Iv64, XX },
+ { "movS", RMeCX, Iv64, XX },
+ { "movS", RMeDX, Iv64, XX },
+ { "movS", RMeBX, Iv64, XX },
+ { "movS", RMeSP, Iv64, XX },
+ { "movS", RMeBP, Iv64, XX },
+ { "movS", RMeSI, Iv64, XX },
+ { "movS", RMeDI, Iv64, XX },
+ /* c0 */
+ { GRP2b },
+ { GRP2S },
+ { "retT", Iw, XX, XX },
+ { "retT", XX, XX, XX },
+ { "les{S|}", Gv, Mp, XX },
+ { "ldsS", Gv, Mp, XX },
+ { "movA", Eb, Ib, XX },
+ { "movQ", Ev, Iv, XX },
+ /* c8 */
+ { "enterT", Iw, Ib, XX },
+ { "leaveT", XX, XX, XX },
+ { "lretP", Iw, XX, XX },
+ { "lretP", XX, XX, XX },
+ { "int3", XX, XX, XX },
+ { "int", Ib, XX, XX },
+ { "into{|}", XX, XX, XX },
+ { "iretP", XX, XX, XX },
+ /* d0 */
+ { GRP2b_one },
+ { GRP2S_one },
+ { GRP2b_cl },
+ { GRP2S_cl },
+ { "aam{|}", sIb, XX, XX },
+ { "aad{|}", sIb, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "xlat", DSBX, XX, XX },
+ /* d8 */
+ { FLOAT },
+ { FLOAT },
+ { FLOAT },
+ { FLOAT },
+ { FLOAT },
+ { FLOAT },
+ { FLOAT },
+ { FLOAT },
+ /* e0 */
+ { "loopneFH", Jb, XX, loop_jcxz_flag },
+ { "loopeFH", Jb, XX, loop_jcxz_flag },
+ { "loopFH", Jb, XX, loop_jcxz_flag },
+ { "jEcxzH", Jb, XX, loop_jcxz_flag },
+ { "inB", AL, Ib, XX },
+ { "inS", eAX, Ib, XX },
+ { "outB", Ib, AL, XX },
+ { "outS", Ib, eAX, XX },
+ /* e8 */
+ { "callT", Jv, XX, XX },
+ { "jmpT", Jv, XX, XX },
+ { "Jjmp{T|}", Ap, XX, XX },
+ { "jmp", Jb, XX, XX },
+ { "inB", AL, indirDX, XX },
+ { "inS", eAX, indirDX, XX },
+ { "outB", indirDX, AL, XX },
+ { "outS", indirDX, eAX, XX },
+ /* f0 */
+ { "(bad)", XX, XX, XX }, /* lock prefix */
+ { "icebp", XX, XX, XX },
+ { "(bad)", XX, XX, XX }, /* repne */
+ { "(bad)", XX, XX, XX }, /* repz */
+ { "hlt", XX, XX, XX },
+ { "cmc", XX, XX, XX },
+ { GRP3b },
+ { GRP3S },
+ /* f8 */
+ { "clc", XX, XX, XX },
+ { "stc", XX, XX, XX },
+ { "cli", XX, XX, XX },
+ { "sti", XX, XX, XX },
+ { "cld", XX, XX, XX },
+ { "std", XX, XX, XX },
+ { GRP4 },
+ { GRP5 },
+};
+
+static const struct dis386 dis386_twobyte[] = { /* Two-byte opcode table: print_insn indexes it with the byte that follows a 0x0f escape. */
+ /* 00 */
+ { GRP6 },
+ { GRP7 },
+ { "larS", Gv, Ew, XX },
+ { "lslS", Gv, Ew, XX },
+ { "(bad)", XX, XX, XX },
+ { "syscall", XX, XX, XX },
+ { "clts", XX, XX, XX },
+ { "sysretP", XX, XX, XX },
+ /* 08 */
+ { "invd", XX, XX, XX },
+ { "wbinvd", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "ud2a", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { GRPAMD },
+ { "femms", XX, XX, XX },
+ { "", MX, EM, OPSUF }, /* See OP_3DNowSuffix. */
+ /* 10 */
+ { PREGRP8 },
+ { PREGRP9 },
+ { PREGRP30 },
+ { "movlpX", EX, XM, SIMD_Fixup, 'h' },
+ { "unpcklpX", XM, EX, XX },
+ { "unpckhpX", XM, EX, XX },
+ { PREGRP31 },
+ { "movhpX", EX, XM, SIMD_Fixup, 'l' },
+ /* 18 */
+ { GRP14 },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ /* 20 */
+ { "movL", Rm, Cm, XX },
+ { "movL", Rm, Dm, XX },
+ { "movL", Cm, Rm, XX },
+ { "movL", Dm, Rm, XX },
+ { "movL", Rd, Td, XX },
+ { "(bad)", XX, XX, XX },
+ { "movL", Td, Rd, XX },
+ { "(bad)", XX, XX, XX },
+ /* 28 */
+ { "movapX", XM, EX, XX },
+ { "movapX", EX, XM, XX },
+ { PREGRP2 },
+ { "movntpX", Ev, XM, XX },
+ { PREGRP4 },
+ { PREGRP3 },
+ { "ucomisX", XM,EX, XX },
+ { "comisX", XM,EX, XX },
+ /* 30 */
+ { "wrmsr", XX, XX, XX },
+ { "rdtsc", XX, XX, XX },
+ { "rdmsr", XX, XX, XX },
+ { "rdpmc", XX, XX, XX },
+ { "sysenter", XX, XX, XX },
+ { "sysexit", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ /* 38 */
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ /* 40 */
+ { "cmovo", Gv, Ev, XX },
+ { "cmovno", Gv, Ev, XX },
+ { "cmovb", Gv, Ev, XX },
+ { "cmovae", Gv, Ev, XX },
+ { "cmove", Gv, Ev, XX },
+ { "cmovne", Gv, Ev, XX },
+ { "cmovbe", Gv, Ev, XX },
+ { "cmova", Gv, Ev, XX },
+ /* 48 */
+ { "cmovs", Gv, Ev, XX },
+ { "cmovns", Gv, Ev, XX },
+ { "cmovp", Gv, Ev, XX },
+ { "cmovnp", Gv, Ev, XX },
+ { "cmovl", Gv, Ev, XX },
+ { "cmovge", Gv, Ev, XX },
+ { "cmovle", Gv, Ev, XX },
+ { "cmovg", Gv, Ev, XX },
+ /* 50 */
+ { "movmskpX", Gdq, XS, XX },
+ { PREGRP13 },
+ { PREGRP12 },
+ { PREGRP11 },
+ { "andpX", XM, EX, XX },
+ { "andnpX", XM, EX, XX },
+ { "orpX", XM, EX, XX },
+ { "xorpX", XM, EX, XX },
+ /* 58 */
+ { PREGRP0 },
+ { PREGRP10 },
+ { PREGRP17 },
+ { PREGRP16 },
+ { PREGRP14 },
+ { PREGRP7 },
+ { PREGRP5 },
+ { PREGRP6 },
+ /* 60 */
+ { "punpcklbw", MX, EM, XX },
+ { "punpcklwd", MX, EM, XX },
+ { "punpckldq", MX, EM, XX },
+ { "packsswb", MX, EM, XX },
+ { "pcmpgtb", MX, EM, XX },
+ { "pcmpgtw", MX, EM, XX },
+ { "pcmpgtd", MX, EM, XX },
+ { "packuswb", MX, EM, XX },
+ /* 68 */
+ { "punpckhbw", MX, EM, XX },
+ { "punpckhwd", MX, EM, XX },
+ { "punpckhdq", MX, EM, XX },
+ { "packssdw", MX, EM, XX },
+ { PREGRP26 },
+ { PREGRP24 },
+ { "movd", MX, Edq, XX },
+ { PREGRP19 },
+ /* 70 */
+ { PREGRP22 },
+ { GRP10 },
+ { GRP11 },
+ { GRP12 },
+ { "pcmpeqb", MX, EM, XX },
+ { "pcmpeqw", MX, EM, XX },
+ { "pcmpeqd", MX, EM, XX },
+ { "emms", XX, XX, XX },
+ /* 78 */
+ { "vmread", Em, Gm, XX },
+ { "vmwrite", Gm, Em, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { PREGRP28 },
+ { PREGRP29 },
+ { PREGRP23 },
+ { PREGRP20 },
+ /* 80 */
+ { "joH", Jv, XX, cond_jump_flag },
+ { "jnoH", Jv, XX, cond_jump_flag },
+ { "jbH", Jv, XX, cond_jump_flag },
+ { "jaeH", Jv, XX, cond_jump_flag },
+ { "jeH", Jv, XX, cond_jump_flag },
+ { "jneH", Jv, XX, cond_jump_flag },
+ { "jbeH", Jv, XX, cond_jump_flag },
+ { "jaH", Jv, XX, cond_jump_flag },
+ /* 88 */
+ { "jsH", Jv, XX, cond_jump_flag },
+ { "jnsH", Jv, XX, cond_jump_flag },
+ { "jpH", Jv, XX, cond_jump_flag },
+ { "jnpH", Jv, XX, cond_jump_flag },
+ { "jlH", Jv, XX, cond_jump_flag },
+ { "jgeH", Jv, XX, cond_jump_flag },
+ { "jleH", Jv, XX, cond_jump_flag },
+ { "jgH", Jv, XX, cond_jump_flag },
+ /* 90 */
+ { "seto", Eb, XX, XX },
+ { "setno", Eb, XX, XX },
+ { "setb", Eb, XX, XX },
+ { "setae", Eb, XX, XX },
+ { "sete", Eb, XX, XX },
+ { "setne", Eb, XX, XX },
+ { "setbe", Eb, XX, XX },
+ { "seta", Eb, XX, XX },
+ /* 98 */
+ { "sets", Eb, XX, XX },
+ { "setns", Eb, XX, XX },
+ { "setp", Eb, XX, XX },
+ { "setnp", Eb, XX, XX },
+ { "setl", Eb, XX, XX },
+ { "setge", Eb, XX, XX },
+ { "setle", Eb, XX, XX },
+ { "setg", Eb, XX, XX },
+ /* a0 */
+ { "pushT", fs, XX, XX },
+ { "popT", fs, XX, XX },
+ { "cpuid", XX, XX, XX },
+ { "btS", Ev, Gv, XX },
+ { "shldS", Ev, Gv, Ib },
+ { "shldS", Ev, Gv, CL },
+ { GRPPADLCK2 },
+ { GRPPADLCK1 },
+ /* a8 */
+ { "pushT", gs, XX, XX },
+ { "popT", gs, XX, XX },
+ { "rsm", XX, XX, XX },
+ { "btsS", Ev, Gv, XX },
+ { "shrdS", Ev, Gv, Ib },
+ { "shrdS", Ev, Gv, CL },
+ { GRP13 },
+ { "imulS", Gv, Ev, XX },
+ /* b0 */
+ { "cmpxchgB", Eb, Gb, XX },
+ { "cmpxchgS", Ev, Gv, XX },
+ { "lssS", Gv, Mp, XX },
+ { "btrS", Ev, Gv, XX },
+ { "lfsS", Gv, Mp, XX },
+ { "lgsS", Gv, Mp, XX },
+ { "movz{bR|x|bR|x}", Gv, Eb, XX },
+ { "movz{wR|x|wR|x}", Gv, Ew, XX }, /* yes, there really is movzww ! */
+ /* b8 */
+ { "(bad)", XX, XX, XX },
+ { "ud2b", XX, XX, XX },
+ { GRP8 },
+ { "btcS", Ev, Gv, XX },
+ { "bsfS", Gv, Ev, XX },
+ { "bsrS", Gv, Ev, XX },
+ { "movs{bR|x|bR|x}", Gv, Eb, XX },
+ { "movs{wR|x|wR|x}", Gv, Ew, XX }, /* yes, there really is movsww ! */
+ /* c0 */
+ { "xaddB", Eb, Gb, XX },
+ { "xaddS", Ev, Gv, XX },
+ { PREGRP1 },
+ { "movntiS", Ev, Gv, XX },
+ { "pinsrw", MX, Edqw, Ib },
+ { "pextrw", Gdq, MS, Ib },
+ { "shufpX", XM, EX, Ib },
+ { GRP9 },
+ /* c8 */
+ { "bswap", RMeAX, XX, XX },
+ { "bswap", RMeCX, XX, XX },
+ { "bswap", RMeDX, XX, XX },
+ { "bswap", RMeBX, XX, XX },
+ { "bswap", RMeSP, XX, XX },
+ { "bswap", RMeBP, XX, XX },
+ { "bswap", RMeSI, XX, XX },
+ { "bswap", RMeDI, XX, XX },
+ /* d0 */
+ { PREGRP27 },
+ { "psrlw", MX, EM, XX },
+ { "psrld", MX, EM, XX },
+ { "psrlq", MX, EM, XX },
+ { "paddq", MX, EM, XX },
+ { "pmullw", MX, EM, XX },
+ { PREGRP21 },
+ { "pmovmskb", Gdq, MS, XX },
+ /* d8 */
+ { "psubusb", MX, EM, XX },
+ { "psubusw", MX, EM, XX },
+ { "pminub", MX, EM, XX },
+ { "pand", MX, EM, XX },
+ { "paddusb", MX, EM, XX },
+ { "paddusw", MX, EM, XX },
+ { "pmaxub", MX, EM, XX },
+ { "pandn", MX, EM, XX },
+ /* e0 */
+ { "pavgb", MX, EM, XX },
+ { "psraw", MX, EM, XX },
+ { "psrad", MX, EM, XX },
+ { "pavgw", MX, EM, XX },
+ { "pmulhuw", MX, EM, XX },
+ { "pmulhw", MX, EM, XX },
+ { PREGRP15 },
+ { PREGRP25 },
+ /* e8 */
+ { "psubsb", MX, EM, XX },
+ { "psubsw", MX, EM, XX },
+ { "pminsw", MX, EM, XX },
+ { "por", MX, EM, XX },
+ { "paddsb", MX, EM, XX },
+ { "paddsw", MX, EM, XX },
+ { "pmaxsw", MX, EM, XX },
+ { "pxor", MX, EM, XX },
+ /* f0 */
+ { PREGRP32 },
+ { "psllw", MX, EM, XX },
+ { "pslld", MX, EM, XX },
+ { "psllq", MX, EM, XX },
+ { "pmuludq", MX, EM, XX },
+ { "pmaddwd", MX, EM, XX },
+ { "psadbw", MX, EM, XX },
+ { PREGRP18 },
+ /* f8 */
+ { "psubb", MX, EM, XX },
+ { "psubw", MX, EM, XX },
+ { "psubd", MX, EM, XX },
+ { "psubq", MX, EM, XX },
+ { "paddb", MX, EM, XX },
+ { "paddw", MX, EM, XX },
+ { "paddd", MX, EM, XX },
+ { "(bad)", XX, XX, XX }
+};
+
+static const unsigned char onebyte_has_modrm[256] = { /* Indexed by a one-byte opcode; print_insn copies the entry into need_modrm, and a nonzero value makes it fetch and split a ModRM byte. */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ /* 00 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 00 */
+ /* 10 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 10 */
+ /* 20 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 20 */
+ /* 30 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 30 */
+ /* 40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40 */
+ /* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 50 */
+ /* 60 */ 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0, /* 60 */
+ /* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 70 */
+ /* 80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 80 */
+ /* 90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 90 */
+ /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* a0 */
+ /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* b0 */
+ /* c0 */ 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* c0 */
+ /* d0 */ 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1, /* d0 */
+ /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* e0 */
+ /* f0 */ 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1 /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+static const unsigned char twobyte_has_modrm[256] = { /* Indexed by the byte after a 0x0f escape; print_insn copies the entry into need_modrm, and a nonzero value makes it fetch and split a ModRM byte. */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ /* 00 */ 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1, /* 0f */
+ /* 10 */ 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0, /* 1f */
+ /* 20 */ 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1, /* 2f */
+ /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */
+ /* 40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4f */
+ /* 50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 5f */
+ /* 60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6f */
+ /* 70 */ 1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1, /* 7f */
+ /* 80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 8f */
+ /* 90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 9f */
+ /* a0 */ 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1, /* af */
+ /* b0 */ 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1, /* bf */
+ /* c0 */ 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, /* cf */
+ /* d0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* df */
+ /* e0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ef */
+ /* f0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 /* ff */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+static const unsigned char twobyte_uses_SSE_prefix[256] = { /* Indexed by the byte after 0x0f. 1 = the dis386_twobyte slot is a PREGRPn entry, so repz/repnz/data select a prefix_user_table column and print_insn must not print them as prefixes. */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ /* 00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0f */
+ /* 10 */ 1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0, /* 1f */
+ /* 20 */ 0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0, /* 2f */
+ /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */
+ /* 40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */
+ /* 50 */ 0,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1, /* 5f */
+ /* 60 */ 0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1, /* 6f; 0x6c is PREGRP26 and must be flagged like 0x6d/0x6f */
+ /* 70 */ 1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, /* 7f */
+ /* 80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 8f */
+ /* 90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 9f */
+ /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */
+ /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */
+ /* c0 */ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
+ /* d0 */ 1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, /* df */
+ /* e0 */ 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, /* ef; 0xe7 is PREGRP25 and must be flagged like 0xe6 */
+ /* f0 */ 1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 /* ff */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+static char obuf[100]; /* Output text buffer; print_insn empties it and points obufp at its start. */
+static char *obufp; /* Current output destination; print_insn retargets it (obuf first, later op1out). */
+static char scratchbuf[100];
+static unsigned char *start_codep; /* Set by print_insn to the start of its fetch buffer. */
+static unsigned char *insn_codep; /* Value of codep right after ckprefix has run, i.e. past the prefix bytes. */
+static unsigned char *codep; /* Next byte to decode. */
+static disassemble_info *the_info; /* The info argument of the current print_insn call. */
+static int mod; /* ModRM bits 7-6; only refreshed by print_insn when need_modrm is set. */
+static int rm; /* ModRM bits 2-0. */
+static int reg; /* ModRM bits 5-3. */
+static unsigned char need_modrm; /* Copied from onebyte_has_modrm / twobyte_has_modrm for the current opcode. */
+
+/* If we are accessing mod/rm/reg without need_modrm set, then the
+ values are stale. Hitting this abort likely indicates that you
+ need to update onebyte_has_modrm or twobyte_has_modrm. */
+#define MODRM_CHECK if (!need_modrm) abort ()
+
+static const char **names64; /* These seven pointers select the intel_* or att_* tables; print_insn sets them from intel_syntax. */
+static const char **names32;
+static const char **names16;
+static const char **names8;
+static const char **names8rex;
+static const char **names_seg;
+static const char **index16;
+
+static const char *intel_names64[] = { /* Intel-syntax name tables (no '%' prefix); print_insn installs them when intel_syntax is nonzero. */
+ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+};
+static const char *intel_names32[] = {
+ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
+ "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
+};
+static const char *intel_names16[] = {
+ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
+ "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
+};
+static const char *intel_names8[] = { /* Only 8 entries (with ah/ch/dh/bh); the 16-entry variant is intel_names8rex. */
+ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
+};
+static const char *intel_names8rex[] = {
+ "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
+ "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
+};
+static const char *intel_names_seg[] = { /* Indices 6 and 7 have no name and print as "?". */
+ "es", "cs", "ss", "ds", "fs", "gs", "?", "?",
+};
+static const char *intel_index16[] = {
+ "bx+si", "bx+di", "bp+si", "bp+di", "si", "di", "bp", "bx"
+};
+
+static const char *att_names64[] = { /* AT&T-syntax name tables ('%' prefix); print_insn installs them when intel_syntax is zero. */
+ "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
+};
+static const char *att_names32[] = {
+ "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+ "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d"
+};
+static const char *att_names16[] = {
+ "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
+ "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w"
+};
+static const char *att_names8[] = { /* Only 8 entries (with %ah/%ch/%dh/%bh); the 16-entry variant is att_names8rex. */
+ "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh",
+};
+static const char *att_names8rex[] = {
+ "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
+ "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b"
+};
+static const char *att_names_seg[] = { /* Indices 6 and 7 have no name and print as "%?". */
+ "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "%?", "%?",
+};
+static const char *att_index16[] = {
+ "%bx,%si", "%bx,%di", "%bp,%si", "%bp,%di", "%si", "%di", "%bp", "%bx"
+};
+
+static const struct dis386 grps[][8] = { /* Opcode groups: print_insn resolves a USE_GROUPS entry as grps[dp->bytemode2][reg], so each 8-entry row is indexed by the ModRM reg field. */
+ /* GRP1b */
+ {
+ { "addA", Eb, Ib, XX },
+ { "orA", Eb, Ib, XX },
+ { "adcA", Eb, Ib, XX },
+ { "sbbA", Eb, Ib, XX },
+ { "andA", Eb, Ib, XX },
+ { "subA", Eb, Ib, XX },
+ { "xorA", Eb, Ib, XX },
+ { "cmpA", Eb, Ib, XX }
+ },
+ /* GRP1S */
+ {
+ { "addQ", Ev, Iv, XX },
+ { "orQ", Ev, Iv, XX },
+ { "adcQ", Ev, Iv, XX },
+ { "sbbQ", Ev, Iv, XX },
+ { "andQ", Ev, Iv, XX },
+ { "subQ", Ev, Iv, XX },
+ { "xorQ", Ev, Iv, XX },
+ { "cmpQ", Ev, Iv, XX }
+ },
+ /* GRP1Ss */
+ {
+ { "addQ", Ev, sIb, XX },
+ { "orQ", Ev, sIb, XX },
+ { "adcQ", Ev, sIb, XX },
+ { "sbbQ", Ev, sIb, XX },
+ { "andQ", Ev, sIb, XX },
+ { "subQ", Ev, sIb, XX },
+ { "xorQ", Ev, sIb, XX },
+ { "cmpQ", Ev, sIb, XX }
+ },
+ /* GRP2b */
+ {
+ { "rolA", Eb, Ib, XX },
+ { "rorA", Eb, Ib, XX },
+ { "rclA", Eb, Ib, XX },
+ { "rcrA", Eb, Ib, XX },
+ { "shlA", Eb, Ib, XX },
+ { "shrA", Eb, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ { "sarA", Eb, Ib, XX },
+ },
+ /* GRP2S */
+ {
+ { "rolQ", Ev, Ib, XX },
+ { "rorQ", Ev, Ib, XX },
+ { "rclQ", Ev, Ib, XX },
+ { "rcrQ", Ev, Ib, XX },
+ { "shlQ", Ev, Ib, XX },
+ { "shrQ", Ev, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ { "sarQ", Ev, Ib, XX },
+ },
+ /* GRP2b_one */
+ {
+ { "rolA", Eb, I1, XX },
+ { "rorA", Eb, I1, XX },
+ { "rclA", Eb, I1, XX },
+ { "rcrA", Eb, I1, XX },
+ { "shlA", Eb, I1, XX },
+ { "shrA", Eb, I1, XX },
+ { "(bad)", XX, XX, XX },
+ { "sarA", Eb, I1, XX },
+ },
+ /* GRP2S_one */
+ {
+ { "rolQ", Ev, I1, XX },
+ { "rorQ", Ev, I1, XX },
+ { "rclQ", Ev, I1, XX },
+ { "rcrQ", Ev, I1, XX },
+ { "shlQ", Ev, I1, XX },
+ { "shrQ", Ev, I1, XX },
+ { "(bad)", XX, XX, XX},
+ { "sarQ", Ev, I1, XX },
+ },
+ /* GRP2b_cl */
+ {
+ { "rolA", Eb, CL, XX },
+ { "rorA", Eb, CL, XX },
+ { "rclA", Eb, CL, XX },
+ { "rcrA", Eb, CL, XX },
+ { "shlA", Eb, CL, XX },
+ { "shrA", Eb, CL, XX },
+ { "(bad)", XX, XX, XX },
+ { "sarA", Eb, CL, XX },
+ },
+ /* GRP2S_cl */
+ {
+ { "rolQ", Ev, CL, XX },
+ { "rorQ", Ev, CL, XX },
+ { "rclQ", Ev, CL, XX },
+ { "rcrQ", Ev, CL, XX },
+ { "shlQ", Ev, CL, XX },
+ { "shrQ", Ev, CL, XX },
+ { "(bad)", XX, XX, XX },
+ { "sarQ", Ev, CL, XX }
+ },
+ /* GRP3b */
+ {
+ { "testA", Eb, Ib, XX },
+ { "(bad)", Eb, XX, XX },
+ { "notA", Eb, XX, XX },
+ { "negA", Eb, XX, XX },
+ { "mulA", Eb, XX, XX }, /* Don't print the implicit %al register, */
+ { "imulA", Eb, XX, XX }, /* to distinguish these opcodes from other */
+ { "divA", Eb, XX, XX }, /* mul/imul opcodes. Do the same for div */
+ { "idivA", Eb, XX, XX } /* and idiv for consistency. */
+ },
+ /* GRP3S */
+ {
+ { "testQ", Ev, Iv, XX },
+ { "(bad)", XX, XX, XX },
+ { "notQ", Ev, XX, XX },
+ { "negQ", Ev, XX, XX },
+ { "mulQ", Ev, XX, XX }, /* Don't print the implicit register. */
+ { "imulQ", Ev, XX, XX },
+ { "divQ", Ev, XX, XX },
+ { "idivQ", Ev, XX, XX },
+ },
+ /* GRP4 */
+ {
+ { "incA", Eb, XX, XX },
+ { "decA", Eb, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ },
+ /* GRP5 */
+ {
+ { "incQ", Ev, XX, XX },
+ { "decQ", Ev, XX, XX },
+ { "callT", indirEv, XX, XX },
+ { "JcallT", indirEp, XX, XX },
+ { "jmpT", indirEv, XX, XX },
+ { "JjmpT", indirEp, XX, XX },
+ { "pushU", Ev, XX, XX },
+ { "(bad)", XX, XX, XX },
+ },
+ /* GRP6 */
+ {
+ { "sldtQ", Ev, XX, XX },
+ { "strQ", Ev, XX, XX },
+ { "lldt", Ew, XX, XX },
+ { "ltr", Ew, XX, XX },
+ { "verr", Ew, XX, XX },
+ { "verw", Ew, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX }
+ },
+ /* GRP7 */
+ {
+ { "sgdtIQ", VMX_Fixup, 0, XX, XX },
+ { "sidtIQ", PNI_Fixup, 0, XX, XX },
+ { "lgdt{Q|Q||}", M, XX, XX },
+ { "lidt{Q|Q||}", SVME_Fixup, 0, XX, XX },
+ { "smswQ", Ev, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "lmsw", Ew, XX, XX },
+ { "invlpg", INVLPG_Fixup, w_mode, XX, XX },
+ },
+ /* GRP8 */
+ {
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "btQ", Ev, Ib, XX },
+ { "btsQ", Ev, Ib, XX },
+ { "btrQ", Ev, Ib, XX },
+ { "btcQ", Ev, Ib, XX },
+ },
+ /* GRP9 */
+ {
+ { "(bad)", XX, XX, XX },
+ { "cmpxchg8b", Eq, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "", VM, XX, XX }, /* See OP_VMX. */
+ { "vmptrst", Eq, XX, XX },
+ },
+ /* GRP10 */
+ {
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "psrlw", MS, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ { "psraw", MS, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ { "psllw", MS, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ },
+ /* GRP11 */
+ {
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "psrld", MS, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ { "psrad", MS, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ { "pslld", MS, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ },
+ /* GRP12 */
+ {
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "psrlq", MS, Ib, XX },
+ { "psrldq", MS, Ib, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "psllq", MS, Ib, XX },
+ { "pslldq", MS, Ib, XX },
+ },
+ /* GRP13 */
+ {
+ { "fxsave", Ev, XX, XX },
+ { "fxrstor", Ev, XX, XX },
+ { "ldmxcsr", Ev, XX, XX },
+ { "stmxcsr", Ev, XX, XX },
- { "(bad)", XX, XX, XX },
- { "lfence", OP_0fae, 0, XX, XX },
++ { "xsave", Ev, XX, XX },
++ { "xrstor", OP_0fae, 0, XX, XX },
+ { "mfence", OP_0fae, 0, XX, XX },
+ { "clflush", OP_0fae, 0, XX, XX },
+ },
+ /* GRP14 */
+ {
+ { "prefetchnta", Ev, XX, XX },
+ { "prefetcht0", Ev, XX, XX },
+ { "prefetcht1", Ev, XX, XX },
+ { "prefetcht2", Ev, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ },
+ /* GRPAMD */
+ {
+ { "prefetch", Eb, XX, XX },
+ { "prefetchw", Eb, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ { "(bad)", XX, XX, XX },
+ },
+ /* GRPPADLCK1 */
+ {
+ { "xstore-rng", OP_0f07, 0, XX, XX },
+ { "xcrypt-ecb", OP_0f07, 0, XX, XX },
+ { "xcrypt-cbc", OP_0f07, 0, XX, XX },
+ { "xcrypt-ctr", OP_0f07, 0, XX, XX },
+ { "xcrypt-cfb", OP_0f07, 0, XX, XX },
+ { "xcrypt-ofb", OP_0f07, 0, XX, XX },
+ { "(bad)", OP_0f07, 0, XX, XX },
+ { "(bad)", OP_0f07, 0, XX, XX },
+ },
+ /* GRPPADLCK2 */
+ {
+ { "montmul", OP_0f07, 0, XX, XX },
+ { "xsha1", OP_0f07, 0, XX, XX },
+ { "xsha256", OP_0f07, 0, XX, XX },
+ { "(bad)", OP_0f07, 0, XX, XX },
+ { "(bad)", OP_0f07, 0, XX, XX },
+ { "(bad)", OP_0f07, 0, XX, XX },
+ { "(bad)", OP_0f07, 0, XX, XX },
+ { "(bad)", OP_0f07, 0, XX, XX },
+ }
+};
+
+static const struct dis386 prefix_user_table[][4] = { /* Prefix-selected variants: print_insn resolves a USE_PREFIX_USER_TABLE entry as prefix_user_table[dp->bytemode2][index], testing repz, then data, then repnz. */
+ /* PREGRP0 */
+ {
+ { "addps", XM, EX, XX }, /* index 0: none of the three prefixes */
+ { "addss", XM, EX, XX }, /* index 1: PREFIX_REPZ (0xf3) */
+ { "addpd", XM, EX, XX }, /* index 2: PREFIX_DATA (0x66) */
+ { "addsd", XM, EX, XX }, /* index 3: PREFIX_REPNZ (0xf2) */
+ },
+ /* PREGRP1 */
+ {
+ { "", XM, EX, OPSIMD }, /* See OP_SIMD_SUFFIX. */
+ { "", XM, EX, OPSIMD },
+ { "", XM, EX, OPSIMD },
+ { "", XM, EX, OPSIMD },
+ },
+ /* PREGRP2 */
+ {
+ { "cvtpi2ps", XM, EM, XX },
+ { "cvtsi2ssY", XM, Ev, XX },
+ { "cvtpi2pd", XM, EM, XX },
+ { "cvtsi2sdY", XM, Ev, XX },
+ },
+ /* PREGRP3 */
+ {
+ { "cvtps2pi", MX, EX, XX },
+ { "cvtss2siY", Gv, EX, XX },
+ { "cvtpd2pi", MX, EX, XX },
+ { "cvtsd2siY", Gv, EX, XX },
+ },
+ /* PREGRP4 */
+ {
+ { "cvttps2pi", MX, EX, XX },
+ { "cvttss2siY", Gv, EX, XX },
+ { "cvttpd2pi", MX, EX, XX },
+ { "cvttsd2siY", Gv, EX, XX },
+ },
+ /* PREGRP5 */
+ {
+ { "divps", XM, EX, XX },
+ { "divss", XM, EX, XX },
+ { "divpd", XM, EX, XX },
+ { "divsd", XM, EX, XX },
+ },
+ /* PREGRP6 */
+ {
+ { "maxps", XM, EX, XX },
+ { "maxss", XM, EX, XX },
+ { "maxpd", XM, EX, XX },
+ { "maxsd", XM, EX, XX },
+ },
+ /* PREGRP7 */
+ {
+ { "minps", XM, EX, XX },
+ { "minss", XM, EX, XX },
+ { "minpd", XM, EX, XX },
+ { "minsd", XM, EX, XX },
+ },
+ /* PREGRP8 */
+ {
+ { "movups", XM, EX, XX },
+ { "movss", XM, EX, XX },
+ { "movupd", XM, EX, XX },
+ { "movsd", XM, EX, XX },
+ },
+ /* PREGRP9 */
+ {
+ { "movups", EX, XM, XX },
+ { "movss", EX, XM, XX },
+ { "movupd", EX, XM, XX },
+ { "movsd", EX, XM, XX },
+ },
+ /* PREGRP10 */
+ {
+ { "mulps", XM, EX, XX },
+ { "mulss", XM, EX, XX },
+ { "mulpd", XM, EX, XX },
+ { "mulsd", XM, EX, XX },
+ },
+ /* PREGRP11 */
+ {
+ { "rcpps", XM, EX, XX },
+ { "rcpss", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP12 */
+ {
+ { "rsqrtps", XM, EX, XX },
+ { "rsqrtss", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP13 */
+ {
+ { "sqrtps", XM, EX, XX },
+ { "sqrtss", XM, EX, XX },
+ { "sqrtpd", XM, EX, XX },
+ { "sqrtsd", XM, EX, XX },
+ },
+ /* PREGRP14 */
+ {
+ { "subps", XM, EX, XX },
+ { "subss", XM, EX, XX },
+ { "subpd", XM, EX, XX },
+ { "subsd", XM, EX, XX },
+ },
+ /* PREGRP15 */
+ {
+ { "(bad)", XM, EX, XX },
+ { "cvtdq2pd", XM, EX, XX },
+ { "cvttpd2dq", XM, EX, XX },
+ { "cvtpd2dq", XM, EX, XX },
+ },
+ /* PREGRP16 */
+ {
+ { "cvtdq2ps", XM, EX, XX },
+ { "cvttps2dq",XM, EX, XX },
+ { "cvtps2dq",XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP17 */
+ {
+ { "cvtps2pd", XM, EX, XX },
+ { "cvtss2sd", XM, EX, XX },
+ { "cvtpd2ps", XM, EX, XX },
+ { "cvtsd2ss", XM, EX, XX },
+ },
+ /* PREGRP18 */
+ {
+ { "maskmovq", MX, MS, XX },
+ { "(bad)", XM, EX, XX },
+ { "maskmovdqu", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP19 */
+ {
+ { "movq", MX, EM, XX },
+ { "movdqu", XM, EX, XX },
+ { "movdqa", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP20 */
+ {
+ { "movq", EM, MX, XX },
+ { "movdqu", EX, XM, XX },
+ { "movdqa", EX, XM, XX },
+ { "(bad)", EX, XM, XX },
+ },
+ /* PREGRP21 */
+ {
+ { "(bad)", EX, XM, XX },
+ { "movq2dq", XM, MS, XX },
+ { "movq", EX, XM, XX },
+ { "movdq2q", MX, XS, XX },
+ },
+ /* PREGRP22 */
+ {
+ { "pshufw", MX, EM, Ib },
+ { "pshufhw", XM, EX, Ib },
+ { "pshufd", XM, EX, Ib },
+ { "pshuflw", XM, EX, Ib },
+ },
+ /* PREGRP23 */
+ {
+ { "movd", Edq, MX, XX },
+ { "movq", XM, EX, XX },
+ { "movd", Edq, XM, XX },
+ { "(bad)", Ed, XM, XX },
+ },
+ /* PREGRP24 */
+ {
+ { "(bad)", MX, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "punpckhqdq", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP25 */
+ {
+ { "movntq", EM, MX, XX },
+ { "(bad)", EM, XM, XX },
+ { "movntdq", EM, XM, XX },
+ { "(bad)", EM, XM, XX },
+ },
+ /* PREGRP26 */
+ {
+ { "(bad)", MX, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "punpcklqdq", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP27 */
+ {
+ { "(bad)", MX, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "addsubpd", XM, EX, XX },
+ { "addsubps", XM, EX, XX },
+ },
+ /* PREGRP28 */
+ {
+ { "(bad)", MX, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "haddpd", XM, EX, XX },
+ { "haddps", XM, EX, XX },
+ },
+ /* PREGRP29 */
+ {
+ { "(bad)", MX, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "hsubpd", XM, EX, XX },
+ { "hsubps", XM, EX, XX },
+ },
+ /* PREGRP30 */
+ {
+ { "movlpX", XM, EX, SIMD_Fixup, 'h' }, /* really only 2 operands */
+ { "movsldup", XM, EX, XX },
+ { "movlpd", XM, EX, XX },
+ { "movddup", XM, EX, XX },
+ },
+ /* PREGRP31 */
+ {
+ { "movhpX", XM, EX, SIMD_Fixup, 'l' },
+ { "movshdup", XM, EX, XX },
+ { "movhpd", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ },
+ /* PREGRP32 */
+ {
+ { "(bad)", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "(bad)", XM, EX, XX },
+ { "lddqu", XM, M, XX },
+ },
+};
+
+static const struct dis386 x86_64_table[][2] = { /* print_insn resolves an X86_64_SPECIAL entry as x86_64_table[dp->bytemode2][mode_64bit]. */
+ {
+ { "arpl", Ew, Gw, XX }, /* mode_64bit == 0 */
+ { "movs{||lq|xd}", Gv, Ed, XX }, /* mode_64bit == 1 */
+ },
+};
+
+#ifdef __KERNEL__ /* Text print_insn emits for an unknown table entry kind or an unnameable first prefix; only the non-kernel build wraps it in _(). */
+#define INTERNAL_DISASSEMBLER_ERROR "<internal disassembler error>"
+#else /* __KERNEL__ */
+#define INTERNAL_DISASSEMBLER_ERROR _("<internal disassembler error>")
+#endif /* __KERNEL__ */
+
+static void
+ckprefix (void) /* Consume the prefix bytes at codep: set a bit in `prefixes' for each one, keep the most recent REX byte in `rex', and return with codep on the first byte that is not treated as a prefix. */
+{
+ int newrex; /* REX byte seen in this iteration, or 0 when the byte was some other prefix */
+ rex = 0;
+ prefixes = 0;
+ used_prefixes = 0;
+ rex_used = 0;
+ while (1)
+ {
+ FETCH_DATA (the_info, codep + 1);
+ newrex = 0;
+ switch (*codep)
+ {
+ /* REX prefixes family. */
+ case 0x40:
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x45:
+ case 0x46:
+ case 0x47:
+ case 0x48:
+ case 0x49:
+ case 0x4a:
+ case 0x4b:
+ case 0x4c:
+ case 0x4d:
+ case 0x4e:
+ case 0x4f:
+ if (mode_64bit)
+ newrex = *codep;
+ else
+ return; /* outside 64-bit mode the byte is left unconsumed, so the caller decodes it as an opcode */
+ break;
+ case 0xf3:
+ prefixes |= PREFIX_REPZ;
+ break;
+ case 0xf2:
+ prefixes |= PREFIX_REPNZ;
+ break;
+ case 0xf0:
+ prefixes |= PREFIX_LOCK;
+ break;
+ case 0x2e:
+ prefixes |= PREFIX_CS;
+ break;
+ case 0x36:
+ prefixes |= PREFIX_SS;
+ break;
+ case 0x3e:
+ prefixes |= PREFIX_DS;
+ break;
+ case 0x26:
+ prefixes |= PREFIX_ES;
+ break;
+ case 0x64:
+ prefixes |= PREFIX_FS;
+ break;
+ case 0x65:
+ prefixes |= PREFIX_GS;
+ break;
+ case 0x66:
+ prefixes |= PREFIX_DATA;
+ break;
+ case 0x67:
+ prefixes |= PREFIX_ADDR;
+ break;
+ case FWAIT_OPCODE:
+ /* fwait is really an instruction. If there are prefixes
+ before the fwait, they belong to the fwait, *not* to the
+ following instruction. */
+ if (prefixes)
+ {
+ prefixes |= PREFIX_FWAIT;
+ codep++; /* the fwait byte itself is consumed before returning */
+ return;
+ }
+ prefixes = PREFIX_FWAIT;
+ break;
+ default:
+ return; /* not a prefix: leave codep on this byte */
+ }
+ /* Rex is ignored when followed by another prefix. */
+ if (rex)
+ {
+ oappend (prefix_name (rex, 0)); /* the superseded REX byte is written to the output by name */
+ oappend (" ");
+ }
+ rex = newrex; /* becomes 0 again unless the byte just handled was itself a REX */
+ codep++;
+ }
+}
+
+/* Return the name of the prefix byte PREF, or NULL if PREF is not a
+ prefix byte. SIZEFLAG's DFLAG/AFLAG bits pick the 0x66/0x67 name. */
+
+static const char *
+prefix_name (int pref, int sizeflag)
+{
+ switch (pref)
+ {
+ /* REX prefixes family. */
+ case 0x40:
+ return "rex";
+ case 0x41:
+ return "rexZ";
+ case 0x42:
+ return "rexY";
+ case 0x43:
+ return "rexYZ";
+ case 0x44:
+ return "rexX";
+ case 0x45:
+ return "rexXZ";
+ case 0x46:
+ return "rexXY";
+ case 0x47:
+ return "rexXYZ";
+ case 0x48:
+ return "rex64";
+ case 0x49:
+ return "rex64Z";
+ case 0x4a:
+ return "rex64Y";
+ case 0x4b:
+ return "rex64YZ";
+ case 0x4c:
+ return "rex64X";
+ case 0x4d:
+ return "rex64XZ";
+ case 0x4e:
+ return "rex64XY";
+ case 0x4f:
+ return "rex64XYZ";
+ case 0xf3:
+ return "repz";
+ case 0xf2:
+ return "repnz";
+ case 0xf0:
+ return "lock";
+ case 0x2e:
+ return "cs";
+ case 0x36:
+ return "ss";
+ case 0x3e:
+ return "ds";
+ case 0x26:
+ return "es";
+ case 0x64:
+ return "fs";
+ case 0x65:
+ return "gs";
+ case 0x66:
+ return (sizeflag & DFLAG) ? "data16" : "data32"; /* named after the size the prefix switches to */
+ case 0x67:
+ if (mode_64bit)
+ return (sizeflag & AFLAG) ? "addr32" : "addr64";
+ else
+ return (sizeflag & AFLAG) ? "addr16" : "addr32";
+ case FWAIT_OPCODE:
+ return "fwait";
+ default:
+ return NULL;
+ }
+}
+
+static char op1out[100], op2out[100], op3out[100]; /* Per-operand text buffers; print_insn empties all three for each instruction. */
+static int op_ad, op_index[3]; /* print_insn resets every op_index entry to -1. */
+static int two_source_ops; /* Set by print_insn when the opcode byte is 0x62 or 0xc8. */
+static bfd_vma op_address[3];
+static bfd_vma op_riprel[3];
+static bfd_vma start_pc; /* The pc argument of the current print_insn call. */
+
+/*
+ * On the 386's of 1988, the maximum length of an instruction is 15 bytes.
+ * (see topic "Redundant prefixes" in the "Differences from 8086"
+ * section of the "Virtual 8086 Mode" chapter.)
+ * 'pc' should be the address of this instruction; it will
+ * be used to print the target address if this is a relative jump or call.
+ * The function returns the length of this instruction in bytes.
+ * NOTE(review): this appears to document print_insn(), defined further down. */
+
+static char intel_syntax; /* 0 = AT&T, 1 = Intel, -1 = let print_insn choose from info->mach. */
+static char open_char; /* '[' for Intel syntax, '(' for AT&T. */
+static char close_char; /* ']' for Intel syntax, ')' for AT&T. */
+static char separator_char; /* '+' for Intel syntax, ',' for AT&T. */
+static char scale_char; /* '*' for Intel syntax, ',' for AT&T. */
+
+/* Here for backwards compatibility. When gdb stops using
+ print_insn_i386_att and print_insn_i386_intel these functions can
+ disappear, and print_insn_i386 be merged into print_insn. */
+int
+print_insn_i386_att (bfd_vma pc, disassemble_info *info) /* Disassemble via print_insn with AT&T syntax preselected; returns print_insn's result. */
+{
+ intel_syntax = 0; /* an "intel" entry in info->disassembler_options can still override this inside print_insn */
+
+ return print_insn (pc, info);
+}
+
+int
+print_insn_i386_intel (bfd_vma pc, disassemble_info *info) /* Disassemble via print_insn with Intel syntax preselected; returns print_insn's result. */
+{
+ intel_syntax = 1; /* an "att" entry in info->disassembler_options can still override this inside print_insn */
+
+ return print_insn (pc, info);
+}
+
+int
+print_insn_i386 (bfd_vma pc, disassemble_info *info) /* Disassemble via print_insn, letting it pick the syntax; returns print_insn's result. */
+{
+ intel_syntax = -1; /* sentinel: print_insn replaces it with 1 for the *_intel_syntax machine types, else 0 */
+
+ return print_insn (pc, info);
+}
+
+static int
+print_insn (bfd_vma pc, disassemble_info *info)
+{
+ const struct dis386 *dp;
+ int i;
+ char *first, *second, *third;
+ int needcomma;
+ unsigned char uses_SSE_prefix, uses_LOCK_prefix;
+ int sizeflag;
+ const char *p;
+ struct dis_private priv;
+
+ mode_64bit = (info->mach == bfd_mach_x86_64_intel_syntax
+ || info->mach == bfd_mach_x86_64);
+
+ if (intel_syntax == (char) -1)
+ intel_syntax = (info->mach == bfd_mach_i386_i386_intel_syntax
+ || info->mach == bfd_mach_x86_64_intel_syntax);
+
+ if (info->mach == bfd_mach_i386_i386
+ || info->mach == bfd_mach_x86_64
+ || info->mach == bfd_mach_i386_i386_intel_syntax
+ || info->mach == bfd_mach_x86_64_intel_syntax)
+ priv.orig_sizeflag = AFLAG | DFLAG;
+ else if (info->mach == bfd_mach_i386_i8086)
+ priv.orig_sizeflag = 0;
+ else
+ abort ();
+
+ for (p = info->disassembler_options; p != NULL; )
+ {
+ if (strncmp (p, "x86-64", 6) == 0)
+ {
+ mode_64bit = 1;
+ priv.orig_sizeflag = AFLAG | DFLAG;
+ }
+ else if (strncmp (p, "i386", 4) == 0)
+ {
+ mode_64bit = 0;
+ priv.orig_sizeflag = AFLAG | DFLAG;
+ }
+ else if (strncmp (p, "i8086", 5) == 0)
+ {
+ mode_64bit = 0;
+ priv.orig_sizeflag = 0;
+ }
+ else if (strncmp (p, "intel", 5) == 0)
+ {
+ intel_syntax = 1;
+ }
+ else if (strncmp (p, "att", 3) == 0)
+ {
+ intel_syntax = 0;
+ }
+ else if (strncmp (p, "addr", 4) == 0)
+ {
+ if (p[4] == '1' && p[5] == '6')
+ priv.orig_sizeflag &= ~AFLAG;
+ else if (p[4] == '3' && p[5] == '2')
+ priv.orig_sizeflag |= AFLAG;
+ }
+ else if (strncmp (p, "data", 4) == 0)
+ {
+ if (p[4] == '1' && p[5] == '6')
+ priv.orig_sizeflag &= ~DFLAG;
+ else if (p[4] == '3' && p[5] == '2')
+ priv.orig_sizeflag |= DFLAG;
+ }
+ else if (strncmp (p, "suffix", 6) == 0)
+ priv.orig_sizeflag |= SUFFIX_ALWAYS;
+
+ p = strchr (p, ',');
+ if (p != NULL)
+ p++;
+ }
+
+ if (intel_syntax)
+ {
+ names64 = intel_names64;
+ names32 = intel_names32;
+ names16 = intel_names16;
+ names8 = intel_names8;
+ names8rex = intel_names8rex;
+ names_seg = intel_names_seg;
+ index16 = intel_index16;
+ open_char = '[';
+ close_char = ']';
+ separator_char = '+';
+ scale_char = '*';
+ }
+ else
+ {
+ names64 = att_names64;
+ names32 = att_names32;
+ names16 = att_names16;
+ names8 = att_names8;
+ names8rex = att_names8rex;
+ names_seg = att_names_seg;
+ index16 = att_index16;
+ open_char = '(';
+ close_char = ')';
+ separator_char = ',';
+ scale_char = ',';
+ }
+
+ /* The output looks better if we put 7 bytes on a line, since that
+ puts most long word instructions on a single line. */
+ info->bytes_per_line = 7;
+
+ info->private_data = &priv;
+ priv.max_fetched = priv.the_buffer;
+ priv.insn_start = pc;
+
+ obuf[0] = 0;
+ op1out[0] = 0;
+ op2out[0] = 0;
+ op3out[0] = 0;
+
+ op_index[0] = op_index[1] = op_index[2] = -1;
+
+ the_info = info;
+ start_pc = pc;
+ start_codep = priv.the_buffer;
+ codep = priv.the_buffer;
+
+#ifndef __KERNEL__
+ if (setjmp (priv.bailout) != 0)
+ {
+ const char *name;
+
+ /* Getting here means we tried for data but didn't get it. That
+ means we have an incomplete instruction of some sort. Just
+ print the first byte as a prefix or a .byte pseudo-op. */
+ if (codep > priv.the_buffer)
+ {
+ name = prefix_name (priv.the_buffer[0], priv.orig_sizeflag);
+ if (name != NULL)
+ (*info->fprintf_func) (info->stream, "%s", name);
+ else
+ {
+ /* Just print the first byte as a .byte instruction. */
+ (*info->fprintf_func) (info->stream, ".byte 0x%x",
+ (unsigned int) priv.the_buffer[0]);
+ }
+
+ return 1;
+ }
+
+ return -1;
+ }
+#endif /* __KERNEL__ */
+
+ obufp = obuf;
+ ckprefix ();
+
+ insn_codep = codep;
+ sizeflag = priv.orig_sizeflag;
+
+ FETCH_DATA (info, codep + 1);
+ two_source_ops = (*codep == 0x62) || (*codep == 0xc8);
+
+ if ((prefixes & PREFIX_FWAIT)
+ && ((*codep < 0xd8) || (*codep > 0xdf)))
+ {
+ const char *name;
+
+ /* fwait not followed by floating point instruction. Print the
+ first prefix, which is probably fwait itself. */
+ name = prefix_name (priv.the_buffer[0], priv.orig_sizeflag);
+ if (name == NULL)
+ name = INTERNAL_DISASSEMBLER_ERROR;
+ (*info->fprintf_func) (info->stream, "%s", name);
+ return 1;
+ }
+
+ if (*codep == 0x0f)
+ {
+ FETCH_DATA (info, codep + 2);
+ dp = &dis386_twobyte[*++codep];
+ need_modrm = twobyte_has_modrm[*codep];
+ uses_SSE_prefix = twobyte_uses_SSE_prefix[*codep];
+ uses_LOCK_prefix = (*codep & ~0x02) == 0x20;
+ }
+ else
+ {
+ dp = &dis386[*codep];
+ need_modrm = onebyte_has_modrm[*codep];
+ uses_SSE_prefix = 0;
+ uses_LOCK_prefix = 0;
+ }
+ codep++;
+
+ if (!uses_SSE_prefix && (prefixes & PREFIX_REPZ))
+ {
+ oappend ("repz ");
+ used_prefixes |= PREFIX_REPZ;
+ }
+ if (!uses_SSE_prefix && (prefixes & PREFIX_REPNZ))
+ {
+ oappend ("repnz ");
+ used_prefixes |= PREFIX_REPNZ;
+ }
+ if (!uses_LOCK_prefix && (prefixes & PREFIX_LOCK))
+ {
+ oappend ("lock ");
+ used_prefixes |= PREFIX_LOCK;
+ }
+
+ if (prefixes & PREFIX_ADDR)
+ {
+ sizeflag ^= AFLAG;
+ if (dp->bytemode3 != loop_jcxz_mode || intel_syntax)
+ {
+ if ((sizeflag & AFLAG) || mode_64bit)
+ oappend ("addr32 ");
+ else
+ oappend ("addr16 ");
+ used_prefixes |= PREFIX_ADDR;
+ }
+ }
+
+ if (!uses_SSE_prefix && (prefixes & PREFIX_DATA))
+ {
+ sizeflag ^= DFLAG;
+ if (dp->bytemode3 == cond_jump_mode
+ && dp->bytemode1 == v_mode
+ && !intel_syntax)
+ {
+ if (sizeflag & DFLAG)
+ oappend ("data32 ");
+ else
+ oappend ("data16 ");
+ used_prefixes |= PREFIX_DATA;
+ }
+ }
+
+ if (need_modrm)
+ {
+ FETCH_DATA (info, codep + 1);
+ mod = (*codep >> 6) & 3;
+ reg = (*codep >> 3) & 7;
+ rm = *codep & 7;
+ }
+
+ if (dp->name == NULL && dp->bytemode1 == FLOATCODE)
+ {
+ dofloat (sizeflag);
+ }
+ else
+ {
+ int index;
+ if (dp->name == NULL)
+ {
+ switch (dp->bytemode1)
+ {
+ case USE_GROUPS:
+ dp = &grps[dp->bytemode2][reg];
+ break;
+
+ case USE_PREFIX_USER_TABLE:
+ index = 0;
+ used_prefixes |= (prefixes & PREFIX_REPZ);
+ if (prefixes & PREFIX_REPZ)
+ index = 1;
+ else
+ {
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ if (prefixes & PREFIX_DATA)
+ index = 2;
+ else
+ {
+ used_prefixes |= (prefixes & PREFIX_REPNZ);
+ if (prefixes & PREFIX_REPNZ)
+ index = 3;
+ }
+ }
+ dp = &prefix_user_table[dp->bytemode2][index];
+ break;
+
+ case X86_64_SPECIAL:
+ dp = &x86_64_table[dp->bytemode2][mode_64bit];
+ break;
+
+ default:
+ oappend (INTERNAL_DISASSEMBLER_ERROR);
+ break;
+ }
+ }
+
+ if (putop (dp->name, sizeflag) == 0)
+ {
+ obufp = op1out;
+ op_ad = 2;
+ if (dp->op1)
+ (*dp->op1) (dp->bytemode1, sizeflag);
+
+ obufp = op2out;
+ op_ad = 1;
+ if (dp->op2)
+ (*dp->op2) (dp->bytemode2, sizeflag);
+
+ obufp = op3out;
+ op_ad = 0;
+ if (dp->op3)
+ (*dp->op3) (dp->bytemode3, sizeflag);
+ }
+ }
+
+ /* See if any prefixes were not used. If so, print the first one
+ separately. If we don't do this, we'll wind up printing an
+ instruction stream which does not precisely correspond to the
+ bytes we are disassembling. */
+ if ((prefixes & ~used_prefixes) != 0)
+ {
+ const char *name;
+
+ name = prefix_name (priv.the_buffer[0], priv.orig_sizeflag);
+ if (name == NULL)
+ name = INTERNAL_DISASSEMBLER_ERROR;
+ (*info->fprintf_func) (info->stream, "%s", name);
+ return 1;
+ }
+ if (rex & ~rex_used)
+ {
+ const char *name;
+ name = prefix_name (rex | 0x40, priv.orig_sizeflag);
+ if (name == NULL)
+ name = INTERNAL_DISASSEMBLER_ERROR;
+ (*info->fprintf_func) (info->stream, "%s ", name);
+ }
+
+ obufp = obuf + strlen (obuf);
+ for (i = strlen (obuf); i < 6; i++)
+ oappend (" ");
+ oappend (" ");
+ (*info->fprintf_func) (info->stream, "%s", obuf);
+
+ /* The enter and bound instructions are printed with operands in the same
+ order as the intel book; everything else is printed in reverse order. */
+ if (intel_syntax || two_source_ops)
+ {
+ first = op1out;
+ second = op2out;
+ third = op3out;
+ op_ad = op_index[0];
+ op_index[0] = op_index[2];
+ op_index[2] = op_ad;
+ }
+ else
+ {
+ first = op3out;
+ second = op2out;
+ third = op1out;
+ }
+ needcomma = 0;
+ if (*first)
+ {
+ if (op_index[0] != -1 && !op_riprel[0])
+ (*info->print_address_func) ((bfd_vma) op_address[op_index[0]], info);
+ else
+ (*info->fprintf_func) (info->stream, "%s", first);
+ needcomma = 1;
+ }
+ if (*second)
+ {
+ if (needcomma)
+ (*info->fprintf_func) (info->stream, ",");
+ if (op_index[1] != -1 && !op_riprel[1])
+ (*info->print_address_func) ((bfd_vma) op_address[op_index[1]], info);
+ else
+ (*info->fprintf_func) (info->stream, "%s", second);
+ needcomma = 1;
+ }
+ if (*third)
+ {
+ if (needcomma)
+ (*info->fprintf_func) (info->stream, ",");
+ if (op_index[2] != -1 && !op_riprel[2])
+ (*info->print_address_func) ((bfd_vma) op_address[op_index[2]], info);
+ else
+ (*info->fprintf_func) (info->stream, "%s", third);
+ }
+ for (i = 0; i < 3; i++)
+ if (op_index[i] != -1 && op_riprel[i])
+ {
+ (*info->fprintf_func) (info->stream, " # ");
+ (*info->print_address_func) ((bfd_vma) (start_pc + codep - start_codep
+ + op_address[op_index[i]]), info);
+ }
+ return codep - priv.the_buffer;
+}
+
+/* Mnemonic templates for the x87 opcodes 0xd8..0xdf when the operand is
+   in memory.  dofloat indexes this table with (opcode - 0xd8) * 8 + reg,
+   so every opcode byte owns one group of eight entries.  */
+static const char *float_mem[] = {
+  /* d8 */
+  "fadd{s||s|}",  "fmul{s||s|}",  "fcom{s||s|}",  "fcomp{s||s|}",
+  "fsub{s||s|}",  "fsubr{s||s|}", "fdiv{s||s|}",  "fdivr{s||s|}",
+  /* d9 */
+  "fld{s||s|}",   "(bad)",        "fst{s||s|}",   "fstp{s||s|}",
+  "fldenvIC",     "fldcw",        "fNstenvIC",    "fNstcw",
+  /* da */
+  "fiadd{l||l|}", "fimul{l||l|}", "ficom{l||l|}", "ficomp{l||l|}",
+  "fisub{l||l|}", "fisubr{l||l|}", "fidiv{l||l|}", "fidivr{l||l|}",
+  /* db */
+  "fild{l||l|}",  "fisttp{l||l|}", "fist{l||l|}", "fistp{l||l|}",
+  "(bad)",        "fld{t||t|}",   "(bad)",        "fstp{t||t|}",
+  /* dc */
+  "fadd{l||l|}",  "fmul{l||l|}",  "fcom{l||l|}",  "fcomp{l||l|}",
+  "fsub{l||l|}",  "fsubr{l||l|}", "fdiv{l||l|}",  "fdivr{l||l|}",
+  /* dd */
+  "fld{l||l|}",   "fisttp{ll||ll|}", "fst{l||l|}", "fstp{l||l|}",
+  "frstorIC",     "(bad)",        "fNsaveIC",     "fNstsw",
+  /* de */
+  "fiadd",        "fimul",        "ficom",        "ficomp",
+  "fisub",        "fisubr",       "fidiv",        "fidivr",
+  /* df */
+  "fild",         "fisttp",       "fist",         "fistp",
+  "fbld",         "fild{ll||ll|}", "fbstp",       "fistp{ll||ll|}",
+};
+
+/* Operand bytemode for each float_mem entry, in the same order (eight
+   entries per opcode byte 0xd8..0xdf).  dofloat passes the selected value
+   to OP_E.  */
+static const unsigned char float_mem_mode[] = {
+  /* d8 */ d_mode, d_mode, d_mode, d_mode, d_mode, d_mode, d_mode, d_mode,
+  /* d9 */ d_mode, 0,      d_mode, d_mode, 0,      w_mode, 0,      w_mode,
+  /* da */ d_mode, d_mode, d_mode, d_mode, d_mode, d_mode, d_mode, d_mode,
+  /* db */ d_mode, d_mode, d_mode, d_mode, 0,      t_mode, 0,      t_mode,
+  /* dc */ q_mode, q_mode, q_mode, q_mode, q_mode, q_mode, q_mode, q_mode,
+  /* dd */ q_mode, q_mode, q_mode, q_mode, 0,      0,      0,      w_mode,
+  /* de */ w_mode, w_mode, w_mode, w_mode, w_mode, w_mode, w_mode, w_mode,
+  /* df */ w_mode, w_mode, w_mode, w_mode, t_mode, q_mode, t_mode, q_mode
+};
+
+/* Operand shorthands used by float_reg: an operand routine followed by
+   its bytemode argument.  */
+#define ST  OP_ST, 0
+#define STi OP_STi, 0
+
+/* Entries with a NULL name: dofloat takes the number stored in the
+   bytemode1 slot as the row of fgrps that holds the mnemonic.  */
+#define FGRPd9_2 NULL, NULL, 0, NULL, 0, NULL, 0
+#define FGRPd9_4 NULL, NULL, 1, NULL, 0, NULL, 0
+#define FGRPd9_5 NULL, NULL, 2, NULL, 0, NULL, 0
+#define FGRPd9_6 NULL, NULL, 3, NULL, 0, NULL, 0
+#define FGRPd9_7 NULL, NULL, 4, NULL, 0, NULL, 0
+#define FGRPda_5 NULL, NULL, 5, NULL, 0, NULL, 0
+#define FGRPdb_4 NULL, NULL, 6, NULL, 0, NULL, 0
+#define FGRPde_3 NULL, NULL, 7, NULL, 0, NULL, 0
+#define FGRPdf_4 NULL, NULL, 8, NULL, 0, NULL, 0
+
+/* Register-form x87 instructions.  dofloat indexes the first dimension
+   with (opcode - 0xd8) and the second with the ModRM reg field.  */
+static const struct dis386 float_reg[][8] = {
+  /* d8 */
+  {
+    { "fadd",     ST,  STi, XX },
+    { "fmul",     ST,  STi, XX },
+    { "fcom",     STi, XX,  XX },
+    { "fcomp",    STi, XX,  XX },
+    { "fsub",     ST,  STi, XX },
+    { "fsubr",    ST,  STi, XX },
+    { "fdiv",     ST,  STi, XX },
+    { "fdivr",    ST,  STi, XX },
+  },
+  /* d9 */
+  {
+    { "fld",      STi, XX,  XX },
+    { "fxch",     STi, XX,  XX },
+    { FGRPd9_2 },
+    { "(bad)",    XX,  XX,  XX },
+    { FGRPd9_4 },
+    { FGRPd9_5 },
+    { FGRPd9_6 },
+    { FGRPd9_7 },
+  },
+  /* da */
+  {
+    { "fcmovb",   ST,  STi, XX },
+    { "fcmove",   ST,  STi, XX },
+    { "fcmovbe",  ST,  STi, XX },
+    { "fcmovu",   ST,  STi, XX },
+    { "(bad)",    XX,  XX,  XX },
+    { FGRPda_5 },
+    { "(bad)",    XX,  XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+  },
+  /* db */
+  {
+    { "fcmovnb",  ST,  STi, XX },
+    { "fcmovne",  ST,  STi, XX },
+    { "fcmovnbe", ST,  STi, XX },
+    { "fcmovnu",  ST,  STi, XX },
+    { FGRPdb_4 },
+    { "fucomi",   ST,  STi, XX },
+    { "fcomi",    ST,  STi, XX },
+    { "(bad)",    XX,  XX,  XX },
+  },
+  /* dc */
+  {
+    { "fadd",     STi, ST,  XX },
+    { "fmul",     STi, ST,  XX },
+    { "(bad)",    XX,  XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+#if UNIXWARE_COMPAT
+    { "fsub",     STi, ST,  XX },
+    { "fsubr",    STi, ST,  XX },
+    { "fdiv",     STi, ST,  XX },
+    { "fdivr",    STi, ST,  XX },
+#else
+    { "fsubr",    STi, ST,  XX },
+    { "fsub",     STi, ST,  XX },
+    { "fdivr",    STi, ST,  XX },
+    { "fdiv",     STi, ST,  XX },
+#endif
+  },
+  /* dd */
+  {
+    { "ffree",    STi, XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+    { "fst",      STi, XX,  XX },
+    { "fstp",     STi, XX,  XX },
+    { "fucom",    STi, XX,  XX },
+    { "fucomp",   STi, XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+  },
+  /* de */
+  {
+    { "faddp",    STi, ST,  XX },
+    { "fmulp",    STi, ST,  XX },
+    { "(bad)",    XX,  XX,  XX },
+    { FGRPde_3 },
+#if UNIXWARE_COMPAT
+    { "fsubp",    STi, ST,  XX },
+    { "fsubrp",   STi, ST,  XX },
+    { "fdivp",    STi, ST,  XX },
+    { "fdivrp",   STi, ST,  XX },
+#else
+    { "fsubrp",   STi, ST,  XX },
+    { "fsubp",    STi, ST,  XX },
+    { "fdivrp",   STi, ST,  XX },
+    { "fdivp",    STi, ST,  XX },
+#endif
+  },
+  /* df */
+  {
+    { "ffreep",   STi, XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+    { "(bad)",    XX,  XX,  XX },
+    { FGRPdf_4 },
+    { "fucomip",  ST,  STi, XX },
+    { "fcomip",   ST,  STi, XX },
+    { "(bad)",    XX,  XX,  XX },
+  },
+};
+
+/* Mnemonics for the x87 register-form groups.  dofloat selects the row
+   from the bytemode1 slot of a float_reg entry whose name is NULL and the
+   column from the ModRM rm field.  The trailing number in each row comment
+   is the row index used by the FGRP* macros.  */
+static char *fgrps[][8] = {
+  /* d9_2  0 */
+  { "fnop", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)" },
+  /* d9_4  1 */
+  { "fchs", "fabs", "(bad)", "(bad)", "ftst", "fxam", "(bad)", "(bad)" },
+  /* d9_5  2 */
+  { "fld1", "fldl2t", "fldl2e", "fldpi", "fldlg2", "fldln2", "fldz", "(bad)" },
+  /* d9_6  3 */
+  { "f2xm1", "fyl2x", "fptan", "fpatan",
+    "fxtract", "fprem1", "fdecstp", "fincstp" },
+  /* d9_7  4 */
+  { "fprem", "fyl2xp1", "fsqrt", "fsincos",
+    "frndint", "fscale", "fsin", "fcos" },
+  /* da_5  5 */
+  { "(bad)", "fucompp", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)" },
+  /* db_4  6 */
+  { "feni(287 only)", "fdisi(287 only)", "fNclex", "fNinit",
+    "fNsetpm(287 only)", "(bad)", "(bad)", "(bad)" },
+  /* de_3  7 */
+  { "(bad)", "fcompp", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)" },
+  /* df_4  8 */
+  { "fNstsw", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)", "(bad)" },
+};
+
+/* Disassemble an x87 instruction.  The opcode byte is the one just before
+   codep; mod, reg and rm come from the already-decoded ModRM byte.
+   When mod is not 3 the mnemonic and operand mode are taken from
+   float_mem / float_mem_mode and the operand is printed by OP_E.
+   When mod is 3 the entry comes from float_reg, or from fgrps when the
+   float_reg entry has no name.  */
+static void
+dofloat (int sizeflag)
+{
+  const unsigned char opcode = codep[-1];
+  const int row = opcode - 0xd8;
+  const struct dis386 *entry;
+
+  if (mod == 3)
+    {
+      /* Step over the mod/rm byte; nothing else decodes it here.  */
+      MODRM_CHECK;
+      codep++;
+
+      entry = &float_reg[row][reg];
+      if (entry->name != NULL)
+        {
+          putop (entry->name, sizeflag);
+
+          obufp = op1out;
+          if (entry->op1)
+            (*entry->op1) (entry->bytemode1, sizeflag);
+          obufp = op2out;
+          if (entry->op2)
+            (*entry->op2) (entry->bytemode2, sizeflag);
+          return;
+        }
+
+      putop (fgrps[entry->bytemode1][rm], sizeflag);
+
+      /* fnstsw (df e0) is the only grouped instruction with an operand:
+         the first 16-bit register name.  */
+      if (opcode == 0xdf && codep[-1] == 0xe0)
+        strcpy (op1out, names16[0]);
+      return;
+    }
+
+  {
+    const int slot = row * 8 + reg;
+
+    putop (float_mem[slot], sizeflag);
+    obufp = op1out;
+    OP_E (float_mem_mode[slot], sizeflag);
+  }
+}
+
+/* Operand routine: append the x87 stack-top register name.  Both
+   arguments are ignored.  */
+static void
+OP_ST (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  const char *st_name = "%st";
+
+  oappend (st_name);
+}
+
+/* Operand routine: append the x87 stack register numbered by the ModRM
+   rm field.  The text is formatted into scratchbuf and appended starting
+   at offset intel_syntax, which drops the leading '%' when intel_syntax
+   is 1.  Both arguments are ignored.  */
+static void
+OP_STi (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  const char *text;
+
+  sprintf (scratchbuf, "%%st(%d)", rm);
+  text = scratchbuf + intel_syntax;
+  oappend (text);
+}
+
+/* Expand the mnemonic TEMPLATE into the output buffer at obufp.
+
+ Ordinary characters are copied verbatim. Capital letters in the
+ template are macros that emit a size suffix (or nothing) depending on
+ SIZEFLAG, the active prefixes, rex, mod, intel_syntax and mode_64bit;
+ they also record in used_prefixes / rex_used which prefixes they
+ consumed. A "{a|b|c|d}" group holds alternatives, of which the one
+ numbered intel_syntax + 2 * mode_64bit is used.
+
+ Returns 0 after NUL-terminating the output, or 1 when the selected
+ alternative of a "{...}" group does not exist, in which case obuf is
+ overwritten with "(bad)". Calls abort () on a template that ends
+ inside a "{...}" group. */
+static int
+putop (const char *template, int sizeflag)
+{
+ const char *p;
+ int alt = 0;
+
+ for (p = template; *p; p++)
+ {
+ switch (*p)
+ {
+ default:
+ *obufp++ = *p;
+ break;
+ case '{':
+ /* Skip ALT '|' separators to reach the wanted alternative. */
+ alt = 0;
+ if (intel_syntax)
+ alt += 1;
+ if (mode_64bit)
+ alt += 2;
+ while (alt != 0)
+ {
+ while (*++p != '|')
+ {
+ if (*p == '}')
+ {
+ /* Alternative not valid. */
+ strcpy (obuf, "(bad)");
+ obufp = obuf + 5;
+ return 1;
+ }
+ else if (*p == '\0')
+ abort ();
+ }
+ alt--;
+ }
+ /* Fall through. */
+ case 'I':
+ /* `continue' skips the `alt = 0' at the bottom of the loop, so
+ ALT stays set for the next template character only; the 'C'
+ and 'Q' macros test it. */
+ alt = 1;
+ continue;
+ case '|':
+ /* End of the chosen alternative: skip the remaining ones. */
+ while (*++p != '}')
+ {
+ if (*p == '\0')
+ abort ();
+ }
+ break;
+ case '}':
+ break;
+ case 'A':
+ if (intel_syntax)
+ break;
+ if (mod != 3 || (sizeflag & SUFFIX_ALWAYS))
+ *obufp++ = 'b';
+ break;
+ case 'B':
+ if (intel_syntax)
+ break;
+ if (sizeflag & SUFFIX_ALWAYS)
+ *obufp++ = 'b';
+ break;
+ case 'C':
+ if (intel_syntax && !alt)
+ break;
+ if ((prefixes & PREFIX_DATA) || (sizeflag & SUFFIX_ALWAYS))
+ {
+ if (sizeflag & DFLAG)
+ *obufp++ = intel_syntax ? 'd' : 'l';
+ else
+ *obufp++ = intel_syntax ? 'w' : 's';
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ }
+ break;
+ case 'E': /* For jcxz/jecxz */
+ if (mode_64bit)
+ {
+ if (sizeflag & AFLAG)
+ *obufp++ = 'r';
+ else
+ *obufp++ = 'e';
+ }
+ else
+ if (sizeflag & AFLAG)
+ *obufp++ = 'e';
+ used_prefixes |= (prefixes & PREFIX_ADDR);
+ break;
+ case 'F':
+ if (intel_syntax)
+ break;
+ if ((prefixes & PREFIX_ADDR) || (sizeflag & SUFFIX_ALWAYS))
+ {
+ if (sizeflag & AFLAG)
+ *obufp++ = mode_64bit ? 'q' : 'l';
+ else
+ *obufp++ = mode_64bit ? 'l' : 'w';
+ used_prefixes |= (prefixes & PREFIX_ADDR);
+ }
+ break;
+ case 'H':
+ /* Emits ",pt" or ",pn" when exactly one of the CS/DS prefixes
+ is present. */
+ if (intel_syntax)
+ break;
+ if ((prefixes & (PREFIX_CS | PREFIX_DS)) == PREFIX_CS
+ || (prefixes & (PREFIX_CS | PREFIX_DS)) == PREFIX_DS)
+ {
+ used_prefixes |= prefixes & (PREFIX_CS | PREFIX_DS);
+ *obufp++ = ',';
+ *obufp++ = 'p';
+ if (prefixes & PREFIX_DS)
+ *obufp++ = 't';
+ else
+ *obufp++ = 'n';
+ }
+ break;
+ case 'J':
+ if (intel_syntax)
+ break;
+ *obufp++ = 'l';
+ break;
+ case 'L':
+ if (intel_syntax)
+ break;
+ if (sizeflag & SUFFIX_ALWAYS)
+ *obufp++ = 'l';
+ break;
+ case 'N':
+ /* 'n' unless an fwait prefix was seen, in which case the prefix
+ is marked as used instead. */
+ if ((prefixes & PREFIX_FWAIT) == 0)
+ *obufp++ = 'n';
+ else
+ used_prefixes |= PREFIX_FWAIT;
+ break;
+ case 'O':
+ USED_REX (REX_MODE64);
+ if (rex & REX_MODE64)
+ *obufp++ = 'o';
+ else
+ *obufp++ = 'd';
+ break;
+ case 'T':
+ if (intel_syntax)
+ break;
+ if (mode_64bit)
+ {
+ *obufp++ = 'q';
+ break;
+ }
+ /* Fall through. */
+ case 'P':
+ if (intel_syntax)
+ break;
+ if ((prefixes & PREFIX_DATA)
+ || (rex & REX_MODE64)
+ || (sizeflag & SUFFIX_ALWAYS))
+ {
+ USED_REX (REX_MODE64);
+ if (rex & REX_MODE64)
+ *obufp++ = 'q';
+ else
+ {
+ if (sizeflag & DFLAG)
+ *obufp++ = 'l';
+ else
+ *obufp++ = 'w';
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ }
+ }
+ break;
+ case 'U':
+ if (intel_syntax)
+ break;
+ if (mode_64bit)
+ {
+ *obufp++ = 'q';
+ break;
+ }
+ /* Fall through. */
+ case 'Q':
+ if (intel_syntax && !alt)
+ break;
+ USED_REX (REX_MODE64);
+ if (mod != 3 || (sizeflag & SUFFIX_ALWAYS))
+ {
+ if (rex & REX_MODE64)
+ *obufp++ = 'q';
+ else
+ {
+ if (sizeflag & DFLAG)
+ *obufp++ = intel_syntax ? 'd' : 'l';
+ else
+ *obufp++ = 'w';
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ }
+ }
+ break;
+ case 'R':
+ USED_REX (REX_MODE64);
+ if (intel_syntax)
+ {
+ if (rex & REX_MODE64)
+ {
+ *obufp++ = 'q';
+ *obufp++ = 't';
+ }
+ else if (sizeflag & DFLAG)
+ {
+ *obufp++ = 'd';
+ *obufp++ = 'q';
+ }
+ else
+ {
+ *obufp++ = 'w';
+ *obufp++ = 'd';
+ }
+ }
+ else
+ {
+ if (rex & REX_MODE64)
+ *obufp++ = 'q';
+ else if (sizeflag & DFLAG)
+ *obufp++ = 'l';
+ else
+ *obufp++ = 'w';
+ }
+ if (!(rex & REX_MODE64))
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ break;
+ case 'S':
+ if (intel_syntax)
+ break;
+ if (sizeflag & SUFFIX_ALWAYS)
+ {
+ if (rex & REX_MODE64)
+ *obufp++ = 'q';
+ else
+ {
+ if (sizeflag & DFLAG)
+ *obufp++ = 'l';
+ else
+ *obufp++ = 'w';
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ }
+ }
+ break;
+ case 'X':
+ if (prefixes & PREFIX_DATA)
+ *obufp++ = 'd';
+ else
+ *obufp++ = 's';
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ break;
+ case 'Y':
+ if (intel_syntax)
+ break;
+ if (rex & REX_MODE64)
+ {
+ USED_REX (REX_MODE64);
+ *obufp++ = 'q';
+ }
+ break;
+ /* implicit operand size 'l' for i386 or 'q' for x86-64 */
+ case 'W':
+ /* operand size flag for cwtl, cbtw */
+ USED_REX (0);
+ if (rex)
+ *obufp++ = 'l';
+ else if (sizeflag & DFLAG)
+ *obufp++ = 'w';
+ else
+ *obufp++ = 'b';
+ /* NOTE(review): in Intel syntax the `if (rex)' below is not chained
+ with `else' to the DFLAG test, so both can emit characters when
+ rex and DFLAG are set together -- confirm this is intended. */
+ if (intel_syntax)
+ {
+ if (rex)
+ {
+ *obufp++ = 'q';
+ *obufp++ = 'e';
+ }
+ if (sizeflag & DFLAG)
+ {
+ *obufp++ = 'd';
+ *obufp++ = 'e';
+ }
+ else
+ {
+ *obufp++ = 'w';
+ }
+ }
+ if (!rex)
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ break;
+ }
+ /* Reached by `break' only; the '{' and 'I' cases `continue' past it. */
+ alt = 0;
+ }
+ *obufp = 0;
+ return 0;
+}
+
+/* Copy the NUL-terminated string S to the output position obufp and
+   leave obufp pointing at the copied terminator, so that further appends
+   continue the same string.  */
+static void
+oappend (const char *s)
+{
+  while ((*obufp = *s) != '\0')
+    {
+      obufp++;
+      s++;
+    }
+}
+
+/* Append a segment-override qualifier for every segment prefix present
+   in prefixes, in the fixed order CS, DS, SS, ES, FS, GS, and mark each
+   one as used.  The text is appended from offset intel_syntax, which
+   drops the leading '%' when intel_syntax is 1.  */
+static void
+append_seg (void)
+{
+  static const struct
+  {
+    int prefix;
+    const char *text;
+  } overrides[] = {
+    { PREFIX_CS, "%cs:" },
+    { PREFIX_DS, "%ds:" },
+    { PREFIX_SS, "%ss:" },
+    { PREFIX_ES, "%es:" },
+    { PREFIX_FS, "%fs:" },
+    { PREFIX_GS, "%gs:" },
+  };
+  unsigned int i;
+
+  for (i = 0; i < sizeof overrides / sizeof overrides[0]; i++)
+    {
+      if (prefixes & overrides[i].prefix)
+        {
+          used_prefixes |= overrides[i].prefix;
+          oappend (overrides[i].text + intel_syntax);
+        }
+    }
+}
+
+/* Operand routine for an indirect ModRM operand: outside Intel syntax
+   the operand printed by OP_E is preceded by "*".  */
+static void
+OP_indirE (int bytemode, int sizeflag)
+{
+  if (intel_syntax == 0)
+    oappend ("*");
+
+  OP_E (bytemode, sizeflag);
+}
+
+/* Format DISP into BUF as text.
+
+   Outside 64-bit mode the value is truncated to int and printed with
+   sprintf, as "0x%x" when HEX is non-zero and as "%d" otherwise.
+
+   In 64-bit mode a hex value is "0x" followed by the sprintf_vma digits
+   with leading zeros removed (one digit is always kept); a decimal value
+   is converted by hand as a signed quantity, with the most negative
+   value special-cased because it cannot be negated.  */
+static void
+print_operand_value (char *buf, int hex, bfd_vma disp)
+{
+  char digits[30];
+
+  if (!mode_64bit)
+    {
+      if (hex)
+        sprintf (buf, "0x%x", (unsigned int) disp);
+      else
+        sprintf (buf, "%d", (int) disp);
+      return;
+    }
+
+  if (hex)
+    {
+      const char *first;
+
+      sprintf_vma (digits, disp);
+      first = digits;
+      while (first[0] == '0' && first[1] != '\0')
+        first++;
+
+      buf[0] = '0';
+      buf[1] = 'x';
+      strcpy (buf + 2, first);
+    }
+  else
+    {
+      bfd_signed_vma value = disp;
+      char *cursor;
+
+      if (value < 0)
+        {
+          *buf++ = '-';
+          value = -disp;
+          /* Still negative: this is 0x8000000000000000, whose magnitude
+             does not fit in the signed type.  */
+          if (value < 0)
+            {
+              strcpy (buf, "9223372036854775808");
+              return;
+            }
+        }
+      if (value == 0)
+        {
+          strcpy (buf, "0");
+          return;
+        }
+
+      /* Build the digits backwards from the end of the scratch array.  */
+      cursor = digits + 29;
+      *cursor = '\0';
+      do
+        {
+          *--cursor = (value % 10) + '0';
+          value /= 10;
+        }
+      while (value != 0);
+      strcpy (buf, cursor);
+    }
+}
+
+/* Operand routine: print the operand selected by the ModRM byte.
+
+ BYTEMODE selects the operand size (and, in Intel syntax, the
+ "... PTR " qualifier); SIZEFLAG supplies DFLAG and AFLAG.
+
+ When mod is 3 the operand is a register: rm, plus 8 when the REX_EXTZ
+ bit is set in rex, indexes the name table chosen by BYTEMODE.
+ Otherwise the operand is in memory: segment overrides are printed via
+ append_seg, then the displacement and the base/index/scale (or 16-bit
+ index16) expression. Any SIB and displacement bytes are consumed by
+ advancing codep. */
+static void
+OP_E (int bytemode, int sizeflag)
+{
+ bfd_vma disp;
+ int add = 0;
+ int riprel = 0;
+ USED_REX (REX_EXTZ);
+ if (rex & REX_EXTZ)
+ add += 8;
+
+ /* Skip mod/rm byte. */
+ MODRM_CHECK;
+ codep++;
+
+ if (mod == 3)
+ {
+ switch (bytemode)
+ {
+ case b_mode:
+ USED_REX (0);
+ if (rex)
+ oappend (names8rex[rm + add]);
+ else
+ oappend (names8[rm + add]);
+ break;
+ case w_mode:
+ oappend (names16[rm + add]);
+ break;
+ case d_mode:
+ oappend (names32[rm + add]);
+ break;
+ case q_mode:
+ oappend (names64[rm + add]);
+ break;
+ case m_mode:
+ if (mode_64bit)
+ oappend (names64[rm + add]);
+ else
+ oappend (names32[rm + add]);
+ break;
+ case branch_v_mode:
+ if (mode_64bit)
+ oappend (names64[rm + add]);
+ else
+ {
+ /* bytemode is always branch_v_mode here, so only DFLAG decides. */
+ if ((sizeflag & DFLAG) || bytemode != branch_v_mode)
+ oappend (names32[rm + add]);
+ else
+ oappend (names16[rm + add]);
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ }
+ break;
+ case v_mode:
+ case dq_mode:
+ case dqw_mode:
+ USED_REX (REX_MODE64);
+ if (rex & REX_MODE64)
+ oappend (names64[rm + add]);
+ else if ((sizeflag & DFLAG) || bytemode != v_mode)
+ oappend (names32[rm + add]);
+ else
+ oappend (names16[rm + add]);
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ break;
+ case 0:
+ break;
+ default:
+ oappend (INTERNAL_DISASSEMBLER_ERROR);
+ break;
+ }
+ return;
+ }
+
+ disp = 0;
+ append_seg ();
+
+ if ((sizeflag & AFLAG) || mode_64bit) /* 32 bit address mode */
+ {
+ int havesib;
+ int havebase;
+ int base;
+ int index = 0;
+ int scale = 0;
+
+ havesib = 0;
+ havebase = 1;
+ base = rm;
+
+ /* rm == 4 means a SIB byte follows; split it into scale/index/base. */
+ if (base == 4)
+ {
+ havesib = 1;
+ FETCH_DATA (the_info, codep + 1);
+ index = (*codep >> 3) & 7;
+ if (mode_64bit || index != 0x4)
+ /* When INDEX == 0x4 in 32 bit mode, SCALE is ignored. */
+ scale = (*codep >> 6) & 3;
+ base = *codep & 7;
+ USED_REX (REX_EXTY);
+ if (rex & REX_EXTY)
+ index += 8;
+ codep++;
+ }
+ base += add;
+
+ /* Fetch the displacement, whose size depends on mod. */
+ switch (mod)
+ {
+ case 0:
+ /* No base register: a 32-bit displacement follows, which is
+ RIP-relative in 64-bit mode when there is no SIB byte. */
+ if ((base & 7) == 5)
+ {
+ havebase = 0;
+ if (mode_64bit && !havesib)
+ riprel = 1;
+ disp = get32s ();
+ }
+ break;
+ case 1:
+ /* Sign-extend the 8-bit displacement. */
+ FETCH_DATA (the_info, codep + 1);
+ disp = *codep++;
+ if ((disp & 0x80) != 0)
+ disp -= 0x100;
+ break;
+ case 2:
+ disp = get32s ();
+ break;
+ }
+
+ /* Outside Intel syntax the displacement precedes the parenthesis. */
+ if (!intel_syntax)
+ if (mod != 0 || (base & 7) == 5)
+ {
+ print_operand_value (scratchbuf, !riprel, disp);
+ oappend (scratchbuf);
+ if (riprel)
+ {
+ set_op (disp, 1);
+ oappend ("(%rip)");
+ }
+ }
+
+ if (havebase || (havesib && (index != 4 || scale != 0)))
+ {
+ if (intel_syntax)
+ {
+ switch (bytemode)
+ {
+ case b_mode:
+ oappend ("BYTE PTR ");
+ break;
+ case w_mode:
+ case dqw_mode:
+ oappend ("WORD PTR ");
+ break;
+ case branch_v_mode:
+ case v_mode:
+ case dq_mode:
+ USED_REX (REX_MODE64);
+ if (rex & REX_MODE64)
+ oappend ("QWORD PTR ");
+ else if ((sizeflag & DFLAG) || bytemode == dq_mode)
+ oappend ("DWORD PTR ");
+ else
+ oappend ("WORD PTR ");
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ break;
+ case d_mode:
+ oappend ("DWORD PTR ");
+ break;
+ case q_mode:
+ oappend ("QWORD PTR ");
+ break;
+ case m_mode:
+ if (mode_64bit)
+ oappend ("QWORD PTR ");
+ else
+ oappend ("DWORD PTR ");
+ break;
+ case f_mode:
+ if (sizeflag & DFLAG)
+ {
+ used_prefixes |= (prefixes & PREFIX_DATA);
+ oappend ("FWORD PTR ");
+ }
+ else
+ oappend ("DWORD PTR ");
+ break;
+ case t_mode:
+ oappend ("TBYTE PTR ");
+ break;
+ case x_mode:
+ oappend ("XMMWORD PTR ");
+ break;
+ default:
+ break;
+ }
+ }
+ *obufp++ = open_char;
+ if (intel_syntax && riprel)
+ oappend ("rip + ");
+ *obufp = '\0';
+ if (havebase)
+ oappend (mode_64bit && (sizeflag & AFLAG)
+ ? names64[base] : names32[base]);
+ if (havesib)
+ {
+ if (index != 4)
+ {
+ if (!intel_syntax || havebase)
+ {
+ *obufp++ = separator_char;
+ *obufp = '\0';
+ }
+ oappend (mode_64bit && (sizeflag & AFLAG)
+ ? names64[index] : names32[index]);
+ }
+ if (scale != 0 || (!intel_syntax && index != 4))
+ {
+ *obufp++ = scale_char;
+ *obufp = '\0';
+ sprintf (scratchbuf, "%d", 1 << scale);
+ oappend (scratchbuf);
+ }
+ }
+ /* In Intel syntax the displacement goes inside the brackets. */
+ if (intel_syntax && disp)
+ {
+ if ((bfd_signed_vma) disp > 0)
+ {
+ *obufp++ = '+';
+ *obufp = '\0';
+ }
+ else if (mod != 1)
+ {
+ *obufp++ = '-';
+ *obufp = '\0';
+ disp = - (bfd_signed_vma) disp;
+ }
+
+ print_operand_value (scratchbuf, mod != 1, disp);
+ oappend (scratchbuf);
+ }
+
+ *obufp++ = close_char;
+ *obufp = '\0';
+ }
+ else if (intel_syntax)
+ {
+ /* Bare displacement: print "seg:disp", supplying the ds name when
+ no segment prefix is present. */
+ if (mod != 0 || (base & 7) == 5)
+ {
+ if (prefixes & (PREFIX_CS | PREFIX_SS | PREFIX_DS
+ | PREFIX_ES | PREFIX_FS | PREFIX_GS))
+ ;
+ else
+ {
+ oappend (names_seg[ds_reg - es_reg]);
+ oappend (":");
+ }
+ print_operand_value (scratchbuf, 1, disp);
+ oappend (scratchbuf);
+ }
+ }
+ }
+ else
+ { /* 16 bit address mode */
+ switch (mod)
+ {
+ case 0:
+ /* rm == 6 with mod == 0 is a bare 16-bit displacement. */
+ if (rm == 6)
+ {
+ disp = get16 ();
+ if ((disp & 0x8000) != 0)
+ disp -= 0x10000;
+ }
+ break;
+ case 1:
+ FETCH_DATA (the_info, codep + 1);
+ disp = *codep++;
+ if ((disp & 0x80) != 0)
+ disp -= 0x100;
+ break;
+ case 2:
+ disp = get16 ();
+ if ((disp & 0x8000) != 0)
+ disp -= 0x10000;
+ break;
+ }
+
+ if (!intel_syntax)
+ if (mod != 0 || rm == 6)
+ {
+ print_operand_value (scratchbuf, 0, disp);
+ oappend (scratchbuf);
+ }
+
+ if (mod != 0 || rm != 6)
+ {
+ *obufp++ = open_char;
+ *obufp = '\0';
+ oappend (index16[rm]);
+ if (intel_syntax && disp)
+ {
+ if ((bfd_signed_vma) disp > 0)
+ {
+ *obufp++ = '+';
+ *obufp = '\0';
+ }
+ else if (mod != 1)
+ {
+ *obufp++ = '-';
+ *obufp = '\0';
+ disp = - (bfd_signed_vma) disp;
+ }
+
+ print_operand_value (scratchbuf, mod != 1, disp);
+ oappend (scratchbuf);
+ }
+
+ *obufp++ = close_char;
+ *obufp = '\0';
+ }
+ else if (intel_syntax)
+ {
+ if (prefixes & (PREFIX_CS | PREFIX_SS | PREFIX_DS
+ | PREFIX_ES | PREFIX_FS | PREFIX_GS))
+ ;
+ else
+ {
+ oappend (names_seg[ds_reg - es_reg]);
+ oappend (":");
+ }
+ print_operand_value (scratchbuf, 1, disp & 0xffff);
+ oappend (scratchbuf);
+ }
+ }
+}
+
+/* Operand routine: append the general register selected by the ModRM
+   reg field.  The index is reg, plus 8 when the REX_EXTX bit is set in
+   rex; BYTEMODE (together with DFLAG in SIZEFLAG, rex and mode_64bit)
+   picks the name table.  An unknown BYTEMODE appends
+   INTERNAL_DISASSEMBLER_ERROR.  */
+static void
+OP_G (int bytemode, int sizeflag)
+{
+  const char *name;
+  int idx = reg;
+
+  USED_REX (REX_EXTX);
+  if (rex & REX_EXTX)
+    idx += 8;
+
+  switch (bytemode)
+    {
+    case b_mode:
+      USED_REX (0);
+      name = rex ? names8rex[idx] : names8[idx];
+      break;
+
+    case w_mode:
+      name = names16[idx];
+      break;
+
+    case d_mode:
+      name = names32[idx];
+      break;
+
+    case q_mode:
+      name = names64[idx];
+      break;
+
+    case v_mode:
+    case dq_mode:
+    case dqw_mode:
+      USED_REX (REX_MODE64);
+      if (rex & REX_MODE64)
+        name = names64[idx];
+      else if ((sizeflag & DFLAG) || bytemode != v_mode)
+        name = names32[idx];
+      else
+        name = names16[idx];
+      used_prefixes |= (prefixes & PREFIX_DATA);
+      break;
+
+    case m_mode:
+      name = mode_64bit ? names64[idx] : names32[idx];
+      break;
+
+    default:
+      name = INTERNAL_DISASSEMBLER_ERROR;
+      break;
+    }
+
+  oappend (name);
+}
+
+/* Fetch the next eight instruction bytes as a little-endian value and
+   advance codep past them.  The low and high 32-bit halves are assembled
+   separately and then combined into a bfd_vma.  When BFD64 is not
+   defined the function calls abort ().  */
+static bfd_vma
+get64 (void)
+{
+  bfd_vma x;
+#ifdef BFD64
+  unsigned int a;
+  unsigned int b;
+
+  FETCH_DATA (the_info, codep + 8);
+  /* Each byte is widened to unsigned int before it is shifted: shifting
+     the promoted (signed int) value left by 24 overflows when the byte
+     is 0x80 or above, which is undefined behaviour.  */
+  a = (unsigned int) (*codep++ & 0xff);
+  a |= (unsigned int) (*codep++ & 0xff) << 8;
+  a |= (unsigned int) (*codep++ & 0xff) << 16;
+  a |= (unsigned int) (*codep++ & 0xff) << 24;
+  b = (unsigned int) (*codep++ & 0xff);
+  b |= (unsigned int) (*codep++ & 0xff) << 8;
+  b |= (unsigned int) (*codep++ & 0xff) << 16;
+  b |= (unsigned int) (*codep++ & 0xff) << 24;
+  x = a + ((bfd_vma) b << 32);
+#else
+  abort ();
+  x = 0;
+#endif
+  return x;
+}
+
+/* Fetch the next four instruction bytes as a little-endian value,
+   without sign extension, and advance codep past them.  */
+static bfd_signed_vma
+get32 (void)
+{
+  bfd_signed_vma value = 0;
+  int shift;
+
+  FETCH_DATA (the_info, codep + 4);
+  for (shift = 0; shift < 32; shift += 8)
+    value |= (*codep++ & (bfd_signed_vma) 0xff) << shift;
+
+  return value;
+}
+
+/* Fetch the next four instruction bytes as a little-endian value,
+   sign-extend it from bit 31, and advance codep past them.  */
+static bfd_signed_vma
+get32s (void)
+{
+  const bfd_signed_vma sign_bit = (bfd_signed_vma) 1 << 31;
+  bfd_signed_vma value = 0;
+  int shift;
+
+  FETCH_DATA (the_info, codep + 4);
+  for (shift = 0; shift < 32; shift += 8)
+    value |= (*codep++ & (bfd_signed_vma) 0xff) << shift;
+
+  /* Flip bit 31 and subtract it again: propagates the sign upwards.  */
+  value = (value ^ sign_bit) - sign_bit;
+
+  return value;
+}
+
+/* Fetch the next two instruction bytes as a little-endian value in the
+   range 0..0xffff and advance codep past them.  */
+static int
+get16 (void)
+{
+  int low;
+  int high;
+
+  FETCH_DATA (the_info, codep + 2);
+  low = *codep++ & 0xff;
+  high = *codep++ & 0xff;
+  return low | (high << 8);
+}
+
+/* Record OP as the address of the operand currently being decoded
+   (slot op_ad) together with its RIPREL flag, and mark the slot as
+   holding an address by storing its own number in op_index.  Outside
+   64-bit mode both stored values are masked to 32 bits.  */
+static void
+set_op (bfd_vma op, int riprel)
+{
+  const int slot = op_ad;
+
+  op_index[slot] = slot;
+
+  if (!mode_64bit)
+    {
+      /* Mask to get a 32-bit address.  */
+      op_address[slot] = op & 0xffffffff;
+      op_riprel[slot] = riprel & 0xffffffff;
+      return;
+    }
+
+  op_address[slot] = op;
+  op_riprel[slot] = riprel;
+}
+
+/* Operand routine: append the name of a register that is fixed by the
+   opcode itself.
+
+   CODE is one of the *_reg constants and selects both the register
+   family and the register within it; SIZEFLAG supplies DFLAG for the
+   eAX..eDI family.  When the REX_EXTZ bit is set in rex, the 16-, 32-
+   and 64-bit general registers and the REX byte registers are taken
+   from index + 8 of their name table.  An unknown CODE appends
+   INTERNAL_DISASSEMBLER_ERROR.  */
+static void
+OP_REG (int code, int sizeflag)
+{
+  const char *s;
+  int add = 0;
+  USED_REX (REX_EXTZ);
+  if (rex & REX_EXTZ)
+    add = 8;
+
+  switch (code)
+    {
+    case indir_dx_reg:
+      if (intel_syntax)
+        s = "[dx]";
+      else
+        s = "(%dx)";
+      break;
+    case ax_reg: case cx_reg: case dx_reg: case bx_reg:
+    case sp_reg: case bp_reg: case si_reg: case di_reg:
+      s = names16[code - ax_reg + add];
+      break;
+    case es_reg: case ss_reg: case cs_reg:
+    case ds_reg: case fs_reg: case gs_reg:
+      /* Segment registers have no REX-extended form, so the REX offset
+         must not be applied: adding 8 would select an entry outside the
+         range 0..7 that the other users of names_seg (OP_IMREG, OP_SEG)
+         stay within.  */
+      s = names_seg[code - es_reg];
+      break;
+    case al_reg: case ah_reg: case cl_reg: case ch_reg:
+    case dl_reg: case dh_reg: case bl_reg: case bh_reg:
+      USED_REX (0);
+      if (rex)
+        s = names8rex[code - al_reg + add];
+      else
+        s = names8[code - al_reg];
+      break;
+    case rAX_reg: case rCX_reg: case rDX_reg: case rBX_reg:
+    case rSP_reg: case rBP_reg: case rSI_reg: case rDI_reg:
+      if (mode_64bit)
+        {
+          s = names64[code - rAX_reg + add];
+          break;
+        }
+      /* Outside 64-bit mode treat rXX as the matching eXX register.  */
+      code += eAX_reg - rAX_reg;
+      /* Fall through.  */
+    case eAX_reg: case eCX_reg: case eDX_reg: case eBX_reg:
+    case eSP_reg: case eBP_reg: case eSI_reg: case eDI_reg:
+      USED_REX (REX_MODE64);
+      if (rex & REX_MODE64)
+        s = names64[code - eAX_reg + add];
+      else if (sizeflag & DFLAG)
+        s = names32[code - eAX_reg + add];
+      else
+        s = names16[code - eAX_reg + add];
+      used_prefixes |= (prefixes & PREFIX_DATA);
+      break;
+    default:
+      s = INTERNAL_DISASSEMBLER_ERROR;
+      break;
+    }
+  oappend (s);
+}
+
+/* Operand routine: append the name of a register implied by the opcode.
+   CODE is one of the *_reg constants; no REX register extension is
+   applied to the index.  For the eAX..eDI family the width comes from
+   the REX_MODE64 bit of rex and from DFLAG in SIZEFLAG.  An unknown CODE
+   appends INTERNAL_DISASSEMBLER_ERROR.  */
+static void
+OP_IMREG (int code, int sizeflag)
+{
+  const char *name;
+  int idx;
+
+  switch (code)
+    {
+    case indir_dx_reg:
+      name = intel_syntax ? "[dx]" : "(%dx)";
+      break;
+
+    case ax_reg: case cx_reg: case dx_reg: case bx_reg:
+    case sp_reg: case bp_reg: case si_reg: case di_reg:
+      name = names16[code - ax_reg];
+      break;
+
+    case es_reg: case ss_reg: case cs_reg:
+    case ds_reg: case fs_reg: case gs_reg:
+      name = names_seg[code - es_reg];
+      break;
+
+    case al_reg: case ah_reg: case cl_reg: case ch_reg:
+    case dl_reg: case dh_reg: case bl_reg: case bh_reg:
+      idx = code - al_reg;
+      USED_REX (0);
+      name = rex ? names8rex[idx] : names8[idx];
+      break;
+
+    case eAX_reg: case eCX_reg: case eDX_reg: case eBX_reg:
+    case eSP_reg: case eBP_reg: case eSI_reg: case eDI_reg:
+      idx = code - eAX_reg;
+      USED_REX (REX_MODE64);
+      if (rex & REX_MODE64)
+        name = names64[idx];
+      else if (sizeflag & DFLAG)
+        name = names32[idx];
+      else
+        name = names16[idx];
+      used_prefixes |= (prefixes & PREFIX_DATA);
+      break;
+
+    default:
+      name = INTERNAL_DISASSEMBLER_ERROR;
+      break;
+    }
+
+  oappend (name);
+}
+
+/* Operand routine: fetch an immediate operand and append it in hex.
+
+   BYTEMODE selects the encoded width:
+     b_mode        one byte;
+     q_mode        in 64-bit mode a sign-extended 32-bit value, otherwise
+                   handled like v_mode;
+     v_mode        sign-extended 32 bits when the REX_MODE64 bit of rex is
+                   set, else 32 bits when DFLAG is set in SIZEFLAG, else
+                   16 bits;
+     w_mode        16 bits;
+     const_1_mode  no bytes are fetched; "1" is printed in Intel syntax
+                   only.
+   Any other BYTEMODE appends INTERNAL_DISASSEMBLER_ERROR.
+
+   The value is masked to its width and written after a '$' in
+   scratchbuf; appending from offset intel_syntax drops the '$' when
+   intel_syntax is 1.  */
+static void
+OP_I (int bytemode, int sizeflag)
+{
+  bfd_signed_vma op;
+  bfd_signed_vma mask = -1;
+
+  switch (bytemode)
+    {
+    case b_mode:
+      FETCH_DATA (the_info, codep + 1);
+      op = *codep++;
+      mask = 0xff;
+      break;
+    case q_mode:
+      if (mode_64bit)
+        {
+          op = get32s ();
+          break;
+        }
+      /* Fall through.  */
+    case v_mode:
+      USED_REX (REX_MODE64);
+      if (rex & REX_MODE64)
+        op = get32s ();
+      else if (sizeflag & DFLAG)
+        {
+          op = get32 ();
+          mask = 0xffffffff;
+        }
+      else
+        {
+          op = get16 ();
+          /* 16-bit mask; was written 0xfffff, which only worked because
+             get16 never returns more than 0xffff.  */
+          mask = 0xffff;
+        }
+      used_prefixes |= (prefixes & PREFIX_DATA);
+      break;
+    case w_mode:
+      mask = 0xffff;
+      op = get16 ();
+      break;
+    case const_1_mode:
+      if (intel_syntax)
+        oappend ("1");
+      return;
+    default:
+      oappend (INTERNAL_DISASSEMBLER_ERROR);
+      return;
+    }
+
+  op &= mask;
+  scratchbuf[0] = '$';
+  print_operand_value (scratchbuf + 1, 1, op);
+  oappend (scratchbuf + intel_syntax);
+  scratchbuf[0] = '\0';
+}
+
+/* Operand routine: like OP_I, but in 64-bit mode a v_mode immediate is a
+   full 64-bit value (get64) when the REX_MODE64 bit of rex is set.
+   Outside 64-bit mode the call is forwarded to OP_I unchanged.
+
+   In 64-bit mode BYTEMODE may be b_mode (one byte), v_mode (64, 32 or 16
+   bits as selected by rex and DFLAG in SIZEFLAG) or w_mode (16 bits);
+   anything else appends INTERNAL_DISASSEMBLER_ERROR.  The value is
+   masked to its width and printed in hex after a '$', which is dropped
+   when intel_syntax is 1.  */
+static void
+OP_I64 (int bytemode, int sizeflag)
+{
+  bfd_signed_vma op;
+  bfd_signed_vma mask = -1;
+
+  if (!mode_64bit)
+    {
+      OP_I (bytemode, sizeflag);
+      return;
+    }
+
+  switch (bytemode)
+    {
+    case b_mode:
+      FETCH_DATA (the_info, codep + 1);
+      op = *codep++;
+      mask = 0xff;
+      break;
+    case v_mode:
+      USED_REX (REX_MODE64);
+      if (rex & REX_MODE64)
+        op = get64 ();
+      else if (sizeflag & DFLAG)
+        {
+          op = get32 ();
+          mask = 0xffffffff;
+        }
+      else
+        {
+          op = get16 ();
+          /* 16-bit mask; was written 0xfffff, which only worked because
+             get16 never returns more than 0xffff.  */
+          mask = 0xffff;
+        }
+      used_prefixes |= (prefixes & PREFIX_DATA);
+      break;
+    case w_mode:
+      mask = 0xffff;
+      op = get16 ();
+      break;
+    default:
+      oappend (INTERNAL_DISASSEMBLER_ERROR);
+      return;
+    }
+
+  op &= mask;
+  scratchbuf[0] = '$';
+  print_operand_value (scratchbuf + 1, 1, op);
+  oappend (scratchbuf + intel_syntax);
+  scratchbuf[0] = '\0';
+}
+
+/* Operand routine: fetch a sign-extended immediate and append it in hex.
+
+   BYTEMODE selects the encoded width: b_mode is one byte, w_mode is 16
+   bits, and v_mode is 32 bits when the REX_MODE64 bit of rex or DFLAG in
+   SIZEFLAG is set and 16 bits otherwise.  Any other BYTEMODE appends
+   INTERNAL_DISASSEMBLER_ERROR.  The value is sign-extended from its
+   encoded width and passed to print_operand_value unmasked.  It is
+   written after a '$' in scratchbuf; appending from offset intel_syntax
+   drops the '$' when intel_syntax is 1.  */
+static void
+OP_sI (int bytemode, int sizeflag)
+{
+  bfd_signed_vma op;
+
+  switch (bytemode)
+    {
+    case b_mode:
+      FETCH_DATA (the_info, codep + 1);
+      op = *codep++;
+      if ((op & 0x80) != 0)
+        op -= 0x100;
+      break;
+    case v_mode:
+      USED_REX (REX_MODE64);
+      if ((rex & REX_MODE64) || (sizeflag & DFLAG))
+        op = get32s ();
+      else
+        {
+          op = get16 ();
+          if ((op & 0x8000) != 0)
+            op -= 0x10000;
+        }
+      used_prefixes |= (prefixes & PREFIX_DATA);
+      break;
+    case w_mode:
+      op = get16 ();
+      if ((op & 0x8000) != 0)
+        op -= 0x10000;
+      break;
+    default:
+      oappend (INTERNAL_DISASSEMBLER_ERROR);
+      return;
+    }
+
+  scratchbuf[0] = '$';
+  print_operand_value (scratchbuf + 1, 1, op);
+  oappend (scratchbuf + intel_syntax);
+}
+
+/* Operand routine: fetch a relative branch displacement, turn it into
+   an absolute target and append that target in hex.
+
+   BYTEMODE b_mode reads a sign-extended byte; v_mode reads a
+   sign-extended 32-bit value when DFLAG is set in SIZEFLAG and a 16-bit
+   value otherwise.  Any other BYTEMODE appends
+   INTERNAL_DISASSEMBLER_ERROR.  The target is start_pc plus the offset
+   of the byte after the displacement plus the displacement; it is also
+   recorded through set_op.  */
+static void
+OP_J (int bytemode, int sizeflag)
+{
+  bfd_vma target;
+  bfd_vma wrap = -1;
+
+  if (bytemode == b_mode)
+    {
+      FETCH_DATA (the_info, codep + 1);
+      target = *codep++;
+      if ((target & 0x80) != 0)
+        target -= 0x100;
+    }
+  else if (bytemode == v_mode)
+    {
+      if (sizeflag & DFLAG)
+        target = get32s ();
+      else
+        {
+          target = get16 ();
+          /* With a 16-bit operand size the resulting pc is truncated
+             to 16 bits after the displacement has been added.  */
+          wrap = 0xffff;
+        }
+    }
+  else
+    {
+      oappend (INTERNAL_DISASSEMBLER_ERROR);
+      return;
+    }
+
+  target = (start_pc + codep - start_codep + target) & wrap;
+  set_op (target, 0);
+  print_operand_value (scratchbuf, 1, target);
+  oappend (scratchbuf);
+}
+
+/* Operand routine: append the segment register selected by the ModRM
+   reg field.  Both arguments are ignored.  */
+static void
+OP_SEG (int dummy ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  const char *seg_name = names_seg[reg];
+
+  oappend (seg_name);
+}
+
+/* Operand routine: fetch a direct far pointer and append it as
+   "segment,offset" in hex.  The offset is read first (32 bits when DFLAG
+   is set in SIZEFLAG, otherwise 16), followed by the 16-bit segment.
+   Outside Intel syntax each number is preceded by '$'.  */
+static void
+OP_DIR (int dummy ATTRIBUTE_UNUSED, int sizeflag)
+{
+  int seg, offset;
+  const char *fmt;
+
+  if (sizeflag & DFLAG)
+    offset = get32 ();
+  else
+    offset = get16 ();
+  seg = get16 ();
+
+  used_prefixes |= (prefixes & PREFIX_DATA);
+
+  fmt = intel_syntax ? "0x%x,0x%x" : "$0x%x,$0x%x";
+  sprintf (scratchbuf, fmt, seg, offset);
+  oappend (scratchbuf);
+}
+
+/* Print an absolute memory offset operand, preceded by any segment
+   override.  In Intel syntax with no override present, the default
+   segment name is printed explicitly.  */
+static void
+OP_OFF (int bytemode ATTRIBUTE_UNUSED, int sizeflag)
+{
+  const int seg_overrides = (PREFIX_CS | PREFIX_SS | PREFIX_DS
+                             | PREFIX_ES | PREFIX_FS | PREFIX_GS);
+  bfd_vma off;
+
+  append_seg ();
+
+  off = ((sizeflag & AFLAG) || mode_64bit) ? get32 () : get16 ();
+
+  if (intel_syntax && !(prefixes & seg_overrides))
+    {
+      oappend (names_seg[ds_reg - es_reg]);
+      oappend (":");
+    }
+  print_operand_value (scratchbuf, 1, off);
+  oappend (scratchbuf);
+}
+
+/* 64-bit variant of OP_OFF: fetches the offset with get64 ().  Outside
+   64-bit mode it simply defers to OP_OFF.  */
+static void
+OP_OFF64 (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  const int seg_overrides = (PREFIX_CS | PREFIX_SS | PREFIX_DS
+                             | PREFIX_ES | PREFIX_FS | PREFIX_GS);
+  bfd_vma off;
+
+  if (!mode_64bit)
+    {
+      OP_OFF (bytemode, sizeflag);
+      return;
+    }
+
+  append_seg ();
+
+  off = get64 ();
+
+  if (intel_syntax && !(prefixes & seg_overrides))
+    {
+      oappend (names_seg[ds_reg - es_reg]);
+      oappend (":");
+    }
+  print_operand_value (scratchbuf, 1, off);
+  oappend (scratchbuf);
+}
+
+/* Append register CODE wrapped in open_char/close_char.  The register
+   name table is chosen from mode_64bit and the AFLAG bit of SIZEFLAG.  */
+static void
+ptr_reg (int code, int sizeflag)
+{
+  const char *s;
+  int idx = code - eAX_reg;
+
+  *obufp++ = open_char;
+  used_prefixes |= (prefixes & PREFIX_ADDR);
+
+  if (mode_64bit && (sizeflag & AFLAG))
+    s = names64[idx];
+  else if (mode_64bit || (sizeflag & AFLAG))
+    s = names32[idx];
+  else
+    s = names16[idx];
+
+  oappend (s);
+  *obufp++ = close_char;
+  *obufp = 0;
+}
+
+/* Print an %es-relative register-indirect operand.  In Intel syntax an
+   operand-size keyword is emitted first, chosen from the low bit of the
+   preceding opcode byte, REX_MODE64 and DFLAG.  */
+static void
+OP_ESreg (int code, int sizeflag)
+{
+  if (intel_syntax)
+    {
+      const char *size_kw = "BYTE PTR ";
+
+      if (codep[-1] & 1)
+        {
+          USED_REX (REX_MODE64);
+          used_prefixes |= (prefixes & PREFIX_DATA);
+          if (rex & REX_MODE64)
+            size_kw = "QWORD PTR ";
+          else if (sizeflag & DFLAG)
+            size_kw = "DWORD PTR ";
+          else
+            size_kw = "WORD PTR ";
+        }
+      oappend (size_kw);
+    }
+
+  /* Skip the leading '%' in Intel syntax.  */
+  oappend ("%es:" + intel_syntax);
+  ptr_reg (code, sizeflag);
+}
+
+/* Print a register-indirect operand whose segment defaults to %ds.
+   Opcode byte 0xd7 always gets the byte-sized keyword in Intel syntax.
+   When no segment override is present, PREFIX_DS is forced on so that
+   append_seg () emits the segment.  */
+static void
+OP_DSreg (int code, int sizeflag)
+{
+  const int seg_overrides = (PREFIX_CS | PREFIX_DS | PREFIX_SS
+                             | PREFIX_ES | PREFIX_FS | PREFIX_GS);
+
+  if (intel_syntax)
+    {
+      const char *size_kw = "BYTE PTR ";
+
+      if (codep[-1] != 0xd7 && (codep[-1] & 1))
+        {
+          USED_REX (REX_MODE64);
+          used_prefixes |= (prefixes & PREFIX_DATA);
+          if (rex & REX_MODE64)
+            size_kw = "QWORD PTR ";
+          else if (sizeflag & DFLAG)
+            size_kw = "DWORD PTR ";
+          else
+            size_kw = "WORD PTR ";
+        }
+      oappend (size_kw);
+    }
+
+  if (!(prefixes & seg_overrides))
+    prefixes |= PREFIX_DS;
+  append_seg ();
+  ptr_reg (code, sizeflag);
+}
+
+/* Print a control register operand.  Eight is added to the register
+   number when REX_EXTX is set, or, outside 64-bit mode, when a LOCK
+   prefix is present.  */
+static void
+OP_C (int dummy ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  int crn = reg;
+
+  if (rex & REX_EXTX)
+    {
+      USED_REX (REX_EXTX);
+      crn += 8;
+    }
+  else if (!mode_64bit && (prefixes & PREFIX_LOCK))
+    {
+      used_prefixes |= PREFIX_LOCK;
+      crn += 8;
+    }
+
+  sprintf (scratchbuf, "%%cr%d", crn);
+  oappend (scratchbuf + intel_syntax);
+}
+
+/* Print a debug register operand; REX_EXTX selects registers 8 and up.  */
+static void
+OP_D (int dummy ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  int drn = reg;
+
+  USED_REX (REX_EXTX);
+  if (rex & REX_EXTX)
+    drn += 8;
+
+  if (!intel_syntax)
+    sprintf (scratchbuf, "%%db%d", drn);
+  else
+    sprintf (scratchbuf, "db%d", drn);
+  oappend (scratchbuf);
+}
+
+/* Print a test register operand ("%trN"; the '%' is dropped in Intel
+   syntax).  */
+static void
+OP_T (int dummy ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  sprintf (scratchbuf, "%%tr%d", reg);
+  oappend (&scratchbuf[intel_syntax]);
+}
+
+/* Operand that must be a register (mod == 3); anything else is bad.  */
+static void
+OP_Rd (int bytemode, int sizeflag)
+{
+  if (mod != 3)
+    {
+      BadOp ();
+      return;
+    }
+  OP_E (bytemode, sizeflag);
+}
+
+/* Print the reg-field operand as %mmN, or as %xmmN (extended by
+   REX_EXTX) when a data-size prefix is present.  */
+static void
+OP_MMX (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  used_prefixes |= (prefixes & PREFIX_DATA);
+
+  if (!(prefixes & PREFIX_DATA))
+    sprintf (scratchbuf, "%%mm%d", reg);
+  else
+    {
+      int xmm_no = reg;
+
+      USED_REX (REX_EXTX);
+      if (rex & REX_EXTX)
+        xmm_no += 8;
+      sprintf (scratchbuf, "%%xmm%d", xmm_no);
+    }
+  oappend (scratchbuf + intel_syntax);
+}
+
+/* Print the reg-field operand as %xmmN; REX_EXTX selects 8 and up.  */
+static void
+OP_XMM (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  int xmm_no = reg;
+
+  USED_REX (REX_EXTX);
+  if (rex & REX_EXTX)
+    xmm_no += 8;
+
+  sprintf (scratchbuf, "%%xmm%d", xmm_no);
+  oappend (scratchbuf + intel_syntax);
+}
+
+/* Print an r/m operand that names an MMX register, or an XMM register
+   when a data-size prefix is present.  Memory forms (mod != 3) are
+   handed to OP_E, after adjusting BYTEMODE for Intel syntax.  */
+static void
+OP_EM (int bytemode, int sizeflag)
+{
+  int data_prefix = prefixes & PREFIX_DATA;
+
+  if (mod != 3)
+    {
+      if (intel_syntax && bytemode == v_mode)
+        {
+          bytemode = data_prefix ? x_mode : q_mode;
+          used_prefixes |= data_prefix;
+        }
+      OP_E (bytemode, sizeflag);
+      return;
+    }
+
+  /* Register form: step over the mod/rm byte.  */
+  MODRM_CHECK;
+  codep++;
+  used_prefixes |= data_prefix;
+
+  if (!data_prefix)
+    sprintf (scratchbuf, "%%mm%d", rm);
+  else
+    {
+      int xmm_no = rm;
+
+      USED_REX (REX_EXTZ);
+      if (rex & REX_EXTZ)
+        xmm_no += 8;
+      sprintf (scratchbuf, "%%xmm%d", xmm_no);
+    }
+  oappend (scratchbuf + intel_syntax);
+}
+
+/* Print an r/m operand that names an XMM register.  Memory forms
+   (mod != 3) are handed to OP_E; in Intel syntax with v_mode the
+   operand size is derived from the DATA/REPZ/REPNZ prefixes.  */
+static void
+OP_EX (int bytemode, int sizeflag)
+{
+  int xmm_no;
+
+  if (mod != 3)
+    {
+      if (intel_syntax && bytemode == v_mode)
+        {
+          int sel = prefixes & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ);
+
+          if (sel == 0)
+            bytemode = x_mode;
+          else if (sel == PREFIX_REPZ)
+            {
+              bytemode = d_mode;
+              used_prefixes |= PREFIX_REPZ;
+            }
+          else if (sel == PREFIX_DATA)
+            {
+              bytemode = x_mode;
+              used_prefixes |= PREFIX_DATA;
+            }
+          else if (sel == PREFIX_REPNZ)
+            {
+              bytemode = q_mode;
+              used_prefixes |= PREFIX_REPNZ;
+            }
+          else
+            bytemode = 0;
+        }
+      OP_E (bytemode, sizeflag);
+      return;
+    }
+
+  xmm_no = rm;
+  USED_REX (REX_EXTZ);
+  if (rex & REX_EXTZ)
+    xmm_no += 8;
+
+  /* Register form: step over the mod/rm byte.  */
+  MODRM_CHECK;
+  codep++;
+  sprintf (scratchbuf, "%%xmm%d", xmm_no);
+  oappend (scratchbuf + intel_syntax);
+}
+
+/* MMX operand that must be in register form (mod == 3).  */
+static void
+OP_MS (int bytemode, int sizeflag)
+{
+  if (mod != 3)
+    {
+      BadOp ();
+      return;
+    }
+  OP_EM (bytemode, sizeflag);
+}
+
+/* XMM operand that must be in register form (mod == 3).  */
+static void
+OP_XS (int bytemode, int sizeflag)
+{
+  if (mod != 3)
+    {
+      BadOp ();
+      return;
+    }
+  OP_EX (bytemode, sizeflag);
+}
+
+/* Operand that must be a memory reference; a register form (mod == 3)
+   is a bad lea, lds, les, lfs, lgs or lss modrm.  */
+static void
+OP_M (int bytemode, int sizeflag)
+{
+  if (mod != 3)
+    {
+      OP_E (bytemode, sizeflag);
+      return;
+    }
+  BadOp ();
+}
+
+/* Accept only the register form with rm == 0; everything else is bad.  */
+static void
+OP_0f07 (int bytemode, int sizeflag)
+{
+  if (mod == 3 && rm == 0)
+    OP_E (bytemode, sizeflag);
+  else
+    BadOp ();
+}
+
+static void
+OP_0fae (int bytemode, int sizeflag)
+{
+ if (mod == 3) /* register form: the slot encodes a fence, not a memory operand */
+ {
++ if (reg == 5) /* replace the trailing "xrstor" mnemonic with "lfence" */
++ strcpy (obuf + strlen (obuf) - sizeof ("xrstor") + 1, "lfence");
+ if (reg == 7) /* replace the trailing "clflush" mnemonic with "sfence" */
+ strcpy (obuf + strlen (obuf) - sizeof ("clflush") + 1, "sfence");
+
+ if (reg < 5 || rm != 0) /* register forms are only valid for reg >= 5 with rm == 0 */
+ {
+ BadOp (); /* bad sfence, mfence, or lfence */
+ return;
+ }
+ }
- else if (reg != 7)
++ else if (reg != 5 && reg != 7)
+ {
- BadOp (); /* bad clflush */
++ BadOp (); /* bad xrstor or clflush */
+ return;
+ }
+
+ OP_E (bytemode, sizeflag); /* every accepted form is finished by the generic r/m decoder */
+}
+
+/* A NOP whose only prefix is REPZ is the PAUSE instruction.  */
+static void
+NOP_Fixup (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  if (prefixes != PREFIX_REPZ)
+    return;
+
+  strcpy (obuf, "pause");
+}
+
+/* 3DNow! mnemonics indexed by the opcode suffix byte.  Every suffix not
+   listed here is NULL, i.e. not a valid instruction.  The array is
+   sized explicitly so that any byte value is a valid index.  */
+static const char *const Suffix3DNow[256] = {
+  [0x0C] = "pi2fw",
+  [0x0D] = "pi2fd",
+  [0x1C] = "pf2iw",
+  [0x1D] = "pf2id",
+  [0x8A] = "pfnacc",
+  [0x8E] = "pfpnacc",
+  [0x90] = "pfcmpge",
+  [0x94] = "pfmin",
+  [0x96] = "pfrcp",
+  [0x97] = "pfrsqrt",
+  [0x9A] = "pfsub",
+  [0x9E] = "pfadd",
+  [0xA0] = "pfcmpgt",
+  [0xA4] = "pfmax",
+  [0xA6] = "pfrcpit1",
+  [0xA7] = "pfrsqit1",
+  [0xAA] = "pfsubr",
+  [0xAE] = "pfacc",
+  [0xB0] = "pfcmpeq",
+  [0xB4] = "pfmul",
+  [0xB6] = "pfrcpit2",
+  [0xB7] = "pfmulhrw",
+  [0xBB] = "pswapd",
+  [0xBF] = "pavgusb",
+};
+
+/* AMD 3DNow! instructions are identified by a suffix byte that sits
+   where an 8-bit immediate would normally go, i.e. it is the last byte
+   of the instruction.  Look it up and append the mnemonic.  */
+static void
+OP_3DNowSuffix (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  const char *mnemonic;
+
+  FETCH_DATA (the_info, codep + 1);
+  obufp = obuf + strlen (obuf);
+  mnemonic = Suffix3DNow[*codep++ & 0xff];
+  if (mnemonic != NULL)
+    {
+      oappend (mnemonic);
+      return;
+    }
+
+  /* The variable-sized modrm/sib bytes lie between the 0x0f0f opcode
+     and the suffix, so the operands have already been decoded by the
+     time the opcode turns out to be bad.  Discard them.  */
+  op1out[0] = '\0';
+  op2out[0] = '\0';
+  BadOp ();
+}
+
+/* Comparison predicate names, indexed by the immediate byte (0-7) that
+   follows an SSE compare instruction.  */
+static const char *simd_cmp_op[] = {
+  "eq", "lt", "le", "unord",
+  "neq", "nlt", "nle", "ord"
+};
+
+/* Build the "cmp<pred><p|s><s|d>" mnemonic from the trailing predicate
+   byte and the REPZ/DATA/REPNZ prefixes.  A predicate of 8 or more is a
+   bad extension byte.  */
+static void
+OP_SIMD_Suffix (int bytemode ATTRIBUTE_UNUSED, int sizeflag ATTRIBUTE_UNUSED)
+{
+  unsigned int cmp_type;
+  char kind = 'p';
+  char width = 's';
+
+  FETCH_DATA (the_info, codep + 1);
+  obufp = obuf + strlen (obuf);
+  cmp_type = *codep++ & 0xff;
+
+  if (cmp_type >= 8)
+    {
+      /* Bad extension byte: drop the operands already decoded.  */
+      op1out[0] = '\0';
+      op2out[0] = '\0';
+      BadOp ();
+      return;
+    }
+
+  /* Prefix priority: REPZ, then DATA, then REPNZ.  */
+  used_prefixes |= (prefixes & PREFIX_REPZ);
+  if (prefixes & PREFIX_REPZ)
+    kind = 's';
+  else
+    {
+      used_prefixes |= (prefixes & PREFIX_DATA);
+      if (prefixes & PREFIX_DATA)
+        width = 'd';
+      else
+        {
+          used_prefixes |= (prefixes & PREFIX_REPNZ);
+          if (prefixes & PREFIX_REPNZ)
+            {
+              kind = 's';
+              width = 'd';
+            }
+        }
+    }
+
+  sprintf (scratchbuf, "cmp%s%c%c", simd_cmp_op[cmp_type], kind, width);
+  oappend (scratchbuf);
+}
+
+/* For the two-register forms, turn movlps/movhps into movhlps/movlhps
+   by inserting EXTRACHAR before the last three characters of the
+   mnemonic in obuf.  */
+static void
+SIMD_Fixup (int extrachar, int sizeflag ATTRIBUTE_UNUSED)
+{
+  char *end;
+  int i;
+
+  if (mod != 3)
+    return;
+
+  end = obuf + strlen (obuf);
+  end[1] = '\0';
+  /* Shift the last three characters one place to the right.  */
+  for (i = 0; i < 3; i++)
+    end[-i] = end[-i - 1];
+  end[-3] = extrachar;
+}
+
+/* The register forms with reg == 1 and rm of 0 or 1 are monitor and
+   mwait rather than sidt.  Rewrite the mnemonic and, in AT&T syntax,
+   fill in the implicit register operands.  Everything else is decoded
+   as a memory operand.  */
+static void
+PNI_Fixup (int extrachar ATTRIBUTE_UNUSED, int sizeflag)
+{
+  char *p;
+
+  if (!(mod == 3 && reg == 1 && rm <= 1))
+    {
+      OP_M (0, sizeflag);
+      return;
+    }
+
+  /* Override "sidt".  */
+  p = obuf + strlen (obuf) - 4;
+
+  /* We might have a suffix when disassembling with -Msuffix.  */
+  if (*p == 'i')
+    --p;
+
+  /* rm == 1: mwait %eax,%ecx
+     rm == 0: monitor %eax,%ecx,%edx  */
+  strcpy (p, rm ? "mwait" : "monitor");
+
+  if (!intel_syntax)
+    {
+      const char *op1 = names32[0];
+
+      if (!rm && mode_64bit)
+        {
+          /* In 64-bit mode monitor takes the 64-bit register unless an
+             address-size prefix is present.  */
+          if (prefixes & PREFIX_ADDR)
+            used_prefixes |= PREFIX_ADDR;
+          else
+            op1 = names64[0];
+        }
+      strcpy (op1out, op1);
+      if (!rm)
+        strcpy (op3out, names32[2]);
+      strcpy (op2out, names32[1]);
+      two_source_ops = 1;
+    }
+
+  codep++;
+}
+
+/* Bytes 0xd8-0xdf in this slot are the SVM instructions rather than
+   lidt.  Rewrite the mnemonic; when an address-size prefix is present,
+   also print the implicit register operand(s).  Any other byte is
+   decoded as a memory operand.  */
+static void
+SVME_Fixup (int bytemode, int sizeflag)
+{
+  static const char *const svme_names[] = {
+    "vmrun", "vmmcall", "vmload", "vmsave",
+    "stgi", "clgi", "skinit", "invlpga"
+  };
+  const char *alt;
+  char *p;
+  unsigned char opbyte = *codep;
+
+  if (opbyte < 0xd8 || opbyte > 0xdf)
+    {
+      OP_M (bytemode, sizeflag);
+      return;
+    }
+  alt = svme_names[opbyte - 0xd8];
+
+  /* Override "lidt".  */
+  p = obuf + strlen (obuf) - 4;
+  /* We might have a suffix.  */
+  if (*p == 'i')
+    --p;
+  strcpy (p, alt);
+
+  /* The byte is consumed whether or not operands are printed.  */
+  codep++;
+  if (!(prefixes & PREFIX_ADDR))
+    return;
+  used_prefixes |= PREFIX_ADDR;
+
+  if (opbyte == 0xdf)
+    {
+      strcpy (op2out, names32[1]);
+      two_source_ops = 1;
+    }
+
+  if (opbyte == 0xdf || opbyte == 0xd8 || opbyte == 0xda || opbyte == 0xdb)
+    {
+      *obufp++ = open_char;
+      if (mode_64bit || (sizeflag & AFLAG))
+        alt = names32[0];
+      else
+        alt = names16[0];
+      strcpy (obufp, alt);
+      obufp += strlen (alt);
+      *obufp++ = close_char;
+      *obufp = '\0';
+    }
+}
+
+/* Bytes 0xf8 and 0xf9 in this slot are swapgs and rdtscp rather than
+   invlpg; any other byte is decoded as a memory operand.  */
+static void
+INVLPG_Fixup (int bytemode, int sizeflag)
+{
+  const char *alt;
+
+  if (*codep == 0xf8)
+    alt = "swapgs";
+  else if (*codep == 0xf9)
+    alt = "rdtscp";
+  else
+    {
+      OP_M (bytemode, sizeflag);
+      return;
+    }
+
+  /* Override "invlpg".  */
+  strcpy (obuf + strlen (obuf) - 6, alt);
+  codep++;
+}
+
+/* Report an undecodable instruction: rewind codep to just past the
+   first opcode byte (discarding prefixes) and append "(bad)".  */
+static void
+BadOp (void)
+{
+  codep = &insn_codep[1];
+  oappend ("(bad)");
+}
+
+/* Fix the operand-size suffix of a move to or from a segment register,
+   then decode the r/m operand with OP_E.
+
+   Register form (mod == 3): a suffix is appended to the mnemonic:
+
+     movw %ds,%ax      movw %ax,%ds
+     movl %ds,%eax     movl %eax,%ds
+     movq %ds,%rax     movq %rax,%ds
+
+   Memory form: the access is always 16 bits wide, so the suffix is
+   forced to 'w':
+
+     movw %ds,(%eax)   movw (%eax),%ds
+     movw %ds,(%rax)   movw (%rax),%ds  */
+static void
+SEG_Fixup (int extrachar, int sizeflag)
+{
+  if (mod == 3)
+    {
+      const char *suffix;
+
+      if (prefixes & PREFIX_DATA)
+        suffix = "w";
+      else
+        {
+          USED_REX (REX_MODE64);
+          if (rex & REX_MODE64)
+            suffix = "q";
+          else
+            suffix = "l";
+        }
+      strcat (obuf, suffix);
+    }
+  else
+    {
+      /* Override "mov[l|q]".  */
+      char *p = obuf + strlen (obuf) - 1;
+
+      /* We might not have a suffix.  In that case the 'w' lands on the
+         string's terminator, so a new terminator must be written after
+         it; otherwise stale bytes left in obuf would be printed as part
+         of the mnemonic.  */
+      if (*p == 'v')
+        {
+          ++p;
+          p[1] = '\0';
+        }
+      *p = 'w';
+    }
+
+  OP_E (extrachar, sizeflag);
+}
+
+/* The register forms with reg == 0 and rm in 1..4 are the VMX
+   instructions rather than sgdt.  Rewrite the mnemonic and consume the
+   byte; everything else goes through OP_E.  */
+static void
+VMX_Fixup (int extrachar ATTRIBUTE_UNUSED, int sizeflag)
+{
+  static const char *const vmx_names[] = {
+    "vmcall", "vmlaunch", "vmresume", "vmxoff"
+  };
+  char *p;
+
+  if (!(mod == 3 && reg == 0 && rm >= 1 && rm <= 4))
+    {
+      OP_E (0, sizeflag);
+      return;
+    }
+
+  /* Override "sgdt".  */
+  p = obuf + strlen (obuf) - 4;
+
+  /* We might have a suffix when disassembling with -Msuffix.  */
+  if (*p == 'g')
+    --p;
+
+  strcpy (p, vmx_names[rm - 1]);
+  codep++;
+}
+
+/* Choose vmclear, vmxon or vmptrld from the DATA/REPZ prefixes (DATA
+   takes priority), overwrite the mnemonic in obuf, and decode the
+   operand with OP_E.  */
+static void
+OP_VMX (int bytemode, int sizeflag)
+{
+  const char *mnemonic;
+
+  used_prefixes |= (prefixes & (PREFIX_DATA | PREFIX_REPZ));
+
+  if (prefixes & PREFIX_DATA)
+    mnemonic = "vmclear";
+  else if (prefixes & PREFIX_REPZ)
+    mnemonic = "vmxon";
+  else
+    mnemonic = "vmptrld";
+
+  strcpy (obuf, mnemonic);
+  OP_E (bytemode, sizeflag);
+}
}
return 0;
}
- #endif
+/*
+ * DMI match callback: set acpi_rsdt_forced for the matched system unless
+ * acpi_force is set, in which case only a warning is logged.
+ * Always returns 0.
+ */
+static int __init force_acpi_rsdt(const struct dmi_system_id *d)
+{
+	if (acpi_force) {
+		printk(KERN_NOTICE
+		       "Warning: acpi=force overrules DMI blacklist: "
+		       "acpi=rsdt\n");
+		return 0;
+	}
+
+	printk(KERN_NOTICE "%s detected: force use of acpi=rsdt\n",
+	       d->ident);
+	acpi_rsdt_forced = 1;
+	return 0;
+}
+
/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact linux-acpi@vger.kernel.org
#include <linux/bootmem.h>
#include <linux/dmar.h>
#include <linux/hpet.h>
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
- #endif /* CONFIG_KDB */
++#endif
#include <asm/idle.h>
#include <asm/io.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
- #include <linux/ftrace.h>
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#endif
#include <asm/stacktrace.h>
+#include <linux/unwind.h>
#include "dumpstack.h"
CFI_ENDPROC
END(call_softirq)
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movq %r15, R15(%rdi) /* store the live callee-saved registers in the frame at %rdi */
+ movq %r14, R14(%rdi)
+ xchgq %rsi, %rdx /* swap the 2nd and 3rd incoming arguments */
+ movq %r13, R13(%rdi)
+ movq %r12, R12(%rdi)
+ xorl %eax, %eax /* %rax = 0; used to clear the remaining slots below */
+ movq %rbp, RBP(%rdi)
+ movq %rbx, RBX(%rdi)
+ movq (%rsp), %r9 /* return address of our caller */
+ xchgq %rdx, %rcx /* the value that arrived in %rsi is now in %rcx (the jump target) */
+ movq %rax, R11(%rdi)
+ movq %rax, R10(%rdi)
+ movq %rax, R9(%rdi)
+ movq %rax, R8(%rdi)
+ movq %rax, RAX(%rdi)
+ movq %rax, RCX(%rdi)
+ movq %rax, RDX(%rdi)
+ movq %rax, RSI(%rdi)
+ movq %rax, RDI(%rdi)
+ movq %rax, ORIG_RAX(%rdi)
+ movq %r9, RIP(%rdi) /* recorded RIP = our return address */
+ leaq 8(%rsp), %r9 /* stack pointer value just above the return address */
+ movq $__KERNEL_CS, CS(%rdi)
+ movq %rax, EFLAGS(%rdi)
+ movq %r9, RSP(%rdi)
+ movq $__KERNEL_DS, SS(%rdi)
+ jmpq *%rcx /* tail-jump to the address originally passed in %rsi; %rdi is unchanged and %rsi holds the original %rdx */
+ CFI_ENDPROC
+END(arch_unwind_init_running)
+#endif
+
- #ifdef CONFIG_PARAVIRT_XEN
+ #ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
/*
CFI_ENDPROC
END(xen_failsafe_callback)
- #endif /* CONFIG_PARAVIRT_XEN */
+ #endif /* CONFIG_XEN */
+#ifdef CONFIG_KDB
+
+#ifdef CONFIG_SMP
+apicinterrupt KDB_VECTOR \
+ kdb_interrupt, smp_kdb_interrupt
+#endif /* CONFIG_SMP */
+
+ENTRY(kdb_call)
+ INTR_FRAME
+ cld # clear the direction flag before calling C code
+ pushq $-1 # orig_eax
+ CFI_ADJUST_CFA_OFFSET 8
+ SAVE_ALL
+ movq $1,%rdi # KDB_REASON_ENTER
+ movq $0,%rsi # error_code
+ movq %rsp,%rdx # struct pt_regs
+ call kdb # kdb(reason, error_code, regs), arguments set up above
+ RESTORE_ALL
+ addq $8,%rsp # forget orig_eax
+ CFI_ADJUST_CFA_OFFSET -8
+ iretq
+ CFI_ENDPROC
+END(kdb_call)
+
+#endif /* CONFIG_KDB */
+
+
/*
* Some functions should be protected against kprobes
*/
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/efi.h>
- #include <linux/dmi.h>
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#endif /* CONFIG_KDB */
+#include <linux/kexec.h>
+ #include <linux/dmi.h>
+ #include <linux/tboot.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
static atomic_t waiting_for_crash_ipi;
+#ifdef CONFIG_KDB_KDUMP
+void halt_current_cpu(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+ struct pt_regs fixed_regs;
- #endif
++#endif
+ local_irq_disable();
+#ifdef CONFIG_X86_32
+ if (!user_mode_vm(regs)) { /* not user mode: use a fixed-up copy of the registers */
+ crash_fixup_ss_esp(&fixed_regs, regs);
+ regs = &fixed_regs;
+ }
+#endif
+ crash_save_cpu(regs, raw_smp_processor_id()); /* presumably records this CPU's registers for the dump -- confirm */
+ disable_local_APIC();
+ atomic_dec(&waiting_for_crash_ipi); /* one fewer CPU counted in waiting_for_crash_ipi */
+ /* Assume hlt works */
+ halt();
+ for(;;) /* if halt() ever returns, spin here forever */
+ cpu_relax();
+}
+#endif /* CONFIG_KDB_KDUMP */
+
static int crash_nmi_callback(struct notifier_block *self,
unsigned long val, void *data)
{
obj-y += base/ block/ misc/ mfd/
obj-$(CONFIG_NUBUS) += nubus/
obj-y += macintosh/
- obj-$(CONFIG_XEN) += xen/
-obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
obj-$(CONFIG_ATA) += ata/
+ obj-$(CONFIG_MTD) += mtd/
+ obj-$(CONFIG_SPI) += spi/
+obj-$(CONFIG_IDE) += ide/
obj-y += net/
+
+# gpu/ comes after char for AGP vs DRM startup
+obj-y += gpu/
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810) += video/i810/
+obj-$(CONFIG_FB_INTEL) += video/intelfb/
+
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_FUSION) += message/
obj-$(CONFIG_FIREWIRE) += firewire/
obj-$(CONFIG_BLK_DEV_UB) += ub.o
obj-$(CONFIG_BLK_DEV_HD) += hd.o
- obj-$(CONFIG_XEN_BLKFRONT) += xen-blkfront.o
+ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
+obj-$(CONFIG_CIPHER_TWOFISH) += loop_fish2.o
swim_mod-objs := swim.o swim_asm.o
goto err_free;
}
+#ifdef CONFIG_KDB_USB
+ /* Attach USB keyboards to kdb */
+ if (intf->cur_altsetting->desc.bInterfaceProtocol ==
+ USB_INTERFACE_PROTOCOL_KEYBOARD) {
+ int ret;
+ struct usbhid_device *usbhid = hid->driver_data;
+ extern void *usb_hcd_get_kdb_poll_func(struct usb_device *udev);
++ extern void * usb_hcd_get_kdb_completion_func(struct usb_device *udev);
++ extern int usb_hcd_check_uhci(struct usb_device *udev);
++ extern kdb_hc_keyboard_attach_t
++ usb_hcd_get_hc_keyboard_attach(struct usb_device *udev);
++ extern kdb_hc_keyboard_detach_t
++ usb_hcd_get_hc_keyboard_detach(struct usb_device *udev);
+
+ ret = kdb_usb_keyboard_attach(usbhid->urbin, usbhid->inbuf,
- usb_hcd_get_kdb_poll_func(interface_to_usbdev(intf)));
++ usb_hcd_get_kdb_poll_func(interface_to_usbdev(intf)),
++ usb_hcd_get_kdb_completion_func(interface_to_usbdev(intf)),
++ usb_hcd_get_hc_keyboard_attach(interface_to_usbdev(intf)),
++ usb_hcd_get_hc_keyboard_detach(interface_to_usbdev(intf)),
++ usbhid->bufsize,
++ NULL);
+
+ if (ret == -1)
+ printk(": FAILED to register keyboard (%s) "
+ "with KDB\n", hid->phys);
+ }
+#endif /* CONFIG_KDB_USB */
+
return 0;
err_free:
kfree(usbhid);
PSMOUSE_TRACKPOINT,
PSMOUSE_TOUCHKIT_PS2,
PSMOUSE_CORTRON,
+ PSMOUSE_ELFTOUCH_PS2,
PSMOUSE_HGPK,
PSMOUSE_ELANTECH,
+ PSMOUSE_FSP,
PSMOUSE_AUTO /* This one should always be last */
};
return 0;
}
+
- int elftouch_ps2_detect(struct psmouse *psmouse, int set_properties)
++int elftouch_ps2_detect(struct psmouse *psmouse, bool set_properties)
+{
+	/*
+	 * Probe with two TOUCHKIT_CMD commands; if either fails the
+	 * device is not handled here and -ENODEV is returned.  When
+	 * @set_properties is true the input device is configured as an
+	 * absolute touchscreen that uses the touchkit byte handler.
+	 */
+	struct input_dev *dev = psmouse->dev;
+	unsigned char param[16];
+	int command;
+
+	param[0] = 0x0f4;
+	command = TOUCHKIT_SEND_PARMS(1, 0, TOUCHKIT_CMD);
+	if (ps2_command(&psmouse->ps2dev, param, command))
+		return -ENODEV;
+
+	param[0] = 0x0b0;
+	command = TOUCHKIT_SEND_PARMS(1, 1, TOUCHKIT_CMD);
+	if (ps2_command(&psmouse->ps2dev, param, command))
+		return -ENODEV;
+
+	if (!set_properties)
+		return 0;
+
+	dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	set_bit(BTN_TOUCH, dev->keybit);
+	input_set_abs_params(dev, ABS_X, 0, ELFTOUCH_MAX_XC, 0, 0);
+	input_set_abs_params(dev, ABS_Y, 0, ELFTOUCH_MAX_YC, 0, 0);
+
+	psmouse->vendor = "ElfTouch";
+	psmouse->name = "Touchscreen";
+	psmouse->protocol_handler = touchkit_ps2_process_byte;
+	psmouse->pktsize = 5;
+
+	return 0;
+}
#define _TOUCHKIT_PS2_H
#ifdef CONFIG_MOUSE_PS2_TOUCHKIT
- int touchkit_ps2_detect(struct psmouse *psmouse, int set_properties);
- int elftouch_ps2_detect(struct psmouse *psmouse, int set_properties);
+ int touchkit_ps2_detect(struct psmouse *psmouse, bool set_properties);
++int elftouch_ps2_detect(struct psmouse *psmouse, bool set_properties);
#else
static inline int touchkit_ps2_detect(struct psmouse *psmouse,
- int set_properties)
+ bool set_properties)
{
return -ENOSYS; /* stub used when CONFIG_MOUSE_PS2_TOUCHKIT is not defined */
}
+static inline int elftouch_ps2_detect(struct psmouse *psmouse,
- int set_properties)
++ bool set_properties)
+{
+ return -ENOSYS; /* stub used when CONFIG_MOUSE_PS2_TOUCHKIT is not defined */
+}
#endif /* CONFIG_MOUSE_PS2_TOUCHKIT */
#endif
obj-$(CONFIG_TOUCHSCREEN_GUNZE) += gunze.o
obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o
obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o
+obj-$(CONFIG_TOUCHSCREEN_ELOUSB) += elousb.o
obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o
obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o
+ obj-$(CONFIG_TOUCHSCREEN_MCS5000) += mcs5000_ts.o
obj-$(CONFIG_TOUCHSCREEN_MIGOR) += migor_ts.o
obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o
obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o
--- /dev/null
+/*
+ * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+ *
+ * Module Author: Heinz Mauelshagen <Mauelshagen@RedHat.com>
+ *
+ * This file is released under the GPL.
+ *
+ *
+ * Linux 2.6 Device Mapper RAID4 and RAID5 target.
+ *
+ * Supports:
+ * o RAID4 with dedicated and selectable parity device
+ * o RAID5 with rotating parity (left+right, symmetric+asymmetric)
+ * o run time optimization of xor algorithm used to calculate parity
+ *
+ *
+ * Thanks to MD for:
+ * o the raid address calculation algorithm
+ * o the base of the biovec <-> page list copier.
+ *
+ *
+ * Uses region hash to keep track of how many writes are in flight to
+ * regions in order to use dirty log to keep state of regions to recover:
+ *
+ * o clean regions (those which are synchronized
+ * and don't have write io in flight)
+ * o dirty regions (those with write io in flight)
+ *
+ *
+ * On startup, any dirty regions are migrated to the 'nosync' state
+ * and are subject to recovery by the daemon.
+ *
+ * See raid_ctr() for table definition.
+ *
+ *
+ * FIXME:
+ * o add virtual interface for locking
+ * o remove instrumentation (REMOVEME:)
+ *
+ */
+
+static const char *version = "v0.2431";
+
+#include "dm.h"
+#include "dm-memcache.h"
+#include "dm-message.h"
+#include "dm-raid45.h"
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dm-io.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/dm-region-hash.h>
+
+/* # of parallel recovered regions */
+/* FIXME: cope with multiple recovery stripes in raid_set struct. */
+#define MAX_RECOVER 1 /* needs to be 1! */
+
+/*
+ * Configurable parameters
+ */
+#define INLINE
+
+/* Default # of stripes if not set in constructor. */
+#define STRIPES 64
+
+/* Minimum/maximum # of selectable stripes. */
+#define STRIPES_MIN 8
+#define STRIPES_MAX 16384
+
+/* Default chunk size in sectors if not set in constructor. */
+#define CHUNK_SIZE 64
+
+/* Default io size in sectors if not set in constructor. */
+#define IO_SIZE_MIN SECTORS_PER_PAGE
+#define IO_SIZE IO_SIZE_MIN
+
+/* Maximum settable chunk size in sectors. */
+#define CHUNK_SIZE_MAX 16384
+
+/* Recover io size default in sectors. */
+#define RECOVER_IO_SIZE_MIN 64
+#define RECOVER_IO_SIZE 256
+
+/* Default percentage recover io bandwidth. */
+#define BANDWIDTH 10
+#define BANDWIDTH_MIN 1
+#define BANDWIDTH_MAX 100
+/*
+ * END Configurable parameters
+ */
+
+#define TARGET "dm-raid45"
+#define DAEMON "kraid45d"
+#define DM_MSG_PREFIX TARGET
+
+#define SECTORS_PER_PAGE (PAGE_SIZE >> SECTOR_SHIFT)
+
+/* Amount/size for __xor(). */
+#define SECTORS_PER_XOR SECTORS_PER_PAGE
+#define XOR_SIZE PAGE_SIZE
+
+/*
+ * Helper macros.  Arguments are parenthesized so that expressions
+ * (e.g. "a + b") can be passed safely.  Note that range_ok() and
+ * POWER_OF_2() evaluate their first argument twice, so do not pass
+ * expressions with side effects.
+ */
+
+/* Derive raid_set from stripe_cache pointer. */
+#define	RS(x)	container_of(x, struct raid_set, sc)
+
+/* Check value in range. */
+#define	range_ok(i, min, max)	((i) >= (min) && (i) <= (max))
+
+/* Page reference. */
+#define PAGE(stripe, p)	((stripe)->obj[p].pl->page)
+
+/* Bio list reference. */
+#define	BL(stripe, p, rw)	((stripe)->ss[p].bl + (rw))
+
+/* Page list reference. */
+#define	PL(stripe, p)	((stripe)->obj[p].pl)
+
+/* Check argument is power of 2. */
+#define POWER_OF_2(a)	(!((a) & ((a) - 1)))
+
+/*
+ * Push @bio onto the front of @bl.  If the list has no tail yet, @bio
+ * becomes the tail as well.
+ *
+ * Factor out to dm-bio-list.h
+ */
+static inline void bio_list_push(struct bio_list *bl, struct bio *bio)
+{
+	struct bio *old_head = bl->head;
+
+	if (!bl->tail)
+		bl->tail = bio;
+
+	bio->bi_next = old_head;
+	bl->head = bio;
+}
+
+/* Factor out to dm.h */
+#define TI_ERR_RET(str, ret) \
+ do { ti->error = DM_MSG_PREFIX ": " str; return ret; } while (0);
+#define TI_ERR(str) TI_ERR_RET(str, -EINVAL)
+
+/*-----------------------------------------------------------------
+ * Stripe cache
+ *
+ * Cache for all reads and writes to raid sets (operational or degraded)
+ *
+ * We need to run all data to and from a RAID set through this cache,
+ * because parity chunks need to get calculated from data chunks
+ * or, in the degraded/resynchronization case, missing chunks need
+ * to be reconstructed using the other chunks of the stripe.
+ *---------------------------------------------------------------*/
+/* Protect kmem cache # counter. */
+static atomic_t _stripe_sc_nr = ATOMIC_INIT(-1); /* kmem cache # counter. */
+
+/* A stripe set (holds bios hanging off). */
+struct stripe_set {
+ struct stripe *stripe; /* Backpointer to stripe for endio(). */
+ struct bio_list bl[3]; /* Bio lists indexed by READ, WRITE and WRITE_MERGED. */
+#define WRITE_MERGED 2
+};
+
+#if READ != 0 || WRITE != 1
+#error dm-raid45: READ/WRITE != 0/1 used as index!!!
+#endif
+
+/*
+ * Stripe linked list indexes. Keep order, because the stripe
+ * and the stripe cache rely on the first 3!
+ */
+enum list_types {
+ LIST_IO = 0, /* Stripes with io pending. */
+ LIST_ENDIO, /* Stripes to endio. */
+ LIST_LRU, /* Least recently used stripes. */
+ LIST_HASH, /* Hashed stripes. */
+ LIST_RECOVER = LIST_HASH, /* For recovery type stripes only (same slot as LIST_HASH). */
+ NR_LISTS, /* To size array in struct stripe. */
+};
+
+enum lock_types { /* Indexes into the locks[] array of struct stripe_cache. */
+ LOCK_ENDIO = 0, /* Protect endio list. */
+ LOCK_LRU, /* Protect lru list. */
+ NR_LOCKS, /* To size array in struct stripe_cache. */
+};
+
+/* A stripe: the io object to handle all reads and writes to a RAID set. */
+struct stripe {
+ struct stripe_cache *sc; /* Backpointer to stripe cache. */
+
+ sector_t key; /* Hash key. */
+ region_t region; /* Region stripe is mapped to. */
+
+ /* Reference count. */
+ atomic_t cnt;
+
+ struct {
+ unsigned long flags; /* flags (bit numbers from enum stripe_states). */
+
+ /*
+ * Pending ios in flight:
+ *
+ * used as a 'lock' to control move of stripe to endio list
+ */
+ atomic_t pending; /* Pending ios in flight. */
+
+ /* Sectors to read and write for multi page stripe sets. */
+ unsigned size;
+ } io;
+
+ /* Lock on stripe (for clustering). */
+ void *lock;
+
+ /*
+ * 4 linked lists (indexed by enum list_types):
+ * o io list to flush io
+ * o endio list
+ * o LRU list to put stripes w/o reference count on
+ * o stripe cache hash
+ */
+ struct list_head lists[NR_LISTS];
+
+ struct {
+ unsigned short parity; /* Parity chunk index. */
+ short recover; /* Recovery chunk index. */
+ } idx;
+
+ /* This sets memory cache object (dm-mem-cache). */
+ struct dm_mem_cache_object *obj;
+
+ /* Array of stripe sets (dynamically allocated). */
+ struct stripe_set ss[0];
+};
+
+/* States stripes can be in (bit numbers in the io.flags field of struct stripe). */
+enum stripe_states {
+ STRIPE_ACTIVE, /* Active io on stripe. */
+ STRIPE_ERROR, /* io error on stripe. */
+ STRIPE_MERGED, /* Writes got merged. */
+ STRIPE_READ, /* Read. */
+ STRIPE_RBW, /* Read-before-write. */
+ STRIPE_RECONSTRUCT, /* reconstruct of a missing chunk required. */
+ STRIPE_RECOVER, /* Stripe used for RAID set recovery. */
+};
+
+/* ... and macros to access them: BITOPS(name, what, var, flag) defines five inline helpers, TestClear<name><what>(), TestSet<name><what>(), Clear<name><what>(), Set<name><what>() and <name><what>(), each applying the matching bit operation for 'flag' to v->io.flags of a 'struct var'. */
+#define BITOPS(name, what, var, flag) \
+static inline int TestClear ## name ## what(struct var *v) \
+{ return test_and_clear_bit(flag, &v->io.flags); } \
+static inline int TestSet ## name ## what(struct var *v) \
+{ return test_and_set_bit(flag, &v->io.flags); } \
+static inline void Clear ## name ## what(struct var *v) \
+{ clear_bit(flag, &v->io.flags); } \
+static inline void Set ## name ## what(struct var *v) \
+{ set_bit(flag, &v->io.flags); } \
+static inline int name ## what(struct var *v) \
+{ return test_bit(flag, &v->io.flags); }
+
+
+BITOPS(Stripe, Active, stripe, STRIPE_ACTIVE)
+BITOPS(Stripe, Merged, stripe, STRIPE_MERGED)
+BITOPS(Stripe, Error, stripe, STRIPE_ERROR)
+BITOPS(Stripe, Read, stripe, STRIPE_READ)
+BITOPS(Stripe, RBW, stripe, STRIPE_RBW)
+BITOPS(Stripe, Reconstruct, stripe, STRIPE_RECONSTRUCT)
+BITOPS(Stripe, Recover, stripe, STRIPE_RECOVER)
+
+/* A stripe hash (embedded in struct stripe_cache). NOTE(review): the meaning of the individual fields is not visible in this part of the file -- confirm against the hash setup and lookup code. */
+struct stripe_hash {
+ struct list_head *hash;
+ unsigned buckets;
+ unsigned mask;
+ unsigned prime;
+ unsigned shift;
+};
+
+/* A stripe cache. */
+struct stripe_cache {
+ /* Stripe hash. */
+ struct stripe_hash hash;
+
+ /* Stripes with io to flush, stripes to endio and LRU lists. */
+ struct list_head lists[3];
+
+ /* Locks to protect endio and lru lists. */
+ spinlock_t locks[NR_LOCKS];
+
+ /* Slab cache to allocate stripes from. */
+ struct {
+ struct kmem_cache *cache; /* Cache itself. */
+ char name[32]; /* Unique name. */
+ } kc;
+
+ struct dm_io_client *dm_io_client; /* dm-io client resource context. */
+
+ /* dm-mem-cache client resource context. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ int stripes_parm; /* # stripes parameter from constructor. */
+ atomic_t stripes; /* actual # of stripes in cache. */
+ atomic_t stripes_to_shrink; /* # of stripes to shrink cache by. */
+ atomic_t stripes_last; /* last # of stripes in cache. */
+ atomic_t active_stripes; /* actual # of active stripes in cache. */
+
+ /* REMOVEME: */
+ atomic_t max_active_stripes; /* NOTE(review): presumably the maximum # of active stripes seen -- confirm. */
+};
+
+/* Flag specs for raid_dev (used in the flags member of struct raid_dev). */
+enum raid_dev_flags { DEVICE_FAILED, IO_QUEUED };
+
+/* The raid device in a set. */
+struct raid_dev {
+ struct dm_dev *dev;
+ unsigned long flags; /* see enum raid_dev_flags. */
+ sector_t start; /* offset to map to. */
+};
+
+/* Flags spec for raid_set. */
+enum raid_set_flags {
+ RS_CHECK_OVERWRITE, /* Check for chunk overwrites. */
+ RS_DEAD, /* RAID set inoperational. */
+ RS_DEVEL_STATS, /* REMOVEME: display status information. */
+ RS_IO_ERROR, /* io error on set. */
+ RS_RECOVER, /* Do recovery. */
+ RS_RECOVERY_BANDWIDTH, /* Allow recovery bandwidth (delayed bios). */
+ RS_REGION_GET, /* get a region to recover. */
+ RS_SC_BUSY, /* stripe cache busy -> send an event. */
+ RS_SUSPENDED, /* RAID set suspended. */
+};
+
+/*
+ * REMOVEME: devel stats counters.
+ *
+ * Each value indexes raid_set.stats[]; S_NR_STATS must stay last because
+ * it sizes that array. The display string for each counter lives in
+ * stats_map[].
+ */
+enum stats_types {
+ S_BIOS_READ,
+ S_BIOS_ADDED_READ,
+ S_BIOS_ENDIO_READ,
+ S_BIOS_WRITE,
+ S_BIOS_ADDED_WRITE,
+ S_BIOS_ENDIO_WRITE,
+ S_CAN_MERGE,
+ S_CANT_MERGE,
+ S_CONGESTED,
+ S_DM_IO_READ,
+ S_DM_IO_WRITE,
+ S_ACTIVE_READS,
+ S_BANDWIDTH,
+ S_BARRIER,
+ S_BIO_COPY_PL_NEXT,
+ S_DEGRADED,
+ S_DELAYED_BIOS,
+ S_EVICT,
+ S_FLUSHS,
+ S_HITS_1ST,
+ S_IOS_POST,
+ S_INSCACHE,
+ S_MAX_LOOKUP,
+ S_MERGE_PAGE_LOCKED,
+ S_NO_BANDWIDTH,
+ S_NOT_CONGESTED,
+ S_NO_RW,
+ S_NOSYNC,
+ S_PROHIBITPAGEIO,
+ S_RECONSTRUCT_EI,
+ S_RECONSTRUCT_DEV,
+ S_REDO,
+ S_REQUEUE,
+ S_STRIPE_ERROR,
+ S_SUM_DELAYED_BIOS,
+ S_XORS,
+ S_NR_STATS, /* # of stats counters. */
+};
+
+/* Status type -> string mapping: one entry per displayed stats counter. */
+struct stats_map {
+ const enum stats_types type; /* Counter (index into raid_set.stats[]). */
+ const char *str; /* Text emitted in front of the counter value. */
+};
+
+/*
+ * Display order and label of each devel stats counter.
+ *
+ * The order here is the output order and intentionally differs from the
+ * enum order; labels starting with "/" continue the preceding group.
+ */
+static struct stats_map stats_map[] = {
+ { S_BIOS_READ, "r=" },
+ { S_BIOS_ADDED_READ, "/" },
+ { S_BIOS_ENDIO_READ, "/" },
+ { S_BIOS_WRITE, " w=" },
+ { S_BIOS_ADDED_WRITE, "/" },
+ { S_BIOS_ENDIO_WRITE, "/" },
+ { S_DM_IO_READ, " rc=" },
+ { S_DM_IO_WRITE, " wc=" },
+ { S_ACTIVE_READS, " active_reads=" },
+ { S_BANDWIDTH, " bandwidth=" },
+ { S_NO_BANDWIDTH, " no_bandwidth=" },
+ { S_BARRIER, " barrier=" },
+ { S_BIO_COPY_PL_NEXT, " bio_copy_pl_next=" },
+ { S_CAN_MERGE, " can_merge=" },
+ { S_MERGE_PAGE_LOCKED, "/page_locked=" },
+ { S_CANT_MERGE, "/cant_merge=" },
+ { S_CONGESTED, " congested=" },
+ { S_NOT_CONGESTED, "/not_congested=" },
+ { S_DEGRADED, " degraded=" },
+ { S_DELAYED_BIOS, " delayed_bios=" },
+ { S_SUM_DELAYED_BIOS, "/sum_delayed_bios=" },
+ { S_EVICT, " evict=" },
+ { S_FLUSHS, " flushs=" },
+ { S_HITS_1ST, " hits_1st=" },
+ { S_IOS_POST, " ios_post=" },
+ { S_INSCACHE, " inscache=" },
+ { S_MAX_LOOKUP, " max_lookup=" },
+ { S_NO_RW, " no_rw=" },
+ { S_NOSYNC, " nosync=" },
+ { S_PROHIBITPAGEIO, " ProhibitPageIO=" },
+ { S_RECONSTRUCT_EI, " reconstruct_ei=" },
+ { S_RECONSTRUCT_DEV, " reconstruct_dev=" },
+ { S_REDO, " redo=" },
+ { S_REQUEUE, " requeue=" },
+ { S_STRIPE_ERROR, " stripe_error=" },
+ { S_XORS, " xors=" },
+};
+
+/*
+ * A RAID set.
+ *
+ * One instance per mapped target; the member devices are allocated
+ * together with the structure in the trailing dev[] array.
+ */
+/* Signature of an xor routine working on 'count' chunk pointers in 'data'. */
+typedef void (*xor_function_t)(unsigned count, unsigned long **data);
+struct raid_set {
+ struct dm_target *ti; /* Target pointer. */
+
+ struct {
+ unsigned long flags; /* State flags. */
+ spinlock_t in_lock; /* Protects central input list below. */
+ struct bio_list in; /* Pending ios (central input list). */
+ struct bio_list work; /* ios work set. */
+ wait_queue_head_t suspendq; /* suspend synchronization. */
+ atomic_t in_process; /* counter of queued bios (suspendq). */
+ atomic_t in_process_max;/* counter of queued bios max. */
+
+ /* io work. */
+ struct workqueue_struct *wq;
+ struct delayed_work dws;
+ } io;
+
+ /* External locking. */
+ struct dm_raid45_locking_type *locking;
+
+ struct stripe_cache sc; /* Stripe cache for this set. */
+
+ /* Xor optimization. */
+ struct {
+ struct xor_func *f;
+ unsigned chunks;
+ unsigned speed;
+ } xor;
+
+ /* Recovery parameters. */
+ struct recover {
+ struct dm_dirty_log *dl; /* Dirty log. */
+ struct dm_region_hash *rh; /* Region hash. */
+
+ /* dm-mem-cache client resource context for recovery stripes. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ struct list_head stripes; /* List of recovery stripes. */
+
+ region_t nr_regions;
+ region_t nr_regions_to_recover;
+ region_t nr_regions_recovered;
+ unsigned long start_jiffies;
+ unsigned long end_jiffies;
+
+ unsigned bandwidth; /* Recovery bandwidth [%]. */
+ unsigned bandwidth_work; /* Recovery bandwidth [factor]. */
+ unsigned bandwidth_parm; /* " constructor parm. */
+ unsigned io_size; /* io size <= chunk size. */
+ unsigned io_size_parm; /* io size ctr parameter. */
+
+ /* recovery io throttling. */
+ atomic_t io_count[2]; /* counter recover/regular io. */
+ unsigned long last_jiffies;
+
+ struct dm_region *reg; /* Actual region to recover. */
+ sector_t pos; /* Position within region to recover. */
+ sector_t end; /* End of region to recover. */
+ } recover;
+
+ /* RAID set parameters. */
+ struct {
+ struct raid_type *raid_type; /* RAID type (eg, RAID4). */
+ unsigned raid_parms; /* # variable raid parameters. */
+
+ unsigned chunk_size; /* Sectors per chunk. */
+ unsigned chunk_size_parm;
+ unsigned chunk_mask; /* Mask for amount. */
+ unsigned chunk_shift; /* rsector chunk size shift. */
+
+ unsigned io_size; /* Sectors per io. */
+ unsigned io_size_parm;
+ unsigned io_mask; /* Mask for amount. */
+ unsigned io_shift_mask; /* Mask for raid_address(). */
+ unsigned io_shift; /* rsector io size shift. */
+ unsigned pages_per_io; /* Pages per io. */
+
+ sector_t sectors_per_dev; /* Sectors per device. */
+
+ atomic_t failed_devs; /* Amount of devices failed. */
+
+ /* Index of device to initialize (negative: none, see dev_to_init()). */
+ int dev_to_init;
+ int dev_to_init_parm;
+
+ /* Raid devices dynamically allocated. */
+ unsigned raid_devs; /* # of RAID devices below. */
+ unsigned data_devs; /* # of RAID data devices. */
+
+ int ei; /* index of failed RAID device. */
+
+ /* index of dedicated parity device (i.e. RAID4). */
+ int pi;
+ int pi_parm; /* constructor parm for status output. */
+ } set;
+
+ /* REMOVEME: devel stats counters (indexed by enum stats_types). */
+ atomic_t stats[S_NR_STATS];
+
+ /* Dynamically allocated temporary pointers for xor(). */
+ unsigned long **data;
+
+ /* Dynamically allocated RAID devices. Alignment? */
+ struct raid_dev dev[0];
+};
+
+
+/*
+ * Generate the raid_set flag helpers used throughout this file, one set
+ * per flag (e.g. RSRecover(), ClearRSIoError(), TestSetRSDead()).
+ * BITOPS is no longer needed after this and is undefined below.
+ */
+BITOPS(RS, Bandwidth, raid_set, RS_RECOVERY_BANDWIDTH)
+BITOPS(RS, CheckOverwrite, raid_set, RS_CHECK_OVERWRITE)
+BITOPS(RS, Dead, raid_set, RS_DEAD)
+BITOPS(RS, DevelStats, raid_set, RS_DEVEL_STATS)
+BITOPS(RS, IoError, raid_set, RS_IO_ERROR)
+BITOPS(RS, Recover, raid_set, RS_RECOVER)
+BITOPS(RS, RegionGet, raid_set, RS_REGION_GET)
+BITOPS(RS, ScBusy, raid_set, RS_SC_BUSY)
+BITOPS(RS, Suspended, raid_set, RS_SUSPENDED)
+#undef BITOPS
+
+/*
+ * "io allowed" state of a stripe page, kept in the page's Checked bit:
+ * PageIO() tests it, AllowPageIO() sets it and ProhibitPageIO() clears it.
+ */
+#define PageIO(page) PageChecked(page)
+#define AllowPageIO(page) SetPageChecked(page)
+#define ProhibitPageIO(page) ClearPageChecked(page)
+
+/*-----------------------------------------------------------------
+ * Raid-4/5 set structures.
+ *---------------------------------------------------------------*/
+/* RAID level definitions (see raid_types[] for the supported combinations). */
+enum raid_level {
+ raid4, /* Dedicated parity device. */
+ raid5, /* Rotating parity. */
+};
+
+/*
+ * Symmetric/Asymmetric, Left/Right parity rotating algorithms.
+ * 'none' is used by RAID4, which does not rotate parity.
+ */
+enum raid_algorithm {
+ none,
+ left_asym,
+ right_asym,
+ left_sym,
+ right_sym,
+};
+
+/* Static description of one supported RAID type (entry of raid_types[]). */
+struct raid_type {
+ const char *name; /* RAID type name (e.g. "raid5_la"). */
+ const char *descr; /* Descriptor text for logging. */
+ const unsigned parity_devs; /* # of parity devices. */
+ const unsigned minimal_devs; /* minimal # of devices in set. */
+ const enum raid_level level; /* RAID level. */
+ const enum raid_algorithm algorithm; /* RAID algorithm. */
+};
+
+/*
+ * Supported raid types and properties.
+ * Columns: name, description, parity_devs, minimal_devs, level, algorithm.
+ */
+static struct raid_type raid_types[] = {
+ {"raid4", "RAID4 (dedicated parity disk)", 1, 3, raid4, none},
+ {"raid5_la", "RAID5 (left asymmetric)", 1, 3, raid5, left_asym},
+ {"raid5_ra", "RAID5 (right asymmetric)", 1, 3, raid5, right_asym},
+ {"raid5_ls", "RAID5 (left symmetric)", 1, 3, raid5, left_sym},
+ {"raid5_rs", "RAID5 (right symmetric)", 1, 3, raid5, right_sym},
+};
+
+/* Address as calculated by raid_address() for a sector of the set. */
+struct address {
+ sector_t key; /* Hash key (start address of stripe). */
+ unsigned di, pi; /* Data and parity disks index. */
+};
+
+/* REMOVEME: zero all devel statistics counters of a RAID set. */
+static void stats_reset(struct raid_set *rs)
+{
+	unsigned i;
+
+	for (i = S_NR_STATS; i-- > 0; )
+		atomic_set(&rs->stats[i], 0);
+}
+
+/*----------------------------------------------------------------
+ * RAID set management routines.
+ *--------------------------------------------------------------*/
+/*
+ * Begin small helper functions.
+ */
+/*
+ * (Re)queue the io work of a RAID set on its workqueue with the given
+ * delay, cancelling a still pending delayed run first.
+ */
+static void wake_do_raid_delayed(struct raid_set *rs, unsigned long delay)
+{
+	cancel_delayed_work(&rs->io.dws);
+	queue_delayed_work(rs->io.wq, &rs->io.dws, delay);
+}
+
+/*
+ * Queue io work without delay.
+ * @context is the RAID set (void * because the region hash calls this too).
+ */
+static INLINE void wake_do_raid(void *context)
+{
+	struct raid_set *rs = context;
+
+	wake_do_raid_delayed(rs, 0);
+}
+
+/* Sleep on the suspend queue until no io is in process any more. */
+static INLINE void wait_ios(struct raid_set *rs)
+{
+	wait_event(rs->io.suspendq, atomic_read(&rs->io.in_process) == 0);
+}
+
+/* Flag that io has been queued to a member device. */
+static INLINE void io_dev_queued(struct raid_dev *dev)
+{
+	unsigned long *flags = &dev->flags;
+
+	set_bit(IO_QUEUED, flags);
+}
+
+/* Return whether io was queued to the device and reset that flag. */
+static inline int io_dev_clear(struct raid_dev *dev)
+{
+	unsigned long *flags = &dev->flags;
+
+	return test_and_clear_bit(IO_QUEUED, flags);
+}
+
+/* Take an io reference and track the highest reference count seen. */
+static INLINE void io_get(struct raid_set *rs)
+{
+	int in_process = atomic_inc_return(&rs->io.in_process);
+
+	/* REMOVEME: max. */
+	if (atomic_read(&rs->io.in_process_max) < in_process)
+		atomic_set(&rs->io.in_process_max, in_process);
+}
+
+/*
+ * Drop an io reference; the one dropping the last reference
+ * wakes up anybody waiting on the suspend queue.
+ */
+static INLINE void io_put(struct raid_set *rs)
+{
+	atomic_t *in_process = &rs->io.in_process;
+
+	/* Intel: rebuild data corrupter? */
+	if (atomic_read(in_process) == 0) {
+		/* Unbalanced put: complain instead of going negative. */
+		DMERR("%s would go negative!!!", __func__);
+	} else if (atomic_dec_and_test(in_process)) {
+		wake_up(&rs->io.suspendq);
+	}
+}
+
+/* Calculate device sector offset: the bio's sector divided by # of data devices. */
+static INLINE sector_t _sector(struct raid_set *rs, struct bio *bio)
+{
+	sector_t quotient = bio->bi_sector;
+
+	/* sector_div() divides in place and leaves the quotient behind. */
+	sector_div(quotient, rs->set.data_devs);
+	return quotient;
+}
+
+/* Return true unless member device @p has been flagged as failed. */
+static INLINE int dev_operational(struct raid_set *rs, unsigned p)
+{
+	struct raid_dev *dev = rs->dev + p;
+
+	return !test_bit(DEVICE_FAILED, &dev->flags);
+}
+
+/* Number of stripes currently active in the stripe cache. */
+static INLINE int sc_active(struct stripe_cache *sc)
+{
+	atomic_t *active = &sc->active_stripes;
+
+	return atomic_read(active);
+}
+
+/* Number of ios pending on a stripe (non-zero: io pending). */
+static INLINE int stripe_io(struct stripe *stripe)
+{
+	atomic_t *pending = &stripe->io.pending;
+
+	return atomic_read(pending);
+}
+
+/* Account for one more io pending on a stripe. */
+static INLINE void stripe_io_inc(struct stripe *stripe)
+{
+	atomic_t *pending = &stripe->io.pending;
+
+	atomic_inc(pending);
+}
+
+/* Account for one io less pending on a stripe. */
+static INLINE void stripe_io_dec(struct stripe *stripe)
+{
+	atomic_t *pending = &stripe->io.pending;
+
+	atomic_dec(pending);
+}
+
+/*
+ * stripe_io_inc() with the callback signature for_each_io_dev()
+ * expects; the device index @p is not used.
+ */
+static void _stripe_io_inc(struct stripe *stripe, unsigned p)
+{
+	stripe_io_inc(stripe);
+}
+
+/* Flag an error on a stripe and on the given page; counted in devel stats. */
+static INLINE void stripe_error(struct stripe *stripe, struct page *page)
+{
+	SetStripeError(stripe);
+	SetPageError(page);
+	atomic_inc(&RS(stripe->sc)->stats[S_STRIPE_ERROR]);
+}
+
+/*
+ * Page IOed ok: set its dirty state as requested, then flag it
+ * uptodate and error free. A dirty page also gets io allowed.
+ */
+enum dirty_type { CLEAN, DIRTY };
+static INLINE void page_set(struct page *page, enum dirty_type type)
+{
+	if (type == DIRTY) {
+		SetPageDirty(page);
+		AllowPageIO(page);
+	} else if (type == CLEAN) {
+		ClearPageDirty(page);
+	} else {
+		/* Unknown dirty type. */
+		BUG();
+	}
+
+	SetPageUptodate(page);
+	ClearPageError(page);
+}
+
+/*
+ * Return the bits of @state set for the region @sector belongs to;
+ * always 0 when the set is not recovering.
+ */
+static INLINE int
+region_state(struct raid_set *rs, sector_t sector, unsigned long state)
+{
+	struct dm_region_hash *rh = rs->recover.rh;
+
+	if (!RSRecover(rs))
+		return 0;
+
+	return dm_rh_get_state(rh, dm_rh_sector_to_region(rh, sector), 1) &
+	       state;
+}
+
+/* A set counts as degraded once its io error flag has been set. */
+static inline int raid_set_degraded(struct raid_set *rs)
+{
+	int degraded = RSIoError(rs);
+
+	return degraded;
+}
+
+/*
+ * A set stays operational as long as no more devices
+ * have failed than its RAID type has parity devices.
+ */
+static INLINE int raid_set_operational(struct raid_set *rs)
+{
+	int failed = atomic_read(&rs->set.failed_devs);
+
+	/* Too many failed devices -> BAD. */
+	return failed <= rs->set.raid_type->parity_devs;
+}
+
+/*
+ * Return true in case a page_list should be read/written
+ *
+ * Conditions to read/write:
+ *	o 1st page in list not uptodate
+ *	o 1st page in list dirty
+ *	o io on the page is allowed (checked bit) and the page isn't locked
+ *
+ * A positive result is cached in the page's private bit, so the
+ * next call returns true straight away and clears the bit again.
+ */
+static INLINE unsigned page_io(struct page *page)
+{
+	/* Second run: the first one already found the page to need io. */
+	if (PagePrivate(page)) {
+		ClearPagePrivate(page);
+		return 1;
+	}
+
+	/* First run: io allowed, page unlocked and stale or dirty? */
+	if (PageIO(page) && !PageLocked(page) &&
+	    (!PageUptodate(page) || PageDirty(page))) {
+		SetPagePrivate(page);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Call @f_io for each device whose page list needs io (highest
+ * device index first) and return the number of calls made.
+ */
+static INLINE unsigned
+for_each_io_dev(struct raid_set *rs, struct stripe *stripe,
+		void (*f_io)(struct stripe *stripe, unsigned p))
+{
+	unsigned p, calls = 0;
+
+	for (p = rs->set.raid_devs; p-- > 0; ) {
+		if (!page_io(PAGE(stripe, p)))
+			continue;
+
+		f_io(stripe, p);
+		calls++;
+	}
+
+	return calls;
+}
+
+/* True if a particular device has been selected for reconstruction. */
+static INLINE int dev_to_init(struct raid_set *rs)
+{
+	return rs->set.dev_to_init >= 0;
+}
+
+/*
+ * Index of the device to calculate parity on: the device selected
+ * to be initialized (after a spare replacement) if there is one,
+ * the stripe's parity device otherwise.
+ */
+static INLINE unsigned dev_for_parity(struct stripe *stripe)
+{
+	struct raid_set *rs = RS(stripe->sc);
+
+	if (dev_to_init(rs))
+		return rs->set.dev_to_init;
+
+	return stripe->idx.parity;
+}
+
+/*
+ * Return the index of the device to be recovered: the device to
+ * initialize if one is set, the dedicated parity device for RAID4
+ * and -1 otherwise.
+ */
+static int idx_get(struct raid_set *rs)
+{
+	/* Avoid to read in the pages to be reconstructed anyway. */
+	if (dev_to_init(rs))
+		return rs->set.dev_to_init;
+
+	return rs->set.raid_type->level == raid4 ? rs->set.pi : -1;
+}
+
+/*
+ * RAID set congested function: ORs together the congestion
+ * state (for @bdi_bits) of all member devices' queues.
+ */
+static int raid_set_congested(void *congested_data, int bdi_bits)
+{
+	struct raid_set *rs = congested_data;
+	int congested = 0;
+	unsigned p;
+
+	for (p = rs->set.raid_devs; p-- > 0; ) {
+		struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
+
+		congested |= bdi_congested(&q->backing_dev_info, bdi_bits);
+	}
+
+	/* REMOVEME: statistics. */
+	if (congested)
+		atomic_inc(rs->stats + S_CONGESTED);
+	else
+		atomic_inc(rs->stats + S_NOT_CONGESTED);
+
+	return congested;
+}
+
+/*
+ * Flag the RAID set dead; the first caller to do so
+ * also logs the failure and every failed device.
+ */
+static void raid_set_dead(struct raid_set *rs)
+{
+	unsigned p;
+	char buf[BDEVNAME_SIZE];
+
+	/* Already flagged dead -> message has been displayed before. */
+	if (TestSetRSDead(rs))
+		return;
+
+	DMERR("FATAL: too many devices failed -> RAID set dead");
+
+	for (p = 0; p < rs->set.raid_devs; p++) {
+		if (dev_operational(rs, p))
+			continue;
+
+		DMERR("device /dev/%s failed",
+		      bdevname(rs->dev[p].dev->bdev, buf));
+	}
+}
+
+/*
+ * Flag member device @p failed and degrade the RAID set.
+ *
+ * Returns -EPERM if the device had been flagged failed before, else 0.
+ * @stripe may be NULL, in which case the logged error is marked "FAKED".
+ */
+static INLINE int
+raid_set_check_and_degrade(struct raid_set *rs,
+			   struct stripe *stripe, unsigned p)
+{
+	char buf[BDEVNAME_SIZE];
+	struct raid_dev *dev = rs->dev + p;
+
+	if (test_and_set_bit(DEVICE_FAILED, &dev->flags))
+		return -EPERM;
+
+	/* Throw an event in case of member device errors. */
+	dm_table_event(rs->ti->table);
+	atomic_inc(&rs->set.failed_devs);
+
+	/* Only the first member error is recorded and logged. */
+	if (TestSetRSIoError(rs))
+		return 0;
+
+	/* Store index for recovery. */
+	mb();
+	rs->set.ei = p;
+	mb();
+
+	DMERR("CRITICAL: %sio error on device /dev/%s "
+	      "in region=%llu; DEGRADING RAID set",
+	      stripe ? "" : "FAKED ",
+	      bdevname(dev->dev->bdev, buf),
+	      (unsigned long long) (stripe ? stripe->key : 0));
+	DMERR("further device error messages suppressed");
+
+	return 0;
+}
+
+/*
+ * Degrade the set for each device whose stripe page
+ * carries an error, resetting the page error on the way.
+ */
+static void
+raid_set_check_degrade(struct raid_set *rs, struct stripe *stripe)
+{
+	unsigned p;
+
+	for (p = rs->set.raid_devs; p-- > 0; ) {
+		struct page *page = PAGE(stripe, p);
+
+		if (!PageError(page))
+			continue;
+
+		ClearPageError(page);
+		raid_set_check_and_degrade(rs, stripe, p);
+	}
+}
+
+/*
+ * Clear the failed state of member device @p.
+ *
+ * Returns -EPERM if the device wasn't flagged failed, else 0. Once no
+ * failed device is left, the set's io error state is reset as well.
+ */
+static int raid_set_check_and_upgrade(struct raid_set *rs, unsigned p)
+{
+	struct raid_dev *dev = rs->dev + p;
+
+	if (!test_and_clear_bit(DEVICE_FAILED, &dev->flags))
+		return -EPERM;
+
+	/* Last failed device gone? */
+	if (atomic_dec_and_test(&rs->set.failed_devs)) {
+		ClearRSIoError(rs);
+		rs->set.ei = -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Lookup a RAID device by name, by major:minor string or by device number.
+ *
+ * For byname/bymajmin, @dl->dev_name is compared against the formatted
+ * device string; for bynumber, @dl->dev is compared by its bd_dev.
+ * Returns the index of the matching device or -ENODEV.
+ */
+union dev_lookup {
+	const char *dev_name;
+	struct raid_dev *dev;
+};
+enum lookup_type { byname, bymajmin, bynumber };
+static int raid_dev_lookup(struct raid_set *rs, enum lookup_type by,
+			   union dev_lookup *dl)
+{
+	unsigned p;
+
+	/*
+	 * Must be an incremental loop, because the device array
+	 * can have empty slots still on calls from raid_ctr()
+	 */
+	for (p = 0; p < rs->set.raid_devs; p++) {
+		char buf[BDEVNAME_SIZE];
+		struct raid_dev *dev = &rs->dev[p];
+
+		/* First empty slot ends the search. */
+		if (!dev->dev)
+			break;
+
+		/* Numeric lookup needs no string formatting. */
+		if (by == bynumber) {
+			if (dl->dev->dev->bdev->bd_dev ==
+			    dev->dev->bdev->bd_dev)
+				return p;
+
+			continue;
+		}
+
+		/* Format dev string appropriately if necessary. */
+		if (by == byname)
+			bdevname(dev->dev->bdev, buf);
+		else if (by == bymajmin)
+			format_dev_t(buf, dev->dev->bdev->bd_dev);
+
+		if (!strcmp(dl->dev_name, buf))
+			return p;
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * End io wrapper: counts the bio in the devel stats,
+ * ends it with @error and drops its io reference.
+ */
+static INLINE void
+_bio_endio(struct raid_set *rs, struct bio *bio, int error)
+{
+	/* Pick the counter before the bio is handed back. */
+	enum stats_types stat = bio_data_dir(bio) == WRITE ?
+				S_BIOS_ENDIO_WRITE : S_BIOS_ENDIO_READ;
+
+	/* REMOVEME: statistics. */
+	atomic_inc(rs->stats + stat);
+	bio_endio(bio, error);
+	io_put(rs); /* Wake any suspend waiters. */
+}
+
+/*
+ * End small helper functions.
+ */
+
+
+/*
+ * Stripe hash functions
+ */
+/*
+ * Initialize a stripe hash sized for @stripes stripes.
+ *
+ * The number of buckets is the smallest power of 2 which is
+ * >= stripes / 4 (with a minimum of 2 buckets).
+ *
+ * Returns 0 on success or -ENOMEM if the bucket array can't be allocated.
+ */
+static int hash_init(struct stripe_hash *hash, unsigned stripes)
+{
+	unsigned buckets = 2, max_buckets = stripes / 4;
+	/*
+	 * Table of multipliers for hash_fn/table size optimization.
+	 * NOTE(review): 27 is not a prime; value kept unchanged here.
+	 */
+	static const unsigned hash_primes[] = {
+		3, 7, 13, 27, 53, 97, 193, 389, 769,
+		1543, 3079, 6151, 12289, 24593,
+	};
+
+	/* Calculate number of buckets. */
+	while (buckets < max_buckets)
+		buckets <<= 1;
+
+	/* Allocate stripe hash. */
+	hash->hash = vmalloc(buckets * sizeof(*hash->hash));
+	if (!hash->hash)
+		return -ENOMEM;
+
+	hash->buckets = buckets;
+	hash->mask = buckets - 1;
+
+	/*
+	 * ffs() is 1-based and buckets >= 2, hence shift >= 2;
+	 * clamp it so that shift - 2 indexes into hash_primes[].
+	 */
+	hash->shift = ffs(buckets);
+	if (hash->shift > ARRAY_SIZE(hash_primes) + 1)
+		hash->shift = ARRAY_SIZE(hash_primes) + 1;
+
+	/* The table index has to stay within hash_primes[]. */
+	BUG_ON(hash->shift - 2 >= ARRAY_SIZE(hash_primes));
+	hash->prime = hash_primes[hash->shift - 2];
+
+	/* Initialize buckets. */
+	while (buckets--)
+		INIT_LIST_HEAD(hash->hash + buckets);
+
+	return 0;
+}
+
+/* Free the bucket array of a stripe hash (safe to call on an empty hash). */
+static INLINE void hash_exit(struct stripe_hash *hash)
+{
+	if (!hash->hash)
+		return;
+
+	vfree(hash->hash);
+	hash->hash = NULL;
+}
+
+/*
+ * List delete (locked/unlocked) functions.
+ *
+ * LIST_DEL(name, list) defines stripe_<name>_del(stripe, lock), which
+ * takes the stripe off its list with index 'list' if it is on one.
+ * With LIST_LOCKED this is done under the stripe cache's LOCK_LRU
+ * spinlock; with LIST_UNLOCKED no lock is taken here.
+ */
+enum list_lock_type { LIST_LOCKED, LIST_UNLOCKED };
+#define LIST_DEL(name, list) \
+static void stripe_ ## name ## _del(struct stripe *stripe, \
+ enum list_lock_type lock) { \
+ struct list_head *lh = stripe->lists + (list); \
+ spinlock_t *l = NULL; \
+\
+ if (lock == LIST_LOCKED) { \
+ l = stripe->sc->locks + LOCK_LRU; \
+ spin_lock_irq(l); \
+ } \
+\
+\
+ if (!list_empty(lh)) \
+ list_del_init(lh); \
+\
+ if (lock == LIST_LOCKED) \
+ spin_unlock_irq(l); \
+}
+
+/* stripe_hash_del() and stripe_lru_del(). */
+LIST_DEL(hash, LIST_HASH)
+LIST_DEL(lru, LIST_LRU)
+#undef LIST_DEL
+
+/*
+ * List add (head/tail/locked/unlocked) functions.
+ *
+ * LIST_ADD(name, list) defines stripe_<name>_add(stripe, pos, lock), which
+ * adds the stripe to the head or tail of the stripe cache's list with index
+ * 'list' unless the stripe is on such a list already.
+ * With LIST_LOCKED this is done under the stripe cache's LOCK_LRU spinlock
+ * (for all instances below); with LIST_UNLOCKED no lock is taken here.
+ */
+enum list_pos_type { POS_HEAD, POS_TAIL };
+#define LIST_ADD(name, list) \
+static void stripe_ ## name ## _add(struct stripe *stripe, \
+ enum list_pos_type pos, \
+ enum list_lock_type lock) { \
+ struct list_head *lh = stripe->lists + (list); \
+ struct stripe_cache *sc = stripe->sc; \
+ spinlock_t *l = NULL; \
+\
+ if (lock == LIST_LOCKED) { \
+ l = sc->locks + LOCK_LRU; \
+ spin_lock_irq(l); \
+ } \
+\
+ if (list_empty(lh)) { \
+ if (pos == POS_HEAD) \
+ list_add(lh, sc->lists + (list)); \
+ else \
+ list_add_tail(lh, sc->lists + (list)); \
+ } \
+\
+ if (lock == LIST_LOCKED) \
+ spin_unlock_irq(l); \
+}
+
+/* stripe_endio_add(), stripe_io_add() and stripe_lru_add(). */
+LIST_ADD(endio, LIST_ENDIO)
+LIST_ADD(io, LIST_IO)
+LIST_ADD(lru, LIST_LRU)
+#undef LIST_ADD
+
+/*
+ * Pop the first stripe off sc->lists[list] into the variable 'stripe'
+ * (NULL if the list is empty).
+ *
+ * Relies on 'sc' and 'stripe' being in scope at the expansion site and
+ * takes no lock itself.
+ * NOTE(review): the expansion already ends in ';', so "POP(x)" compiles
+ * without a trailing semicolon and "POP(x);" adds an empty statement.
+ */
+#define POP(list) \
+ do { \
+ if (list_empty(sc->lists + list)) \
+ stripe = NULL; \
+ else { \
+ stripe = list_first_entry(&sc->lists[list], \
+ struct stripe, \
+ lists[list]); \
+ list_del_init(&stripe->lists[list]); \
+ } \
+ } while (0);
+
+/*
+ * Pop an available stripe off the lru list and take it off
+ * the hash as well; returns NULL if the lru list is empty.
+ */
+static struct stripe *stripe_lru_pop(struct stripe_cache *sc)
+{
+	struct stripe *stripe;
+	spinlock_t *lru_lock = &sc->locks[LOCK_LRU];
+
+	spin_lock_irq(lru_lock);
+	POP(LIST_LRU);
+	spin_unlock_irq(lru_lock);
+
+	if (!stripe)
+		return NULL;
+
+	/* Remove from hash before reuse. */
+	stripe_hash_del(stripe, LIST_UNLOCKED);
+	return stripe;
+}
+
+/* Map a stripe key to a bucket index: multiply, shift down and mask. */
+static inline unsigned hash_fn(struct stripe_hash *hash, sector_t key)
+{
+	sector_t h = key * hash->prime;
+
+	h >>= hash->shift;
+	return (unsigned) (h & hash->mask);
+}
+
+/* Return the hash bucket (list head) a stripe key belongs to. */
+static inline struct list_head *
+hash_bucket(struct stripe_hash *hash, sector_t key)
+{
+	return &hash->hash[hash_fn(hash, key)];
+}
+
+/* Add a stripe to the head of the bucket its key hashes to. */
+static inline void hash_insert(struct stripe_hash *hash, struct stripe *stripe)
+{
+	struct list_head *bucket = hash_bucket(hash, stripe->key);
+
+	list_add(&stripe->lists[LIST_HASH], bucket);
+}
+
+/* Insert a stripe into the hash of a stripe cache. */
+static inline void
+sc_insert(struct stripe_cache *sc, struct stripe *stripe)
+{
+	struct stripe_hash *hash = &sc->hash;
+
+	hash_insert(hash, stripe);
+}
+
+/*
+ * Lookup a stripe by key in the stripe hash.
+ *
+ * Returns the stripe or NULL if there's none with that key; the longest
+ * bucket walk seen so far is recorded in the S_MAX_LOOKUP counter.
+ */
+static inline struct stripe *
+stripe_lookup(struct stripe_cache *sc, sector_t key)
+{
+	unsigned depth = 0;
+	struct stripe *stripe;
+	struct list_head *bucket = hash_bucket(&sc->hash, key);
+
+	list_for_each_entry(stripe, bucket, lists[LIST_HASH]) {
+		/* REMOVEME: statistics. */
+		depth++;
+		if (depth > atomic_read(RS(sc)->stats + S_MAX_LOOKUP))
+			atomic_set(RS(sc)->stats + S_MAX_LOOKUP, depth);
+
+		if (stripe->key == key)
+			return stripe;
+	}
+
+	return NULL;
+}
+
+/*
+ * Resize the stripe cache hash on size changes.
+ *
+ * A new hash is set up once the number of stripes has more than doubled
+ * or dropped below a quarter of what it was at the last resize. The
+ * stripes get rehashed under the LRU lock and the old bucket array is
+ * freed afterwards.
+ *
+ * Returns 0 on success (or nothing to do), else hash_init()'s error.
+ */
+static int hash_resize(struct stripe_cache *sc)
+{
+	int r;
+	unsigned b;
+	struct stripe_hash new_hash, old_hash;
+	struct list_head *pos, *tmp;
+	spinlock_t *lock = sc->locks + LOCK_LRU;
+
+	/* Resize threshold not reached? */
+	if (atomic_read(&sc->stripes) <= 2 * atomic_read(&sc->stripes_last) &&
+	    atomic_read(&sc->stripes) >= atomic_read(&sc->stripes_last) / 4)
+		return 0;
+
+	r = hash_init(&new_hash, atomic_read(&sc->stripes));
+	if (r)
+		return r;
+
+	spin_lock_irq(lock);
+
+	/* Walk old buckets (if any) and insert into new. */
+	if (sc->hash.hash) {
+		for (b = sc->hash.buckets; b-- > 0; ) {
+			list_for_each_safe(pos, tmp, sc->hash.hash + b)
+				hash_insert(&new_hash,
+					    list_entry(pos, struct stripe,
+						       lists[LIST_HASH]));
+		}
+	}
+
+	/* Switch over to the new hash and remember the size it was made for. */
+	old_hash = sc->hash;
+	sc->hash = new_hash;
+	atomic_set(&sc->stripes_last, atomic_read(&sc->stripes));
+
+	spin_unlock_irq(lock);
+
+	hash_exit(&old_hash);
+	return 0;
+}
+
+/*
+ * Stripe cache locking functions
+ */
+/*
+ * Dummy lock function for local RAID4+5.
+ *
+ * Takes no lock at all; it just returns a non-NULL handle (its own
+ * address), which stripe_lock() treats as success.
+ */
+static void *no_lock(sector_t key, enum dm_lock_type type)
+{
+ return &no_lock;
+}
+
+/* Dummy unlock function for local RAID4+5: nothing to release. */
+static void no_unlock(void *lock_handle)
+{
+}
+
+/* No locking (for local RAID 4+5): both methods are the dummies above. */
+static struct dm_raid45_locking_type locking_none = {
+ .lock = no_lock,
+ .unlock = no_unlock,
+};
+
+/*
+ * Clustered RAID 4+5.
+ *
+ * FIXME: code this. Until then it uses the same dummy methods as
+ * locking_none, i.e. it provides no locking at all.
+ */
+static struct dm_raid45_locking_type locking_cluster = {
+ .lock = no_lock,
+ .unlock = no_unlock,
+};
+
+/*
+ * Lock a stripe (for clustering) via the set's locking type: shared
+ * for reads, exclusive otherwise. The lock handle is kept in the stripe.
+ *
+ * Returns 0 on success or -EPERM if no lock handle was obtained.
+ */
+static int
+stripe_lock(struct raid_set *rs, struct stripe *stripe, int rw, sector_t key)
+{
+	if (rw == READ)
+		stripe->lock = rs->locking->lock(key, DM_RAID45_SHARED);
+	else
+		stripe->lock = rs->locking->lock(key, DM_RAID45_EX);
+
+	if (!stripe->lock)
+		return -EPERM;
+
+	return 0;
+}
+
+/* Unlock a stripe (for clustering) and forget its lock handle. */
+static void stripe_unlock(struct raid_set *rs, struct stripe *stripe)
+{
+	void *handle = stripe->lock;
+
+	rs->locking->unlock(handle);
+	stripe->lock = NULL;
+}
+
+/*
+ * Stripe cache functions.
+ */
+/*
+ * Invalidate all page lists pages of a stripe by
+ * resetting the state bits of each list's first page.
+ *
+ * I only keep state for the whole list in the first page.
+ */
+static INLINE void
+stripe_pages_invalidate(struct stripe *stripe)
+{
+	unsigned p;
+
+	for (p = RS(stripe->sc)->set.raid_devs; p-- > 0; ) {
+		struct page *page = PAGE(stripe, p);
+
+		ProhibitPageIO(page);
+		ClearPageChecked(page);
+		ClearPageDirty(page);
+		ClearPageError(page);
+		__clear_page_locked(page);
+		ClearPagePrivate(page);
+		ClearPageUptodate(page);
+	}
+}
+
+/* Prepare stripe for (re)use: reset its io flags and all its page states. */
+static INLINE void stripe_invalidate(struct stripe *stripe)
+{
+	stripe->io.flags = 0;
+	stripe_pages_invalidate(stripe);
+}
+
+/* Allow io on the pages of all chunks of a stripe. */
+static INLINE void stripe_allow_io(struct stripe *stripe)
+{
+	unsigned p;
+
+	for (p = RS(stripe->sc)->set.raid_devs; p-- > 0; )
+		AllowPageIO(PAGE(stripe, p));
+}
+
+/*
+ * Initialize a stripe: link its per device sets to the memory objects,
+ * set up the bio lists, list heads and counters and invalidate it.
+ */
+static void
+stripe_init(struct stripe_cache *sc, struct stripe *stripe)
+{
+	unsigned i, p;
+
+	/* Work all io chunks. */
+	for (p = RS(sc)->set.raid_devs; p-- > 0; ) {
+		struct stripe_set *ss = &stripe->ss[p];
+
+		stripe->obj[p].private = ss;
+		ss->stripe = stripe;
+
+		for (i = ARRAY_SIZE(ss->bl); i-- > 0; )
+			bio_list_init(&ss->bl[i]);
+	}
+
+	stripe->sc = sc;
+
+	for (i = ARRAY_SIZE(stripe->lists); i-- > 0; )
+		INIT_LIST_HEAD(&stripe->lists[i]);
+
+	atomic_set(&stripe->cnt, 0);
+	atomic_set(&stripe->io.pending, 0);
+
+	stripe_invalidate(stripe);
+}
+
+/* Number of pages needed to hold a chunk of @io_size sectors (rounded up). */
+static inline unsigned chunk_pages(unsigned io_size)
+{
+	unsigned pages = dm_div_up(io_size, SECTORS_PER_PAGE);
+
+	return pages;
+}
+
+/* Number of pages per stripe: pages per chunk times # of RAID devices. */
+static inline unsigned stripe_pages(struct raid_set *rs, unsigned io_size)
+{
+	unsigned per_chunk = chunk_pages(io_size);
+
+	return per_chunk * rs->set.raid_devs;
+}
+
+/*
+ * Zero part of a page_list (recovery): whole pages of device @p's
+ * list, beginning with the page holding sector offset @start and
+ * covering @count sectors.
+ */
+static INLINE void stripe_zero_pl_part(struct stripe *stripe, unsigned p,
+				       unsigned start, unsigned count)
+{
+	unsigned pages = chunk_pages(count);
+	struct page_list *pl;
+
+	/* Get offset into the page_list. */
+	pl = pl_elem(PL(stripe, p), start / SECTORS_PER_PAGE);
+	BUG_ON(!pl);
+
+	for (; pl && pages--; pl = pl->next) {
+		BUG_ON(!pl->page);
+		memset(page_address(pl->page), 0, PAGE_SIZE);
+	}
+}
+
+/* Zero the complete chunk of device @p in a stripe (e.g. the parity chunk). */
+static INLINE void stripe_zero_chunk(struct stripe *stripe, unsigned p)
+{
+	unsigned sectors = stripe->io.size;
+
+	stripe_zero_pl_part(stripe, p, 0, sectors);
+}
+
+/*
+ * Return dynamic stripe structure size: the stripe
+ * itself plus one stripe_set per RAID device.
+ */
+static INLINE size_t stripe_size(struct raid_set *rs)
+{
+	size_t sets = rs->set.raid_devs * sizeof(struct stripe_set);
+
+	return sizeof(struct stripe) + sets;
+}
+
+/*
+ * Allocate a stripe from the slab cache plus its memory object from @mc.
+ *
+ * With SC_GROW, the dm-mem-cache is grown by one object first (and shrunk
+ * again on failure); with SC_KEEP the object has to be available already.
+ * Returns the initialized stripe or NULL on failure.
+ */
+/* XXX adjust to cope with stripe cache and recovery stripe caches. */
+enum grow { SC_GROW, SC_KEEP };
+static struct stripe *stripe_alloc(struct stripe_cache *sc,
+				   struct dm_mem_cache_client *mc,
+				   enum grow grow)
+{
+	struct stripe *stripe = kmem_cache_zalloc(sc->kc.cache, GFP_KERNEL);
+
+	if (!stripe)
+		return NULL;
+
+	/* Grow the dm-mem-cache by one object. */
+	if (grow == SC_GROW && dm_mem_cache_grow(mc, 1))
+		goto err_free;
+
+	stripe->obj = dm_mem_cache_alloc(mc);
+	if (!stripe->obj)
+		goto err_shrink;
+
+	stripe_init(sc, stripe);
+	return stripe;
+
+err_shrink:
+	if (grow == SC_GROW)
+		dm_mem_cache_shrink(mc, 1);
+err_free:
+	kmem_cache_free(sc->kc.cache, stripe);
+	return NULL;
+}
+
+/*
+ * Release a stripe: give its memory object back to @mc, shrink
+ * that memory cache by one object and free the stripe itself.
+ */
+static void stripe_free(struct stripe *stripe, struct dm_mem_cache_client *mc)
+{
+	struct kmem_cache *cache = stripe->sc->kc.cache;
+
+	dm_mem_cache_free(mc, stripe->obj);
+	dm_mem_cache_shrink(mc, 1);
+	kmem_cache_free(cache, stripe);
+}
+
+/* Free all recovery stripes on the set's recovery stripe list. */
+static void stripe_recover_free(struct raid_set *rs)
+{
+	struct recover *rec = &rs->recover;
+	struct stripe *stripe;
+
+	while (!list_empty(&rec->stripes)) {
+		stripe = list_first_entry(&rec->stripes, struct stripe,
+					  lists[LIST_RECOVER]);
+		list_del(&stripe->lists[LIST_RECOVER]);
+		stripe_free(stripe, rec->mem_cache_client);
+	}
+}
+
+/*
+ * Push a stripe safely onto the endio list to be handled by do_endios().
+ * The io work only gets woken up when the list was empty before.
+ */
+static INLINE void stripe_endio_push(struct stripe *stripe)
+{
+	int was_empty;
+	unsigned long flags;
+	struct stripe_cache *sc = stripe->sc;
+	spinlock_t *endio_lock = &sc->locks[LOCK_ENDIO];
+
+	spin_lock_irqsave(endio_lock, flags);
+	was_empty = list_empty(&sc->lists[LIST_ENDIO]);
+	/* Endio lock is held already -> unlocked add. */
+	stripe_endio_add(stripe, POS_HEAD, LIST_UNLOCKED);
+	spin_unlock_irqrestore(endio_lock, flags);
+
+	if (was_empty)
+		wake_do_raid(RS(sc));
+}
+
+/* Check under the endio lock whether the stripe cache's endio list is empty. */
+static INLINE int stripe_endio_empty(struct stripe_cache *sc)
+{
+	int empty;
+	spinlock_t *endio_lock = &sc->locks[LOCK_ENDIO];
+
+	spin_lock_irq(endio_lock);
+	empty = list_empty(&sc->lists[LIST_ENDIO]);
+	spin_unlock_irq(endio_lock);
+
+	return empty;
+}
+
+/*
+ * Pop a stripe safely off the endio list.
+ *
+ * Returns the first stripe on the list or NULL if the list is empty.
+ */
+static struct stripe *stripe_endio_pop(struct stripe_cache *sc)
+{
+	struct stripe *stripe;
+	spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+	/* This runs in parallel with endio(). */
+	spin_lock_irq(lock);
+	/* Terminated like a statement, the same way stripe_lru_pop() does. */
+	POP(LIST_ENDIO);
+	spin_unlock_irq(lock);
+	return stripe;
+}
+
+#undef POP
+
+/*
+ * Evict stripe from cache: take it off the hash and put it at
+ * the tail of the LRU list unless it is on that list already.
+ */
+static void stripe_evict(struct stripe *stripe)
+{
+	struct raid_set *rs = RS(stripe->sc);
+
+	stripe_hash_del(stripe, LIST_UNLOCKED); /* Take off hash. */
+
+	/* On the LRU list already -> done. */
+	if (!list_empty(&stripe->lists[LIST_LRU]))
+		return;
+
+	stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+	atomic_inc(&rs->stats[S_EVICT]); /* REMOVEME: statistics. */
+}
+
+/*
+ * Grow stripe cache by up to @stripes stripes.
+ *
+ * New stripes go to the tail of the LRU list. Returns -ENOMEM if not
+ * all stripes could be allocated (those already added stay), else the
+ * result of the hash resize.
+ */
+static int
+sc_grow(struct stripe_cache *sc, unsigned stripes, enum grow grow)
+{
+	int r = 0;
+	struct raid_set *rs = RS(sc);
+	struct stripe *stripe;
+
+	/* Try to allocate this many (additional) stripes. */
+	for (; stripes; stripes--) {
+		stripe = stripe_alloc(sc, sc->mem_cache_client, grow);
+		if (unlikely(!stripe)) {
+			r = -ENOMEM;
+			break;
+		}
+
+		stripe->io.size = rs->set.io_size;
+		stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+		atomic_inc(&sc->stripes);
+	}
+
+	ClearRSScBusy(rs);
+	if (r)
+		return r;
+
+	return hash_resize(sc);
+}
+
+/*
+ * Shrink stripe cache by up to @stripes stripes taken off the LRU list.
+ *
+ * Returns -ENOENT if the LRU list ran empty before that many stripes
+ * were freed, else the result of the hash resize.
+ */
+static int sc_shrink(struct stripe_cache *sc, unsigned stripes)
+{
+	int r = 0;
+	struct stripe *stripe;
+
+	/* Try to get unused stripe from LRU list. */
+	for (; stripes; stripes--) {
+		stripe = stripe_lru_pop(sc);
+		if (!stripe) {
+			r = -ENOENT;
+			break;
+		}
+
+		/* An lru stripe may never have ios pending! */
+		BUG_ON(stripe_io(stripe));
+		stripe_free(stripe, sc->mem_cache_client);
+		atomic_dec(&sc->stripes);
+	}
+
+	/* Check if stats are still sane. */
+	if (atomic_read(&sc->max_active_stripes) >
+	    atomic_read(&sc->stripes))
+		atomic_set(&sc->max_active_stripes, 0);
+
+	if (r)
+		return r;
+
+	ClearRSScBusy(RS(sc));
+	return hash_resize(sc);
+}
+
+/*
+ * Create stripe cache.
+ *
+ * Sets up the lists, locks and counters, the slab cache stripes are
+ * allocated from, the dm-mem-cache clients for regular and recovery
+ * stripes, the recovery stripes and finally @stripes stripes on the LRU
+ * list.
+ *
+ * Returns 0 on success or a negative errno. Nothing is torn down here on
+ * failure; the state left behind only consists of valid or NULL pointers
+ * and initialized lists, so that sc_exit() can clean it up.
+ */
+static int sc_init(struct raid_set *rs, unsigned stripes)
+{
+	int r;
+	unsigned i, nr;
+	struct stripe_cache *sc = &rs->sc;
+	struct stripe *stripe;
+	struct recover *rec = &rs->recover;
+
+	/* Initialize lists and locks. */
+	i = ARRAY_SIZE(sc->lists);
+	while (i--)
+		INIT_LIST_HEAD(sc->lists + i);
+
+	/* Up front, so that the list is valid on any failure below. */
+	INIT_LIST_HEAD(&rec->stripes);
+
+	i = NR_LOCKS;
+	while (i--)
+		spin_lock_init(sc->locks + i);
+
+	/* Initialize atomic variables. */
+	atomic_set(&sc->stripes, 0);
+	atomic_set(&sc->stripes_last, 0);
+	atomic_set(&sc->stripes_to_shrink, 0);
+	atomic_set(&sc->active_stripes, 0);
+	atomic_set(&sc->max_active_stripes, 0); /* REMOVEME: statistics. */
+
+	/*
+	 * We need a runtime unique # to suffix the kmem cache name
+	 * because we'll have one for each active RAID set.
+	 */
+	nr = atomic_inc_return(&_stripe_sc_nr);
+	snprintf(sc->kc.name, sizeof(sc->kc.name), "%s_%u", TARGET, nr);
+	sc->kc.cache = kmem_cache_create(sc->kc.name, stripe_size(rs),
+					 0, 0, NULL);
+	if (!sc->kc.cache)
+		return -ENOMEM;
+
+	/* Create memory cache client context for RAID stripe cache. */
+	sc->mem_cache_client =
+		dm_mem_cache_client_create(stripes, rs->set.raid_devs,
+					   chunk_pages(rs->set.io_size));
+	if (IS_ERR(sc->mem_cache_client)) {
+		r = PTR_ERR(sc->mem_cache_client);
+		/* Don't leave an error pointer behind to be destroyed. */
+		sc->mem_cache_client = NULL;
+		return r;
+	}
+
+	/* Create memory cache client context for RAID recovery stripe(s). */
+	rec->mem_cache_client =
+		dm_mem_cache_client_create(MAX_RECOVER, rs->set.raid_devs,
+					   chunk_pages(rec->io_size));
+	if (IS_ERR(rec->mem_cache_client)) {
+		r = PTR_ERR(rec->mem_cache_client);
+		rec->mem_cache_client = NULL;
+		return r;
+	}
+
+	/* Allocate stripe for set recovery. */
+	/* XXX: cope with MAX_RECOVERY. */
+	for (i = 0; i < MAX_RECOVER; i++) {
+		stripe = stripe_alloc(sc, rec->mem_cache_client, SC_KEEP);
+		if (!stripe)
+			return -ENOMEM;
+
+		SetStripeRecover(stripe);
+		stripe->io.size = rec->io_size;
+		list_add(stripe->lists + LIST_RECOVER, &rec->stripes);
+	}
+
+	/*
+	 * Allocate the stripe objects from the
+	 * cache and add them to the LRU list.
+	 */
+	return sc_grow(sc, stripes, SC_KEEP);
+}
+
+/* Destroy the stripe cache: its stripes, kmem cache, clients and hash. */
+static void sc_exit(struct stripe_cache *sc)
+{
+ if (sc->kc.cache) {
+ BUG_ON(sc_shrink(sc, atomic_read(&sc->stripes))); /* Must succeed. */
+ kmem_cache_destroy(sc->kc.cache);
+ }
+ /* Clients are only NULL-checked, so they must never hold an ERR_PTR. */
+ if (sc->mem_cache_client)
+ dm_mem_cache_client_destroy(sc->mem_cache_client);
+ /* Stop recovery and free its stripe(s) before their client goes. */
+ ClearRSRecover(RS(sc));
+ stripe_recover_free(RS(sc));
+ if (RS(sc)->recover.mem_cache_client)
+ dm_mem_cache_client_destroy(RS(sc)->recover.mem_cache_client);
+
+ hash_exit(&sc->hash);
+}
+
+/*
+ * Calculate RAID address (thx MD).
+ *
+ * Fills @addr with the index of the disk holding the data chunk (di),
+ * the parity disk index (pi) and the stripe start within the address
+ * space of the set (key, used as the stripe cache hash key). On unknown
+ * RAID level/algorithm @addr is returned unset after logging an error.
+ */
+static struct address *
+raid_address(struct raid_set *rs, sector_t sector, struct address *addr)
+{
+ unsigned data_devs = rs->set.data_devs, di, pi,
+ raid_devs = rs->set.raid_devs;
+ sector_t stripe, tmp;
+
+ /*
+ * chunk_number = sector / chunk_size
+ * stripe = chunk_number / data_devs
+ * di = chunk_number % data_devs;
+ */
+ stripe = sector >> rs->set.chunk_shift;
+ di = sector_div(stripe, data_devs);
+
+ switch (rs->set.raid_type->level) {
+ case raid5:
+ tmp = stripe;
+ pi = sector_div(tmp, raid_devs);
+
+ switch (rs->set.raid_type->algorithm) {
+ case left_asym: /* Left asymmetric. */
+ pi = data_devs - pi; /* Fall through. */
+ case right_asym: /* Right asymmetric. */
+ if (di >= pi)
+ di++;
+ break;
+
+ case left_sym: /* Left symmetric. */
+ pi = data_devs - pi; /* Fall through. */
+ case right_sym: /* Right symmetric. */
+ di = (pi + di + 1) % raid_devs;
+ break;
+
+ default:
+ DMERR("Unknown RAID algorithm %d",
+ rs->set.raid_type->algorithm);
+ goto out;
+ }
+
+ break;
+
+ case raid4:
+ pi = rs->set.pi; /* Fixed parity device; skip it for data. */
+ if (di >= pi)
+ di++;
+ break;
+
+ default:
+ DMERR("Unknown RAID level %d", rs->set.raid_type->level);
+ goto out;
+ }
+
+ /*
+ * Hash key = start offset on any single device of the RAID set;
+ * adjusted in case io size differs from chunk size.
+ */
+ addr->key = (stripe << rs->set.chunk_shift) +
+ (sector & rs->set.io_shift_mask);
+ addr->di = di;
+ addr->pi = pi;
+
+out:
+ return addr;
+}
+
+/*
+ * Copy data across between stripe pages and bio vectors: rw == READ
+ * copies from the page list @pl into @bio, else from @bio into @pl.
+ * Pay attention to data alignment in stripe and bio pages.
+ */
+static void
+bio_copy_page_list(int rw, struct stripe *stripe,
+ struct page_list *pl, struct bio *bio)
+{
+ unsigned i, page_offset;
+ void *page_addr;
+ struct raid_set *rs = RS(stripe->sc);
+ struct bio_vec *bv;
+
+ /* Get start page in page list for this sector. */
+ i = (bio->bi_sector & rs->set.io_mask) / SECTORS_PER_PAGE;
+ pl = pl_elem(pl, i);
+
+ page_addr = page_address(pl->page);
+ page_offset = to_bytes(bio->bi_sector & (SECTORS_PER_PAGE - 1));
+
+ /* Walk all segments and copy data across between bio_vecs and pages. */
+ bio_for_each_segment(bv, bio, i) {
+ int len = bv->bv_len, size;
+ unsigned bio_offset = 0;
+ void *bio_addr = __bio_kmap_atomic(bio, i, KM_USER0);
+redo:
+ size = (page_offset + len > PAGE_SIZE) ?
+ PAGE_SIZE - page_offset : len;
+ /* size = bytes to copy, limited to the rest of the stripe page. */
+ if (rw == READ)
+ memcpy(bio_addr + bio_offset,
+ page_addr + page_offset, size);
+ else
+ memcpy(page_addr + page_offset,
+ bio_addr + bio_offset, size);
+
+ page_offset += size;
+ if (page_offset == PAGE_SIZE) {
+ /*
+ * We reached the end of the chunk page ->
+ * need refer to the next one to copy more data.
+ */
+ len -= size;
+ if (len) {
+ /* Get next page. */
+ pl = pl->next;
+ BUG_ON(!pl);
+ page_addr = page_address(pl->page);
+ page_offset = 0;
+ bio_offset += size;
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BIO_COPY_PL_NEXT);
+ goto redo;
+ }
+ }
+
+ __bio_kunmap_atomic(bio_addr, KM_USER0);
+ }
+}
+
+/*
+ * Xor optimization macros.
+ */
+/* Xor data pointer declaration and initialization macros. */
+#define DECLARE_2 unsigned long *d0 = data[0], *d1 = data[1]
+#define DECLARE_3 DECLARE_2, *d2 = data[2]
+#define DECLARE_4 DECLARE_3, *d3 = data[3]
+#define DECLARE_5 DECLARE_4, *d4 = data[4]
+#define DECLARE_6 DECLARE_5, *d5 = data[5]
+#define DECLARE_7 DECLARE_6, *d6 = data[6]
+#define DECLARE_8 DECLARE_7, *d7 = data[7]
+
+/* Xor unroll macros: Dn(i) xors word i of d1..d(n-1) into d0[i]. */
+#define D2(n) d0[n] = d0[n] ^ d1[n]
+#define D3(n) D2(n) ^ d2[n]
+#define D4(n) D3(n) ^ d3[n]
+#define D5(n) D4(n) ^ d4[n]
+#define D6(n) D5(n) ^ d5[n]
+#define D7(n) D6(n) ^ d6[n]
+#define D8(n) D7(n) ^ d7[n]
+
+#define X_2(macro, offset) macro(offset); macro(offset + 1);
+#define X_4(macro, offset) X_2(macro, offset); X_2(macro, offset + 2);
+#define X_8(macro, offset) X_4(macro, offset); X_4(macro, offset + 4);
+#define X_16(macro, offset) X_8(macro, offset); X_8(macro, offset + 8);
+#define X_32(macro, offset) X_16(macro, offset); X_16(macro, offset + 16);
+#define X_64(macro, offset) X_32(macro, offset); X_32(macro, offset + 32);
+
+/* Define a _xor<chunks>_<xors_per_run>() function, e.g. _xor2_8(). */
+#define _XOR(chunks, xors_per_run) \
+static void _xor ## chunks ## _ ## xors_per_run(unsigned long **data) \
+{ \
+ unsigned end = XOR_SIZE / sizeof(data[0]), i; \
+ DECLARE_ ## chunks; \
+\
+ for (i = 0; i < end; i += xors_per_run) { \
+ X_ ## xors_per_run(D ## chunks, i); \
+ } \
+}
+
+/* Define xor functions for 2 - 8 chunks. */
+#define MAKE_XOR_PER_RUN(xors_per_run) \
+ _XOR(2, xors_per_run); _XOR(3, xors_per_run); \
+ _XOR(4, xors_per_run); _XOR(5, xors_per_run); \
+ _XOR(6, xors_per_run); _XOR(7, xors_per_run); \
+ _XOR(8, xors_per_run);
+
+MAKE_XOR_PER_RUN(8) /* Define _xor2_8() - _xor8_8() functions. */
+MAKE_XOR_PER_RUN(16) /* Define _xor2_16() - _xor8_16() functions. */
+MAKE_XOR_PER_RUN(32) /* Define _xor2_32() - _xor8_32() functions. */
+MAKE_XOR_PER_RUN(64) /* Define _xor2_64() - _xor8_64() functions. */
+
+#define MAKE_XOR(xors_per_run) \
+struct { \
+ void (*f)(unsigned long **); \
+} static xor_funcs ## xors_per_run[] = { \
+ { NULL }, \
+ { NULL }, \
+ { _xor2_ ## xors_per_run }, \
+ { _xor3_ ## xors_per_run }, \
+ { _xor4_ ## xors_per_run }, \
+ { _xor5_ ## xors_per_run }, \
+ { _xor6_ ## xors_per_run }, \
+ { _xor7_ ## xors_per_run }, \
+ { _xor8_ ## xors_per_run }, \
+}; \
+\
+static void xor_ ## xors_per_run(unsigned n, unsigned long **data) \
+{ \
+ /* Table is indexed by # of chunks n; slots 0 and 1 are NULL. */ \
+ xor_funcs ## xors_per_run[n].f(data); \
+}
+
+/* Define xor_8() - xor_64() functions and their xor_funcs<N>[] tables. */
+MAKE_XOR(8)
+MAKE_XOR(16)
+MAKE_XOR(32)
+MAKE_XOR(64)
+
+/* Maximum number of chunks, which can be xor'ed in one go. */
+#define XOR_CHUNKS_MAX (ARRAY_SIZE(xor_funcs8) - 1)
+
+struct xor_func { /* An xor function variant together with its name. */
+ xor_function_t f;
+ const char *name;
+} static xor_funcs[] = {
+ {xor_8, "xor_8"},
+ {xor_16, "xor_16"},
+ {xor_32, "xor_32"},
+ {xor_64, "xor_64"},
+};
+
+/*
+ * Calculate parity (xor) for the page at @sector's page list offset.
+ *
+ * This indexes into the page list of the stripe.
+ *
+ * All chunks with io allowed, except @pi, will be xored into chunk @pi
+ * in maximum groups of xor.chunks.
+ *
+ * FIXME: try mapping the pages on discontiguous memory.
+ */
+static void xor(struct stripe *stripe, unsigned pi, unsigned sector)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned max_chunks = rs->xor.chunks, n, p;
+ unsigned o = sector / SECTORS_PER_PAGE; /* Offset into the page_list. */
+ unsigned long **d = rs->data;
+ xor_function_t xor_f = rs->xor.f->f;
+
+ /* Address of parity page to xor into. */
+ d[0] = page_address(pl_elem(PL(stripe, pi), o)->page);
+
+ /* Preset pointers to data pages. */
+ for (n = 1, p = rs->set.raid_devs; p--; ) {
+ if (p != pi && PageIO(PAGE(stripe, p)))
+ d[n++] = page_address(pl_elem(PL(stripe, p), o)->page);
+
+ /* If max chunks -> xor and restart collecting behind d[0]. */
+ if (n == max_chunks) {
+ xor_f(n, d);
+ n = 1;
+ }
+ }
+
+ /* If chunks -> xor. */
+ if (n > 1)
+ xor_f(n, d);
+
+ /* Set parity page uptodate and clean. */
+ page_set(PAGE(stripe, pi), CLEAN);
+}
+
+/* Xor @count sectors starting at sector offset @off into chunk @p. */
+static void common_xor(struct stripe *stripe, sector_t count,
+		       unsigned off, unsigned p)
+{
+	/* @count is a length, not an end sector: stop at off + count. */
+	unsigned sector, end = off + count;
+
+	for (sector = off; sector < end; sector += SECTORS_PER_XOR)
+		xor(stripe, p, sector);
+	atomic_inc(RS(stripe->sc)->stats + S_XORS); /* REMOVEME: statistics. */
+}
+
+/*
+ * Calculate parity sectors on intact stripes.
+ *
+ * Need to calculate raid address for recover stripe, because its
+ * io size differs and is typically larger than the chunk size.
+ */
+static void parity_xor(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned chunk_size = rs->set.chunk_size,
+ io_size = stripe->io.size,
+ xor_size = chunk_size > io_size ? io_size : chunk_size;
+ sector_t off;
+
+ /* This can be the recover stripe with a larger io size. */
+ for (off = 0; off < io_size; off += xor_size) {
+ unsigned pi;
+
+ /*
+ * Recover stripe likely is bigger than regular io
+ * ones and has no precalculated parity disk index ->
+ * need to calculate RAID address.
+ */
+ if (unlikely(StripeRecover(stripe))) {
+ struct address addr;
+
+ raid_address(rs,
+ (stripe->key + off) * rs->set.data_devs,
+ &addr);
+ pi = addr.pi;
+ stripe_zero_pl_part(stripe, pi, off,
+ rs->set.chunk_size);
+ } else
+ pi = stripe->idx.parity;
+
+ common_xor(stripe, xor_size, off, pi);
+ page_set(PAGE(stripe, pi), DIRTY); /* Parity needs writing. */
+ }
+}
+
+/* Rebuild the chunk selected by stripe->idx.recover via xor. */
+static void reconstruct_xor(struct stripe *stripe)
+{
+	struct raid_set *rs = RS(stripe->sc);
+	int chunk = stripe->idx.recover;
+
+	BUG_ON(chunk < 0);
+	/* REMOVEME: statistics. */
+	if (raid_set_degraded(rs))
+		atomic_inc(rs->stats + S_RECONSTRUCT_EI);
+	else
+		atomic_inc(rs->stats + S_RECONSTRUCT_DEV);
+	/* Zero the chunk to rebuild, then xor the other chunks into it. */
+	stripe_zero_chunk(stripe, chunk);
+	common_xor(stripe, stripe->io.size, 0, chunk);
+}
+
+/* Take a reference on a stripe. */
+static inline void _stripe_get(struct stripe *stripe)
+{
+ atomic_inc(&stripe->cnt);
+}
+
+/* Try getting a stripe either from the hash or from the lru list; */
+/* returns it with a reference taken or NULL if neither had one. */
+static struct stripe *stripe_get(struct raid_set *rs, struct address *addr)
+{
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+
+ stripe = stripe_lookup(sc, addr->key);
+ if (stripe) {
+ _stripe_get(stripe);
+ /* Remove from the lru list if on. */
+ stripe_lru_del(stripe, LIST_LOCKED);
+ atomic_inc(rs->stats + S_HITS_1ST); /* REMOVEME: statistics. */
+ } else {
+ /* Second try to get an LRU stripe. */
+ stripe = stripe_lru_pop(sc);
+ if (stripe) {
+ _stripe_get(stripe);
+ /* Invalidate before reinserting with changed key. */
+ stripe_invalidate(stripe);
+ stripe->key = addr->key;
+ stripe->region = dm_rh_sector_to_region(rs->recover.rh,
+ addr->key);
+ stripe->idx.parity = addr->pi;
+ sc_insert(sc, stripe);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_INSCACHE);
+ }
+ }
+
+ return stripe;
+}
+
+/*
+ * Drop one reference on a stripe.
+ *
+ * The last put clears the active flag (adjusting the active stripes
+ * count if it was set) and moves the stripe to the tail of the LRU list.
+ */
+static void stripe_put(struct stripe *stripe)
+{
+	int last = atomic_dec_and_test(&stripe->cnt);
+
+	if (last && TestClearStripeActive(stripe))
+		atomic_dec(&stripe->sc->active_stripes);
+	if (last)
+		stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+
+	BUG_ON(atomic_read(&stripe->cnt) < 0);
+}
+
+/*
+ * Process end io
+ *
+ * I need to do it here because I can't in interrupt
+ *
+ * Read and write functions are split in order to avoid
+ * conditionals in the main loop for performance reasons.
+ */
+
+/* Read helper: copy data from the stripe's page list into the bio. */
+static void _bio_copy_page_list(struct stripe *stripe, struct page_list *pl,
+ struct bio *bio)
+{
+ bio_copy_page_list(READ, stripe, pl, bio);
+}
+
+/* Write helper: dm_rh_dec() the stripe's region; @pl and @bio are unused. */
+static void _rh_dec(struct stripe *stripe, struct page_list *pl,
+ struct bio *bio)
+{
+ dm_rh_dec(RS(stripe->sc)->recover.rh, stripe->region);
+}
+
+/* End io all @rw bios of chunk @p; 0 if done or none, else -EBUSY/-EAGAIN. */
+static inline int
+page_list_endio(int rw, struct stripe *stripe, unsigned p, unsigned *count)
+{
+ int r = 0;
+ struct bio_list *bl = BL(stripe, p, rw);
+
+ if (!bio_list_empty(bl)) {
+ struct page_list *pl = PL(stripe, p);
+ struct page *page = pl->page;
+
+ if (PageLocked(page))
+ r = -EBUSY; /* Page is still locked. */
+ /*
+ * FIXME: PageUptodate() not cleared
+ * properly for missing chunks ?
+ */
+ else if (PageUptodate(page)) {
+ struct bio *bio;
+ struct raid_set *rs = RS(stripe->sc);
+ void (*h_f)(struct stripe *, struct page_list *,
+ struct bio *) =
+ (rw == READ) ? _bio_copy_page_list : _rh_dec;
+
+ while ((bio = bio_list_pop(bl))) {
+ h_f(stripe, pl, bio);
+ _bio_endio(rs, bio, 0);
+ stripe_put(stripe); /* Drop the bio's reference. */
+ if (count)
+ (*count)++;
+ }
+ } else
+ r = -EAGAIN; /* Page is not uptodate (yet). */
+ }
+
+ return r;
+}
+
+/*
+ * End io the @rw bios on all chunks of a stripe (page_list_endio());
+ * returns the last non-zero chunk result (never replacing an -EIO) or 0.
+ */
+static int stripe_endio(int rw, struct stripe *stripe, unsigned *count)
+{
+	int ret = 0;
+	unsigned chunk;
+
+	/* Walk the chunks from the highest index down to 0. */
+	for (chunk = RS(stripe->sc)->set.raid_devs; chunk--; ) {
+		int err = page_list_endio(rw, stripe, chunk, count);
+
+		if (err && ret != -EIO)
+			ret = err;
+	}
+	return ret;
+}
+
+/* Fail all ios on a bio list with -EIO and return # of bios. */
+static unsigned
+bio_list_fail(struct raid_set *rs, struct stripe *stripe, struct bio_list *bl)
+{
+ unsigned r;
+ struct bio *bio;
+
+ raid_set_dead(rs);
+
+ /* Update region counters for the writes (only with a stripe given). */
+ if (stripe) {
+ struct dm_region_hash *rh = rs->recover.rh;
+
+ bio_list_for_each(bio, bl) {
+ if (bio_data_dir(bio) == WRITE)
+ dm_rh_dec(rh, stripe->region);
+ }
+ }
+
+ /* Error end io all bios. */
+ for (r = 0; (bio = bio_list_pop(bl)); r++)
+ _bio_endio(rs, bio, -EIO);
+
+ return r;
+}
+
+/* Fail all bios on a stripe's bio list, dropping a reference for each. */
+static void
+stripe_bio_list_fail(struct raid_set *rs, struct stripe *stripe,
+		     struct bio_list *bl)
+{
+	unsigned failed;
+
+	for (failed = bio_list_fail(rs, stripe, bl); failed; failed--)
+		stripe_put(stripe);
+}
+
+/* Evict a stripe and fail the bios on every bio list of every chunk. */
+static void stripe_fail_io(struct stripe *stripe)
+{
+	struct raid_set *rs = RS(stripe->sc);
+	unsigned chunk = rs->set.raid_devs;
+
+	stripe_evict(stripe);
+
+	while (chunk--) {
+		struct bio_list *lists = stripe->ss[chunk].bl;
+		int l;
+
+		for (l = ARRAY_SIZE(stripe->ss[chunk].bl) - 1; l >= 0; l--)
+			stripe_bio_list_fail(rs, stripe, lists + l);
+	}
+}
+
+/*
+ * Handle all stripes by handing them to the daemon, because we can't
+ * map their pages to copy the data in interrupt context.
+ *
+ * We don't want to handle them here either, while interrupts are disabled.
+ */
+
+/* Read/write endio function for dm-io (interrupt context). */
+static void endio(unsigned long error, void *context)
+{
+ struct dm_mem_cache_object *obj = context;
+ struct stripe_set *ss = obj->private;
+ struct stripe *stripe = ss->stripe;
+ struct page *page = obj->pl->page;
+ /* The io result is flagged on the first page of the page list. */
+ if (unlikely(error))
+ stripe_error(stripe, page);
+ else
+ page_set(page, CLEAN);
+ /* Undo the page lock taken in page_list_rw() before submitting. */
+ __clear_page_locked(page);
+ stripe_io_dec(stripe);
+
+ /* Add stripe to endio list and wake daemon. */
+ stripe_endio_push(stripe);
+}
+
+/*
+ * Recovery io throttling
+ */
+/*
+ * Reset the work/recovery io counters once more than a second (HZ
+ * jiffies) has passed since the last reset; returns 1 on reset, else 0.
+ */
+enum count_type { IO_WORK = 0, IO_RECOVER };
+static int recover_io_reset(struct raid_set *rs)
+{
+	unsigned long j = jiffies;
+
+	/* time_after() copes with jiffies wrapping around. */
+	if (!time_after(j, rs->recover.last_jiffies + HZ))
+		return 0;
+	rs->recover.last_jiffies = j;
+	atomic_set(rs->recover.io_count + IO_WORK, 0);
+	atomic_set(rs->recover.io_count + IO_RECOVER, 0);
+	return 1;
+}
+
+/* Account an io as recovery or work io while the set is recovering. */
+static INLINE void
+recover_io_count(struct raid_set *rs, struct stripe *stripe)
+{
+	if (!RSRecover(rs))
+		return;
+	recover_io_reset(rs);
+	atomic_inc(rs->recover.io_count +
+		   (StripeRecover(stripe) ? IO_RECOVER : IO_WORK));
+}
+
+/* Read/Write the page_list of chunk @p asynchronously via dm-io. */
+static void page_list_rw(struct stripe *stripe, unsigned p)
+{
+ struct stripe_cache *sc = stripe->sc;
+ struct raid_set *rs = RS(sc);
+ struct dm_mem_cache_object *obj = stripe->obj + p;
+ struct page_list *pl = obj->pl;
+ struct page *page = pl->page;
+ struct raid_dev *dev = rs->dev + p;
+ struct dm_io_region io = {
+ .bdev = dev->dev->bdev,
+ .sector = stripe->key,
+ .count = stripe->io.size,
+ };
+ struct dm_io_request control = {
+ .bi_rw = PageDirty(page) ? WRITE : READ,
+ .mem.type = DM_IO_PAGE_LIST,
+ .mem.ptr.pl = pl,
+ .mem.offset = 0,
+ .notify.fn = endio,
+ .notify.context = obj,
+ .client = sc->dm_io_client,
+ };
+
+ BUG_ON(PageLocked(page));
+
+ /*
+ * Don't rw past end of device, which can happen, because
+ * typically sectors_per_dev isn't divisible by io_size.
+ */
+ if (unlikely(io.sector + io.count > rs->set.sectors_per_dev))
+ io.count = rs->set.sectors_per_dev - io.sector;
+
+ io.sector += dev->start; /* Add <offset>. */
+ recover_io_count(rs, stripe); /* Recovery io accounting. */
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats +
+ (PageDirty(page) ? S_DM_IO_WRITE : S_DM_IO_READ));
+
+ ClearPageError(page);
+ __set_page_locked(page); /* Unlocked again in endio(). */
+ io_dev_queued(dev);
+ BUG_ON(dm_io(&control, 1, &io, NULL)); /* Submit failure is fatal. */
+}
+
+/* Write dirty / read not uptodate page lists of a stripe. */
+/* Returns the for_each_io_dev() result of the _stripe_io_inc pass; */
+/* io is only submitted (and the stripe set active) if it is != 0. */
+static unsigned stripe_page_lists_rw(struct raid_set *rs, struct stripe *stripe)
+{
+ unsigned r;
+
+ /*
+ * Increment the pending count on the stripe
+ * first, so that we don't race in endio().
+ *
+ * An inc (IO) is needed for any page:
+ *
+ * o not uptodate
+ * o dirtied by writes merged
+ * o dirtied by parity calculations
+ */
+ r = for_each_io_dev(rs, stripe, _stripe_io_inc);
+ if (r) {
+ /* io needed: chunks are not uptodate/dirty. */
+ int max; /* REMOVEME: */
+ struct stripe_cache *sc = &rs->sc;
+
+ if (!TestSetStripeActive(stripe))
+ atomic_inc(&sc->active_stripes);
+
+ /* Take off the lru list in case it got added there. */
+ stripe_lru_del(stripe, LIST_LOCKED);
+
+ /* Submit actual io. */
+ for_each_io_dev(rs, stripe, page_list_rw);
+
+ /* REMOVEME: statistics */
+ max = sc_active(sc);
+ if (atomic_read(&sc->max_active_stripes) < max)
+ atomic_set(&sc->max_active_stripes, max);
+
+ atomic_inc(rs->stats + S_FLUSHS);
+ /* END REMOVEME: statistics */
+ }
+
+ return r;
+}
+
+/* Work all pending writes of chunk @p into its pages and dirty them. */
+static INLINE void _writes_merge(struct stripe *stripe, unsigned p)
+{
+ struct bio_list *write = BL(stripe, p, WRITE);
+
+ if (!bio_list_empty(write)) {
+ struct page_list *pl = stripe->obj[p].pl;
+ struct bio *bio;
+ struct bio_list *write_merged = BL(stripe, p, WRITE_MERGED);
+
+ /*
+ * We can play with the lists without holding a lock,
+ * because it is just us accessing them anyway.
+ */
+ bio_list_for_each(bio, write)
+ bio_copy_page_list(WRITE, stripe, pl, bio);
+ /* Move the copied bios over to the merged list. */
+ bio_list_merge(write_merged, write);
+ bio_list_init(write);
+ page_set(pl->page, DIRTY);
+ }
+}
+
+/* Merge the pending writes of all chunks, dirtying respective pages. */
+static INLINE void writes_merge(struct stripe *stripe)
+{
+	unsigned chunk;
+
+	for (chunk = RS(stripe->sc)->set.raid_devs; chunk--; )
+		_writes_merge(stripe, chunk);
+}
+
+/* Check, if the queued writes of chunk @p add up to a full io size. */
+static INLINE int stripe_check_overwrite(struct stripe *stripe, unsigned p)
+{
+	struct bio_list *writes = BL(stripe, p, WRITE);
+	struct bio *bio;
+	unsigned total = 0;
+
+	bio_list_for_each(bio, writes)
+		total += bio_sectors(bio);
+
+	return total == RS(stripe->sc)->set.io_size;
+}
+
+/*
+ * Prepare stripe to avoid io on broken/reconstructed
+ * drive in order to reconstruct data on endio.
+ */
+enum prepare_type { IO_ALLOW, IO_PROHIBIT };
+static void stripe_prepare(struct stripe *stripe, unsigned p,
+ enum prepare_type type)
+{
+ struct page *page = PAGE(stripe, p);
+
+ switch (type) {
+ case IO_PROHIBIT:
+ /*
+ * In case we prohibit, we gotta make sure, that
+ * io on all other chunks than the one which failed
+ * or is being reconstructed is allowed and that it
+ * doesn't have state uptodate.
+ */
+ stripe_allow_io(stripe);
+ ClearPageUptodate(page);
+ ProhibitPageIO(page);
+
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats + S_PROHIBITPAGEIO);
+ stripe->idx.recover = p; /* Chunk to reconstruct. */
+ SetStripeReconstruct(stripe);
+ break;
+
+ case IO_ALLOW:
+ AllowPageIO(page);
+ stripe->idx.recover = -1; /* No chunk to reconstruct. */
+ ClearStripeReconstruct(stripe);
+ break;
+
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Degraded/reconstruction mode.
+ *
+ * Check stripe state to figure which chunks don't need IO.
+ */
+static INLINE void stripe_check_reconstruct(struct stripe *stripe,
+ int prohibited)
+{
+ struct raid_set *rs = RS(stripe->sc);
+
+ /*
+ * Degraded mode (device(s) failed) ->
+ * avoid io on the failed device.
+ */
+ if (unlikely(raid_set_degraded(rs))) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_DEGRADED);
+ stripe_prepare(stripe, rs->set.ei, IO_PROHIBIT);
+ return;
+ } else {
+ /*
+ * Reconstruction mode (ie. a particular device or
+ * some (rotating) parity chunk is being resynchronized) ->
+ * o make sure all needed pages are read in
+ * o writes are allowed to go through
+ */
+ int r = region_state(rs, stripe->key, DM_RH_NOSYNC);
+
+ if (r) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NOSYNC);
+ stripe_prepare(stripe, dev_for_parity(stripe),
+ IO_PROHIBIT);
+ return;
+ }
+ }
+
+ /*
+ * All disks good. Avoid reading parity chunk and reconstruct it
+ * unless we have prohibited io to chunk(s).
+ */
+ if (!prohibited) {
+ if (StripeMerged(stripe))
+ stripe_prepare(stripe, stripe->idx.parity, IO_ALLOW);
+ else {
+ stripe_prepare(stripe, stripe->idx.parity, IO_PROHIBIT);
+
+ /*
+ * Overrule stripe_prepare, which flagged the stripe
+ * for reconstruction: parity will be created new anyway.
+ */
+ ClearStripeReconstruct(stripe);
+ }
+ }
+}
+
+/* Check, if stripe is ready to merge writes. */
+static INLINE int stripe_check_merge(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int prohibited = 0;
+ unsigned chunks = 0, p = rs->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ /* Can't merge active chunks. */
+ if (PageLocked(page)) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_MERGE_PAGE_LOCKED);
+ break;
+ }
+
+ /* Can merge uptodate chunks and have to count parity chunk. */
+ if (PageUptodate(page) || p == stripe->idx.parity) {
+ chunks++;
+ continue;
+ }
+
+ /* Read before write ordering. */
+ if (RSCheckOverwrite(rs) &&
+ bio_list_empty(BL(stripe, p, READ))) {
+ int r = stripe_check_overwrite(stripe, p);
+
+ if (r) {
+ chunks++;
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats +
+ S_PROHIBITPAGEIO);
+ ProhibitPageIO(page);
+ prohibited = 1;
+ }
+ }
+ }
+ /* 0: can merge; 1: can't, but io got prohibited; -EPERM: can't. */
+ if (chunks == rs->set.raid_devs) {
+ /* All pages are uptodate or get written over or mixture. */
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_CAN_MERGE);
+ return 0;
+ } else
+ /* REMOVEME: statistics.*/
+ atomic_inc(rs->stats + S_CANT_MERGE);
+
+ return prohibited ? 1 : -EPERM;
+}
+
+/* Prohibit io on unlocked chunks w/o queued reads; 1 if any prohibited. */
+static INLINE int stripe_check_read(struct stripe *stripe)
+{
+ int r = 0;
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ if (!PageLocked(page) &&
+ bio_list_empty(BL(stripe, p, READ))) {
+ ProhibitPageIO(page); /* Nobody wants to read it. */
+ r = 1;
+ }
+ }
+
+ return r;
+}
+
+/*
+ * Read/write a stripe.
+ *
+ * All stripe read/write activity goes through this function.
+ *
+ * States to cover:
+ * o stripe to read and/or write
+ * o stripe with error to reconstruct
+ */
+static int stripe_rw(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int prohibited = 0, r;
+
+ /*
+ * Check the state of the RAID set and if degraded (or
+ * resynchronizing for reads), read in all other chunks but
+ * the one on the dead/resynchronizing device in order to be
+ * able to reconstruct the missing one.
+ *
+ * Merge all writes hanging off uptodate pages of the stripe.
+ */
+
+ /* Initially allow io on all chunks and prohibit below, if necessary. */
+ stripe_allow_io(stripe);
+
+ if (StripeRBW(stripe)) {
+ r = stripe_check_merge(stripe);
+ if (!r) {
+ /*
+ * If I could rely on valid parity (which would only
+ * be sure in case of a full synchronization),
+ * I could xor a fraction of chunks out of
+ * parity and back in.
+ *
+ * For the time being, I got to redo parity...
+ */
+ /* parity_xor(stripe); */ /* Xor chunks out. */
+ stripe_zero_chunk(stripe, stripe->idx.parity);
+ writes_merge(stripe); /* Merge writes in. */
+ parity_xor(stripe); /* Update parity. */
+ ClearStripeRBW(stripe); /* Disable RBW. */
+ SetStripeMerged(stripe); /* Writes merged. */
+ }
+
+ if (r > 0)
+ prohibited = 1;
+ } else if (!raid_set_degraded(rs))
+ /* Only allow for read avoidance if not degraded. */
+ prohibited = stripe_check_read(stripe);
+
+ /*
+ * Check, if io needs to be allowed/prohibited on certain chunks
+ * because of a degraded set or reconstruction on a region.
+ */
+ stripe_check_reconstruct(stripe, prohibited);
+
+ /* Now submit any reads/writes. */
+ r = stripe_page_lists_rw(rs, stripe);
+ if (!r) {
+ /*
+ * No io submitted because of chunk io prohibited or
+ * locked pages -> push to end io list for processing.
+ */
+ atomic_inc(rs->stats + S_NO_RW); /* REMOVEME: statistics. */
+ stripe_endio_push(stripe);
+ wake_do_raid(rs); /* Wake myself. */
+ }
+
+ return 0; /* Always 0, whether io got submitted or not. */
+}
+
+/* Flush a stripe now or delay the flush by putting it on the io list. */
+enum flush_type { FLUSH_DELAY, FLUSH_NOW };
+static int stripe_flush(struct stripe *stripe, enum flush_type type)
+{
+	int ret = 0;
+
+	stripe_lru_del(stripe, LIST_LOCKED);
+	switch (type) {
+	case FLUSH_NOW:
+		/* Read/write the stripe, but fail early on failed sets. */
+		if (likely(raid_set_operational(RS(stripe->sc))))
+			ret = stripe_rw(stripe);
+		else
+			stripe_fail_io(stripe);
+		break;
+	case FLUSH_DELAY:
+		stripe_io_add(stripe, POS_TAIL, LIST_UNLOCKED);
+		break;
+	default:
+		BUG();
+	}
+	return ret;
+}
+
+/*
+ * Queue reads and writes to a stripe by hanging
+ * their bios off the stripe sets read/write lists.
+ *
+ * Returns 1 if a write got queued (region hash needs a flush), else 0.
+ */
+static INLINE int stripe_queue_bio(struct raid_set *rs, struct bio *bio,
+ struct bio_list *reject)
+{
+ int r = 0;
+ struct address addr;
+ struct stripe *stripe =
+ stripe_get(rs, raid_address(rs, bio->bi_sector, &addr));
+
+ if (stripe) {
+ int rr, rw = bio_data_dir(bio);
+
+ rr = stripe_lock(rs, stripe, rw, addr.key); /* Lock stripe */
+ if (rr) {
+ stripe_put(stripe);
+ goto out;
+ }
+
+ /* Distinguish read and write cases. */
+ bio_list_add(BL(stripe, addr.di, rw), bio);
+
+ /* REMOVEME: statistics */
+ atomic_inc(rs->stats + (rw == WRITE ?
+ S_BIOS_ADDED_WRITE : S_BIOS_ADDED_READ));
+
+ if (rw == READ)
+ SetStripeRead(stripe);
+ else {
+ SetStripeRBW(stripe);
+
+ /* Increment pending write count on region. */
+ dm_rh_inc(rs->recover.rh, stripe->region);
+ r = 1; /* Region hash needs a flush. */
+ }
+
+ /*
+ * Optimize stripe flushing:
+ *
+ * o directly start io for read stripes.
+ *
+ * o put stripe onto stripe caches io_list for RBW,
+ * so that do_flush() can belabour it after we put
+ * more bios to the stripe for overwrite optimization.
+ */
+ stripe_flush(stripe,
+ StripeRead(stripe) ? FLUSH_NOW : FLUSH_DELAY);
+
+ /* Got no stripe from cache or failed to lock it -> reject bio. */
+ } else {
+out:
+ bio_list_add(reject, bio);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_IOS_POST);
+ }
+
+ return r;
+}
+
+/*
+ * Recovery functions
+ */
+/* Read a stripe off a raid set for recovery, skipping chunk @idx if > -1. */
+static int recover_read(struct raid_set *rs, struct stripe *stripe, int idx)
+{
+ /* Invalidate all pages so that they get read in. */
+ stripe_pages_invalidate(stripe);
+
+ /* Allow io on all recovery chunks. */
+ stripe_allow_io(stripe);
+ /* Don't read the chunk which is to be reconstructed. */
+ if (idx > -1)
+ ProhibitPageIO(PAGE(stripe, idx));
+
+ stripe->key = rs->recover.pos; /* Current recovery position. */
+ return stripe_page_lists_rw(rs, stripe);
+}
+
+/* Write a stripe to a raid set for recovery. */
+static int recover_write(struct raid_set *rs, struct stripe *stripe, int idx)
+{
+ /*
+ * If this is a reconstruct of a particular device, then
+ * reconstruct the respective page(s), else create parity page(s).
+ */
+ if (idx > -1) {
+ struct page *page = PAGE(stripe, idx);
+
+ AllowPageIO(page); /* Was prohibited by recover_read(). */
+ stripe_zero_chunk(stripe, idx);
+ common_xor(stripe, stripe->io.size, 0, idx);
+ page_set(page, DIRTY); /* Dirty pages get written. */
+ } else
+ parity_xor(stripe);
+
+ return stripe_page_lists_rw(rs, stripe);
+}
+
+/* Recover bandwidth available ? Returns 1 if so, else 0. */
+static int recover_bandwidth(struct raid_set *rs)
+{
+ int r, work;
+
+ /* On reset -> allow recovery. */
+ r = recover_io_reset(rs);
+ if (r || RSBandwidth(rs))
+ goto out;
+
+ work = atomic_read(rs->recover.io_count + IO_WORK);
+ if (work) {
+ /* Pay attention to larger recover stripe size. */
+ int recover =
+ atomic_read(rs->recover.io_count + IO_RECOVER) *
+ rs->recover.io_size /
+ rs->set.io_size;
+
+ /*
+ * Don't use more than given bandwidth of
+ * the work io for recovery.
+ * NOTE(review): assumes bandwidth_work != 0 - confirm.
+ */
+ if (recover > work / rs->recover.bandwidth_work) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NO_BANDWIDTH);
+ return 0;
+ }
+ }
+out:
+ atomic_inc(rs->stats + S_BANDWIDTH); /* REMOVEME: statistics. */
+ return 1;
+}
+
+/* Try to get a region to recover: 1 = new, 0 = ongoing, < 0 = none. */
+static int recover_get_region(struct raid_set *rs)
+{
+ struct recover *rec = &rs->recover;
+ struct dm_region_hash *rh = rec->rh;
+
+ /* Start quiescing some regions. */
+ if (!RSRegionGet(rs)) {
+ int r = recover_bandwidth(rs); /* Enough bandwidth ?. */
+
+ if (r) {
+ r = dm_rh_recovery_prepare(rh);
+ if (r < 0) {
+ DMINFO("No %sregions to recover",
+ rec->nr_regions_to_recover ?
+ "more " : "");
+ return -ENOENT;
+ }
+ } else
+ return -EAGAIN;
+
+ SetRSRegionGet(rs);
+ }
+
+ if (!rec->reg) {
+ rec->reg = dm_rh_recovery_start(rh);
+ if (rec->reg) {
+ /*
+ * A reference for the region I'll
+ * keep till I've completely synced it.
+ */
+ io_get(rs);
+ rec->pos = dm_rh_region_to_sector(rh,
+ dm_rh_get_region_key(rec->reg));
+ rec->end = rec->pos + dm_rh_get_region_size(rh);
+ return 1;
+ } else
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/* Flip-flop a recovery stripe: RBW flag -> read it, Read flag -> write. */
+static INLINE int recover_stripe_rw(struct raid_set *rs, struct stripe *stripe)
+{
+	if (TestClearStripeRBW(stripe)) {
+		/* First half: flag the read and start it. */
+		SetStripeRead(stripe);
+		return recover_read(rs, stripe, idx_get(rs));
+	}
+	/* Second half: write, if a read got flagged before; else 0. */
+	return TestClearStripeRead(stripe) ?
+	       recover_write(rs, stripe, idx_get(rs)) : 0;
+}
+
+/* Reset recovery variables: forget the region and clear RegionGet. */
+static void recovery_region_reset(struct raid_set *rs)
+{
+ rs->recover.reg = NULL;
+ ClearRSRegionGet(rs);
+}
+
+/* End recovery of the current region (if any) and update region hash. */
+static void recover_rh_update(struct raid_set *rs, int error)
+{
+	struct recover *rec = &rs->recover;
+	struct dm_region *reg = rec->reg;
+
+	if (reg) {
+		dm_rh_recovery_end(reg, error);
+		if (!error)
+			rec->nr_regions_recovered++;
+		recovery_region_reset(rs);
+	}
+
+	/* Use rec->rh: reg is NULL on the _do_recovery() error path. */
+	dm_rh_update_states(rec->rh, 1);
+	dm_rh_flush(rec->rh);
+	io_put(rs); /* Release the io reference for the region. */
+}
+
+/* Called by main io daemon to recover regions. */
+/* FIXME: cope with MAX_RECOVER > 1. */
+static INLINE void _do_recovery(struct raid_set *rs, struct stripe *stripe)
+{
+ int r;
+ struct recover *rec = &rs->recover;
+
+ /* If recovery is active -> return. */
+ if (StripeActive(stripe))
+ return;
+
+ /* io error is fatal for recovery -> stop it. */
+ if (unlikely(StripeError(stripe)))
+ goto err;
+
+ /* Get a region to recover. */
+ r = recover_get_region(rs);
+ switch (r) {
+ case 1: /* Got a new region. */
+ /* Flag read before write. */
+ ClearStripeRead(stripe);
+ SetStripeRBW(stripe);
+ break;
+
+ case 0:
+ /* Got a region in the works. */
+ r = recover_bandwidth(rs);
+ if (r) /* Got enough bandwidth. */
+ break;
+ /* Fall through. */
+ case -EAGAIN:
+ /* No bandwidth/quiesced region yet, try later. */
+ wake_do_raid_delayed(rs, HZ / 10);
+ return;
+
+ case -ENOENT: /* No more regions. */
+ dm_table_event(rs->ti->table);
+ goto free;
+ }
+
+ /* Read/write a recover stripe. */
+ r = recover_stripe_rw(rs, stripe);
+ if (r) {
+ /* IO initiated, get another reference for the IO. */
+ io_get(rs);
+ return;
+ }
+
+ /* Update recovery position within region. */
+ rec->pos += stripe->io.size;
+
+ /* If we're at end of region, update region hash. */
+ if (rec->pos >= rec->end ||
+ rec->pos >= rs->set.sectors_per_dev)
+ recover_rh_update(rs, 0);
+ else
+ SetStripeRBW(stripe);
+
+ /* Schedule myself for another round... */
+ wake_do_raid(rs);
+ return;
+
+err:
+ raid_set_check_degrade(rs, stripe);
+
+ {
+ char buf[BDEVNAME_SIZE];
+
+ DMERR("stopping recovery due to "
+ "ERROR on /dev/%s, stripe at offset %llu",
+ bdevname(rs->dev[rs->set.ei].dev->bdev, buf),
+ (unsigned long long) stripe->key);
+
+ }
+
+ /* Make sure, that all quiesced regions get released. */
+ do {
+ if (rec->reg)
+ dm_rh_recovery_end(rec->reg, -EIO);
+
+ rec->reg = dm_rh_recovery_start(rec->rh);
+ } while (rec->reg);
+ /* rec->reg is NULL from here on. */
+ recover_rh_update(rs, -EIO);
+free:
+ rs->set.dev_to_init = -1;
+
+ /* Check for jiffies overrun. */
+ rs->recover.end_jiffies = jiffies;
+ if (rs->recover.end_jiffies < rs->recover.start_jiffies)
+ rs->recover.end_jiffies = ~0;
+
+ ClearRSRecover(rs);
+}
+
+/*
+ * Run one recovery step on each recovery stripe of the set; once the
+ * Recover flag is no longer set, call stripe_recover_free().
+ */
+static INLINE void do_recovery(struct raid_set *rs)
+{
+	struct stripe *s;
+
+	list_for_each_entry(s, &rs->recover.stripes, lists[LIST_RECOVER]) {
+		_do_recovery(rs, s);
+	}
+
+	/* Recovery still enabled -> keep the recovery stripes. */
+	if (RSRecover(rs))
+		return;
+
+	stripe_recover_free(rs);
+}
+
+/*
+ * END recovery functions
+ */
+
+/*
+ * End io process all stripes handed in by endio() callback.
+ *
+ * Pops stripes off the endio list of the stripe cache of @rs until it is
+ * empty. Recovery stripes only drop their io reference and active state;
+ * all other stripes get their reads ended, are requeued after an io error
+ * while the set is still operational, are failed if it is not, and are
+ * flushed again if they are read-before-write stripes.
+ */
+static void do_endios(struct raid_set *rs)
+{
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+
+ while ((stripe = stripe_endio_pop(sc))) {
+ unsigned count;
+
+ /* Recovery stripe special case. */
+ if (unlikely(StripeRecover(stripe))) {
+ /* Still io pending on the stripe -> look at it later. */
+ if (stripe_io(stripe))
+ continue;
+
+ io_put(rs); /* Release region io reference. */
+ ClearStripeActive(stripe);
+
+ /* REMOVEME: statistics*/
+ atomic_dec(&sc->active_stripes);
+ continue;
+ }
+
+ /* Early end io all reads on any uptodate chunks. */
+ /* count is zeroed by the comma expression before being passed. */
+ stripe_endio(READ, stripe, (count = 0, &count));
+ if (stripe_io(stripe)) {
+ if (count) /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_ACTIVE_READS);
+
+ continue;
+ }
+
+ /* Set stripe inactive after all io got processed. */
+ if (TestClearStripeActive(stripe))
+ atomic_dec(&sc->active_stripes);
+
+ /* Unlock stripe (for clustering). */
+ stripe_unlock(rs, stripe);
+
+ /*
+ * If an io error on a stripe occured and the RAID set
+ * is still operational, requeue the stripe for io.
+ */
+ if (TestClearStripeError(stripe)) {
+ raid_set_check_degrade(rs, stripe);
+ ClearStripeReconstruct(stripe);
+
+ if (!StripeMerged(stripe) &&
+ raid_set_operational(rs)) {
+ stripe_pages_invalidate(stripe);
+ stripe_flush(stripe, FLUSH_DELAY);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_REQUEUE);
+ continue;
+ }
+ }
+
+ /* Check if the RAID set is inoperational to error ios. */
+ if (!raid_set_operational(rs)) {
+ ClearStripeReconstruct(stripe);
+ stripe_fail_io(stripe);
+ /* No references may be left on the stripe after failing its io. */
+ BUG_ON(atomic_read(&stripe->cnt));
+ continue;
+ }
+
+ /* Got to reconstruct a missing chunk. */
+ if (TestClearStripeReconstruct(stripe))
+ reconstruct_xor(stripe);
+
+ /*
+ * Now that we've got a complete stripe, we can
+ * process the rest of the end ios on reads.
+ */
+ BUG_ON(stripe_endio(READ, stripe, NULL));
+ ClearStripeRead(stripe);
+
+ /*
+ * Read-before-write stripes need to be flushed again in
+ * order to work the write data into the pages *after*
+ * they were read in.
+ */
+ if (TestClearStripeMerged(stripe))
+ /* End io all bios which got merged already. */
+ BUG_ON(stripe_endio(WRITE_MERGED, stripe, NULL));
+
+ /* Got to put on flush list because of new writes. */
+ if (StripeRBW(stripe))
+ stripe_flush(stripe, FLUSH_DELAY);
+ }
+}
+
+/*
+ * Stripe cache shrinking.
+ *
+ * Tries to shrink the stripe cache by the number of stripes recorded in
+ * sc.stripes_to_shrink and stores back whatever could not be freed.
+ */
+static INLINE void do_sc_shrink(struct raid_set *rs)
+{
+	unsigned before;
+	unsigned todo = atomic_read(&rs->sc.stripes_to_shrink);
+
+	/* No shrink request pending. */
+	if (!todo)
+		return;
+
+	before = atomic_read(&rs->sc.stripes);
+	sc_shrink(&rs->sc, todo);
+
+	/* Deduct the stripes which actually went away. */
+	todo -= before - atomic_read(&rs->sc.stripes);
+	atomic_set(&rs->sc.stripes_to_shrink, todo);
+
+	/*
+	 * Wake myself up in case we failed to shrink the
+	 * requested amount in order to try again later.
+	 */
+	if (todo)
+		wake_do_raid(rs);
+}
+
+
+/*
+ * Process all ios
+ *
+ * We do different things with the io depending on the
+ * state of the region that it's in:
+ *
+ * o reads: hang off stripe cache or postpone if full
+ *
+ * o writes:
+ *
+ * CLEAN/DIRTY/NOSYNC: increment pending and hang io off stripe's stripe set.
+ * In case stripe cache is full or busy, postpone the io.
+ *
+ * RECOVERING: delay the io until recovery of the region completes.
+ *
+ * @rs:  RAID set the bios belong to
+ * @ios: work list of bios; bios which could not be queued (rejected ones
+ *       and a pushed back barrier) are left on it for a later run
+ */
+static INLINE void do_ios(struct raid_set *rs, struct bio_list *ios)
+{
+ int r;
+ unsigned flush = 0;
+ struct dm_region_hash *rh = rs->recover.rh;
+ struct bio *bio;
+ struct bio_list delay, reject;
+
+ bio_list_init(&delay);
+ bio_list_init(&reject);
+
+ /*
+ * Classify each io:
+ * o delay to recovering regions
+ * o queue to all other regions
+ */
+ while ((bio = bio_list_pop(ios))) {
+ /*
+ * In case we get a barrier bio, push it back onto
+ * the input queue unless all work queues are empty
+ * and the stripe cache is inactive.
+ */
- if (unlikely(bio_barrier(bio))) {
++ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BARRIER);
+ if (!list_empty(rs->sc.lists + LIST_IO) ||
+ !bio_list_empty(&delay) ||
+ !bio_list_empty(&reject) ||
+ sc_active(&rs->sc)) {
+ bio_list_push(ios, bio);
+ break;
+ }
+ }
+
+ /* Non-zero r: the bio's region is in RECOVERING state. */
+ r = region_state(rs, _sector(rs, bio), DM_RH_RECOVERING);
+ if (unlikely(r)) {
+ /* Got to wait for recovering regions. */
+ bio_list_add(&delay, bio);
+ SetRSBandwidth(rs);
+ } else {
+ /*
+ * Process ios to non-recovering regions by queueing
+ * them to stripes (does rh_inc()) for writes).
+ */
+ flush += stripe_queue_bio(rs, bio, &reject);
+ }
+ }
+
+ if (flush) {
+ r = dm_rh_flush(rh); /* Writes got queued -> flush dirty log. */
+ if (r)
+ DMERR("dirty log flush");
+ }
+
+ /* Delay ios to regions which are recovering. */
+ while ((bio = bio_list_pop(&delay))) {
+ /* REMOVEME: statistics.*/
+ atomic_inc(rs->stats + S_DELAYED_BIOS);
+ atomic_inc(rs->stats + S_SUM_DELAYED_BIOS);
+ dm_rh_delay(rh, bio);
+
+ }
+
+ /* Merge any rejected bios back to the head of the input list. */
+ bio_list_merge_head(ios, &reject);
+}
+
+/*
+ * Flush any stripes on the io list.
+ *
+ * A stripe is unlinked from the list only when stripe_flush() returns 0;
+ * otherwise it stays queued for the next run.
+ */
+static INLINE void do_flush(struct raid_set *rs)
+{
+	struct list_head *io_list = rs->sc.lists + LIST_IO;
+	struct list_head *cur, *next;
+
+	list_for_each_safe(cur, next, io_list) {
+		struct stripe *stripe = list_entry(cur, struct stripe,
+						   lists[LIST_IO]);
+
+		/* Not processed -> leave it on the list. */
+		if (stripe_flush(stripe, FLUSH_NOW))
+			continue;
+
+		list_del_init(cur);
+	}
+}
+
+/*
+ * Send a table event in case more than 4/5 of the stripes in the
+ * stripe cache are active; reset the ScBusy flag otherwise.
+ */
+static INLINE void do_busy_event(struct raid_set *rs)
+{
+	/* Not (or no longer) too busy. */
+	if (sc_active(&rs->sc) <= atomic_read(&rs->sc.stripes) * 4 / 5) {
+		ClearRSScBusy(rs);
+		return;
+	}
+
+	/*
+	 * TestSetRSScBusy() presumably returns the previous flag state,
+	 * so the event fires once per busy period - TODO confirm.
+	 */
+	if (!TestSetRSScBusy(rs))
+		dm_table_event(rs->ti->table);
+}
+
+/* Unplug: let the io roll on the set's devices (last device first). */
+static INLINE void do_unplug(struct raid_set *rs)
+{
+	unsigned p = rs->set.raid_devs;
+
+	while (p--) {
+		struct raid_dev *dev = rs->dev + p;
+
+		/* Only call any device unplug function, if io got queued. */
+		if (!io_dev_clear(dev))
+			continue;
+
+		blk_unplug(bdev_get_queue(dev->dev->bdev));
+	}
+}
+
+/*-----------------------------------------------------------------
+ * RAID daemon
+ *---------------------------------------------------------------*/
+/*
+ * o belabour all end ios
+ * o optionally shrink the stripe cache
+ * o update the region hash states
+ * o optionally do recovery
+ * o grab the input queue
+ * o work on all requeued or new ios and perform stripe cache flushes
+ * unless the RAID set is inoperational (when we error ios)
+ * o check, if the stripe cache gets too busy and throw an event if so
+ * o unplug any component raid devices with queued bios
+ */
+/* Worker function of the RAID daemon; @ws is embedded in rs->io.dws. */
+static void do_raid(struct work_struct *ws)
+{
+ struct raid_set *rs = container_of(ws, struct raid_set, io.dws.work);
+ struct bio_list *ios = &rs->io.work, *ios_in = &rs->io.in;
+ spinlock_t *lock = &rs->io.in_lock;
+
+ /*
+ * We always need to end io, so that ios
+ * can get errored in case the set failed
+ * and the region counters get decremented
+ * before we update the region hash states.
+ */
+redo:
+ do_endios(rs);
+
+ /*
+ * Now that we've end io'd, which may have put stripes on
+ * the LRU list, we shrink the stripe cache if requested.
+ */
+ do_sc_shrink(rs);
+
+ /* Update region hash states before we go any further. */
+ dm_rh_update_states(rs->recover.rh, 1);
+
+ /* Try to recover regions. */
+ if (RSRecover(rs))
+ do_recovery(rs);
+
+ /* More endios -> process. */
+ if (!stripe_endio_empty(&rs->sc)) {
+ atomic_inc(rs->stats + S_REDO);
+ goto redo;
+ }
+
+ /* Quickly grab all new ios queued and add them to the work list. */
+ /* The input list is shared with queue_bio(), hence the lock. */
+ spin_lock_irq(lock);
+ bio_list_merge(ios, ios_in);
+ bio_list_init(ios_in);
+ spin_unlock_irq(lock);
+
+ /* Let's assume we're operational most of the time ;-). */
+ if (likely(raid_set_operational(rs))) {
+ /* If we got ios, work them into the cache. */
+ if (!bio_list_empty(ios)) {
+ do_ios(rs, ios);
+ do_unplug(rs); /* Unplug the sets device queues. */
+ }
+
+ do_flush(rs); /* Flush any stripes on io list. */
+ do_unplug(rs); /* Unplug the sets device queues. */
+ do_busy_event(rs); /* Check if we got too busy. */
+
+ /* More endios -> process. */
+ if (!stripe_endio_empty(&rs->sc)) {
+ atomic_inc(rs->stats + S_REDO);
+ goto redo;
+ }
+ } else
+ /* No way to reconstruct data with too many devices failed. */
+ bio_list_fail(rs, NULL, ios);
+}
+
+/*
+ * Region hash callback: takes over the delayed bios in @bl, which were
+ * queued to recovered regions, by moving them to the head of the work
+ * list of the RAID set passed in as @context
+ * (Gets called via rh_update_states()).
+ */
+static void dispatch_delayed_bios(void *context, struct bio_list *bl)
+{
+	struct raid_set *rs = context;
+	struct bio *b;
+
+	/* REMOVEME: decrement pending delayed bios counter. */
+	bio_list_for_each(b, bl) {
+		atomic_dec(&rs->stats[S_DELAYED_BIOS]);
+	}
+
+	/* Merge region hash private list to work list. */
+	bio_list_merge_head(&rs->io.work, bl);
+	bio_list_init(bl);
+
+	ClearRSBandwidth(rs);
+}
+
+/*************************************************************
+ * Constructor helpers
+ *************************************************************/
+/*
+ * Calculate MB/sec.
+ *
+ * Multiplies @speed by the number of data devices, the recovery io size
+ * and HZ, shifts right by 10, converts with to_bytes() and shifts right
+ * by 10 again.
+ * NOTE(review): @speed is presumably a per-jiffy operation count (the
+ * xor_optimize() result is what raid_set_log() passes in) - confirm.
+ */
+static INLINE unsigned mbpers(struct raid_set *rs, unsigned speed)
+{
+ return to_bytes(speed * rs->set.data_devs *
+ rs->recover.io_size * HZ >> 10) >> 10;
+}
+
+/*
+ * Discover fastest xor algorithm and # of chunks combination.
+ */
+/*
+ * Calculate speed for algorithm and # of chunks:
+ * returns the number of common_xor() runs which fit into one jiffy.
+ */
+static INLINE unsigned xor_speed(struct stripe *stripe)
+{
+	unsigned runs = 0;
+	unsigned long tick;
+
+	/* Spin until the jiffies counter changes to start on a fresh tick. */
+	tick = jiffies;
+	while (tick == jiffies)
+		;
+
+	/* Do xors for a full tick. */
+	tick = jiffies;
+	while (tick == jiffies) {
+		mb();
+		common_xor(stripe, stripe->io.size, 0, 0);
+		mb();
+		runs++;
+		mb();
+	}
+
+	return runs;
+}
+
+/*
+ * Optimize xor algorithm for this RAID set.
+ *
+ * Benchmarks every entry of xor_funcs with XOR_CHUNKS_MAX down to 2
+ * chunks on the first recovery stripe, stores the fastest combination
+ * in rs->xor and returns its speed as measured by xor_speed().
+ */
+static unsigned xor_optimize(struct raid_set *rs)
+{
+	unsigned best_chunks = 2, best_speed = 0;
+	struct xor_func *f = ARRAY_END(xor_funcs), *best_f = NULL;
+	struct stripe *stripe;
+
+	BUG_ON(list_empty(&rs->recover.stripes));
+	stripe = list_first_entry(&rs->recover.stripes, struct stripe,
+				  lists[LIST_RECOVER]);
+
+	/*
+	 * Got to allow io on all chunks, so that
+	 * xor() will actually work on them.
+	 */
+	stripe_allow_io(stripe);
+
+	/* Try all xor functions, last table entry first. */
+	while (f-- > xor_funcs) {
+		/* Set actual xor function for common_xor(). */
+		rs->xor.f = f;
+
+		for (rs->xor.chunks = XOR_CHUNKS_MAX;
+		     rs->xor.chunks >= 2;
+		     rs->xor.chunks--) {
+			unsigned speed = xor_speed(stripe);
+
+			/* Only a strictly faster result replaces the best. */
+			if (speed <= best_speed)
+				continue;
+
+			best_speed = speed;
+			best_chunks = rs->xor.chunks;
+			best_f = f;
+		}
+	}
+
+	/* Memorize optimum parameters. */
+	rs->xor.f = best_f;
+	rs->xor.chunks = best_chunks;
+	return best_speed;
+}
+
+/*
+ * Return non-zero if @num objects of size @obj do not fit into an
+ * unsigned long together with @fixed bytes of overhead.
+ * @obj must not be 0.
+ */
+static inline int array_too_big(unsigned long fixed, unsigned long obj,
+				unsigned long num)
+{
+	unsigned long max_num = (ULONG_MAX - fixed) / obj;
+
+	return num > max_num;
+}
+
+/*
+ * Callback handed to dm_region_hash_create() by context_alloc();
+ * it does nothing in this target.
+ */
+static void wakeup_all_recovery_waiters(void *context)
+{
+}
+
+/*
+ * Allocate a RAID context (a RAID set)
+ *
+ * Creates the dirty log from argv[0] and argv + 2, allocates the raid_set
+ * together with its trailing per-device and data pointer arrays,
+ * initializes it and creates the region hash, the stripe cache and the
+ * dm-io client.
+ *
+ * Returns 0 and stores the set in *raid_set on success. On failure
+ * ti->error is set, every resource acquired so far is released exactly
+ * once and an error code is returned (TI_ERR()/TI_ERR_RET() are expected
+ * to return from the function).
+ */
+static int
+context_alloc(struct raid_set **raid_set, struct raid_type *raid_type,
+	      unsigned stripes, unsigned chunk_size, unsigned io_size,
+	      unsigned recover_io_size, unsigned raid_devs,
+	      sector_t sectors_per_dev,
+	      struct dm_target *ti, unsigned dl_parms, char **argv)
+{
+	int r;
+	size_t len;
+	sector_t region_size, ti_len;
+	struct raid_set *rs = NULL;
+	struct dm_dirty_log *dl;
+	struct recover *rec;
+
+	/*
+	 * Create the dirty log
+	 *
+	 * We need to change length for the dirty log constructor,
+	 * because we want an amount of regions for all stripes derived
+	 * from the single device size, so that we can keep region
+	 * size = 2^^n independent of the number of devices
+	 */
+	ti_len = ti->len;
+	ti->len = sectors_per_dev;
+	dl = dm_dirty_log_create(argv[0], ti, dl_parms, argv + 2);
+	ti->len = ti_len;
+	if (!dl)
+		goto bad_dirty_log;
+
+	/* Chunk size *must* not be larger than region size. */
+	region_size = dl->type->get_region_size(dl);
+	if (chunk_size > region_size)
+		goto bad_chunk_size;
+
+	/* Recover io size *must* not be larger than region size either. */
+	if (recover_io_size > region_size)
+		goto bad_recover_io_size;
+
+	/* Size and allocate the RAID set structure. */
+	len = sizeof(*rs->data) + sizeof(*rs->dev);
+	if (array_too_big(sizeof(*rs), len, raid_devs))
+		goto bad_array;
+
+	len = sizeof(*rs) + raid_devs * len;
+	rs = kzalloc(len, GFP_KERNEL);
+	if (!rs)
+		goto bad_alloc;
+
+	rec = &rs->recover;
+	atomic_set(&rs->io.in_process, 0);
+	atomic_set(&rs->io.in_process_max, 0);
+	rec->io_size = recover_io_size;
+
+	/* Pointer to data array (located behind the device array). */
+	rs->data = (unsigned long **)
+		   ((void *) rs->dev + raid_devs * sizeof(*rs->dev));
+	rec->dl = dl;
+	rs->set.raid_devs = raid_devs;
+	rs->set.data_devs = raid_devs - raid_type->parity_devs;
+	rs->set.raid_type = raid_type;
+
+	/*
+	 * Set chunk and io size and respective shifts
+	 * (used to avoid divisions)
+	 */
+	rs->set.chunk_size = chunk_size;
+	rs->set.chunk_mask = chunk_size - 1;
+	rs->set.chunk_shift = ffs(chunk_size) - 1;
+
+	rs->set.io_size = io_size;
+	rs->set.io_mask = io_size - 1;
+	rs->set.io_shift = ffs(io_size) - 1;
+	rs->set.io_shift_mask = rs->set.chunk_mask & ~rs->set.io_mask;
+
+	rs->set.pages_per_io = chunk_pages(io_size);
+	rs->set.sectors_per_dev = sectors_per_dev;
+
+	rs->set.ei = -1;		/* Indicate no failed device. */
+	atomic_set(&rs->set.failed_devs, 0);
+
+	rs->ti = ti;
+
+	atomic_set(rec->io_count + IO_WORK, 0);
+	atomic_set(rec->io_count + IO_RECOVER, 0);
+
+	/* Initialize io lock and queues. */
+	spin_lock_init(&rs->io.in_lock);
+	bio_list_init(&rs->io.in);
+	bio_list_init(&rs->io.work);
+
+	init_waitqueue_head(&rs->io.suspendq);	/* Suspend waiters (dm-io). */
+
+	rec->nr_regions = dm_sector_div_up(sectors_per_dev, region_size);
+
+	rec->rh = dm_region_hash_create(rs, dispatch_delayed_bios, wake_do_raid,
+					wakeup_all_recovery_waiters,
+					rs->ti->begin, MAX_RECOVER, dl,
+					region_size, rs->recover.nr_regions);
+	if (IS_ERR(rec->rh))
+		goto bad_rh;
+
+	/* Initialize stripe cache. */
+	r = sc_init(rs, stripes);
+	if (r)
+		goto bad_sc;
+
+	/* Create dm-io client context. */
+	rs->sc.dm_io_client = dm_io_client_create(rs->set.raid_devs *
+						  rs->set.pages_per_io);
+	if (IS_ERR(rs->sc.dm_io_client))
+		goto bad_dm_io_client;
+
+	/* REMOVEME: statistics. */
+	stats_reset(rs);
+	ClearRSDevelStats(rs);	/* Disable development status. */
+
+	*raid_set = rs;
+	return 0;
+
+bad_dirty_log:
+	TI_ERR_RET("Error creating dirty log", -ENOMEM);
+
+
+bad_chunk_size:
+	dm_dirty_log_destroy(dl);
+	TI_ERR("Chunk size larger than region size");
+
+bad_recover_io_size:
+	dm_dirty_log_destroy(dl);
+	TI_ERR("Recover stripe io size larger than region size");
+
+bad_array:
+	dm_dirty_log_destroy(dl);
+	TI_ERR("Arry too big");
+
+bad_alloc:
+	dm_dirty_log_destroy(dl);
+	TI_ERR_RET("Cannot allocate raid context", -ENOMEM);
+
+bad_rh:
+	/* No region hash got created, so the log is still ours to destroy. */
+	dm_dirty_log_destroy(dl);
+	ti->error = DM_MSG_PREFIX "Error creating dirty region hash";
+	goto free_rs;
+
+bad_sc:
+	ti->error = DM_MSG_PREFIX "Error creating stripe cache";
+	goto free;
+
+bad_dm_io_client:
+	ti->error = DM_MSG_PREFIX "Error allocating dm-io resources";
+free:
+	sc_exit(&rs->sc);
+	/* Destroy the region hash exactly once; it frees the dirty log too. */
+	dm_region_hash_destroy(rec->rh); /* Destroys dirty log as well. */
+free_rs:
+	kfree(rs);
+	return -ENOMEM;
+}
+
+/*
+ * Free a RAID context (a RAID set).
+ *
+ * @rs: RAID set to free
+ * @ti: target the component devices were acquired for
+ * @r:  number of component devices (rs->dev[0..r-1]) to put
+ *
+ * The dirty log is not destroyed explicitly here: destroying the region
+ * hash releases the dirty log it was created with (see the error path of
+ * context_alloc()), so an additional dm_dirty_log_destroy() would free
+ * the log a second time.
+ */
+static void
+context_free(struct raid_set *rs, struct dm_target *ti, unsigned r)
+{
+	while (r--)
+		dm_put_device(ti, rs->dev[r].dev);
+
+	dm_io_client_destroy(rs->sc.dm_io_client);
+	sc_exit(&rs->sc);
+	dm_region_hash_destroy(rs->recover.rh); /* Destroys dirty log as well. */
+	kfree(rs);
+}
+
+/*
+ * Create the single threaded work queue of the RAID set and set up its
+ * delayed work to run do_raid(). Returns 0 or -ENOMEM.
+ */
+static int rs_workqueue_init(struct raid_set *rs)
+{
+	struct dm_target *ti = rs->ti;	/* Referenced by TI_ERR_RET(). */
+
+	rs->io.wq = create_singlethread_workqueue(DAEMON);
+	if (rs->io.wq) {
+		INIT_DELAYED_WORK(&rs->io.dws, do_raid);
+		return 0;
+	}
+
+	TI_ERR_RET("failed to create " DAEMON, -ENOMEM);
+}
+
+/*
+ * Return pointer to raid_type structure for raid name
+ * (raid_types is searched from its last entry backwards)
+ * or NULL if no entry matches.
+ */
+static struct raid_type *get_raid_type(char *name)
+{
+	struct raid_type *rt;
+
+	for (rt = ARRAY_END(raid_types); rt-- > raid_types; )
+		if (!strnicmp(STR_LEN(rt->name, name)))
+			return rt;
+
+	return NULL;
+}
+
+/* FIXME: factor out to dm core. */
+/*
+ * Store a / b in *n and return non-zero if a is a multiple of b.
+ */
+static int multiple(sector_t a, sector_t b, sector_t *n)
+{
+	sector_t quotient = a;
+
+	sector_div(quotient, b);	/* Divides quotient in place. */
+	*n = quotient;
+
+	return quotient * b == a;
+}
+
+/*
+ * Log RAID set information to kernel log: the component devices, the
+ * sizes, the selected xor algorithm with its speed and the set type.
+ */
+static void raid_set_log(struct raid_set *rs, unsigned speed)
+{
+	unsigned dev_nr = 0;
+	char name[BDEVNAME_SIZE];
+
+	while (dev_nr < rs->set.raid_devs) {
+		DMINFO("/dev/%s is raid disk %u",
+		       bdevname(rs->dev[dev_nr].dev->bdev, name), dev_nr);
+		dev_nr++;
+	}
+
+	DMINFO("%d/%d/%d sectors chunk/io/recovery size, %u stripes",
+	       rs->set.chunk_size, rs->set.io_size, rs->recover.io_size,
+	       atomic_read(&rs->sc.stripes));
+
+	DMINFO("algorithm \"%s\", %u chunks with %uMB/s", rs->xor.f->name,
+	       rs->xor.chunks, mbpers(rs, speed));
+
+	DMINFO("%s set with net %u/%u devices", rs->set.raid_type->descr,
+	       rs->set.data_devs, rs->set.raid_devs);
+}
+
+/*
+ * Get all devices and offsets.
+ *
+ * @argv points at the first <dev_path> <offset> pair. *p is kept up to
+ * date with the number of devices the caller has to dm_put_device()
+ * again, also when the function bails out with an error.
+ */
+static int
+dev_parms(struct dm_target *ti, struct raid_set *rs,
+	  char **argv, int *p)
+{
+	*p = 0;
+
+	while (*p < rs->set.raid_devs) {
+		int ret;
+		unsigned long long offset;
+		struct raid_dev *dev = &rs->dev[*p];
+		union dev_lookup lookup = { .dev = dev };
+
+		/* Get offset and device. */
+		ret = sscanf(argv[1], "%llu", &offset);
+		if (ret != 1)
+			TI_ERR("Invalid RAID device offset parameter");
+
+		dev->start = offset;
+		ret = dm_get_device(ti, argv[0], dev->start,
+				    rs->set.sectors_per_dev,
+				    dm_table_get_mode(ti->table), &dev->dev);
+		if (ret)
+			TI_ERR_RET("RAID device lookup failure", ret);
+
+		/* Same device found at a lower index -> duplicate. */
+		ret = raid_dev_lookup(rs, bynumber, &lookup);
+		if (ret != -ENODEV && ret < *p) {
+			(*p)++;	/* Ensure dm_put_device() on actual device. */
+			TI_ERR_RET("Duplicate RAID device", -ENXIO);
+		}
+
+		(*p)++;
+		argv += 2;
+	}
+
+	return 0;
+}
+
+/*
+ * Set recovery bandwidth (a percentage) and the work ratio
+ * 100 / bandwidth derived from it; @bandwidth must not be 0.
+ */
+static INLINE void
+recover_set_bandwidth(struct raid_set *rs, unsigned bandwidth)
+{
+	struct recover *rec = &rs->recover;
+
+	rec->bandwidth = bandwidth;
+	rec->bandwidth_work = 100 / bandwidth;
+}
+
+/*
+ * Handle variable number of RAID parameters.
+ *
+ * @argv[i] holds the number of variable parameters (0-5), which is stored
+ * in *raid_parms; the parameters themselves follow in the order chunk
+ * size, # of stripes, io size, recovery io size and recovery bandwidth.
+ * A parameter given as -1 selects the respective default.
+ *
+ * For each parameter the value to use is stored in the first pointer of
+ * its pair; the second (*_parm) one receives the value before the -1
+ * default substitution. For a parameter which is not given, that is
+ * whatever value the caller preset (for the io size: the chunk size).
+ *
+ * No argc is passed in, so the argv[] accesses are not bounds checked
+ * here.
+ *
+ * Returns 0 on success; TI_ERR() is used to bail out on invalid values.
+ */
+static int
+raid_variable_parms(struct dm_target *ti, char **argv,
+ unsigned i, int *raid_parms,
+ int *chunk_size, int *chunk_size_parm,
+ int *stripes, int *stripes_parm,
+ int *io_size, int *io_size_parm,
+ int *recover_io_size, int *recover_io_size_parm,
+ int *bandwidth, int *bandwidth_parm)
+{
+ /* Fetch # of variable raid parameters. */
+ if (sscanf(argv[i++], "%d", raid_parms) != 1 ||
+ !range_ok(*raid_parms, 0, 5))
+ TI_ERR("Bad variable raid parameters number");
+
+ if (*raid_parms) {
+ /*
+ * If we've got variable RAID parameters,
+ * chunk size is the first one
+ */
+ if (sscanf(argv[i++], "%d", chunk_size) != 1 ||
+ (*chunk_size != -1 &&
+ (!POWER_OF_2(*chunk_size) ||
+ !range_ok(*chunk_size, IO_SIZE_MIN, CHUNK_SIZE_MAX))))
+ TI_ERR("Invalid chunk size; must be 2^^n and <= 16384");
+
+ *chunk_size_parm = *chunk_size;
+ if (*chunk_size == -1)
+ *chunk_size = CHUNK_SIZE;
+
+ /*
+ * In case we've got 2 or more variable raid
+ * parameters, the number of stripes is the second one
+ */
+ if (*raid_parms > 1) {
+ if (sscanf(argv[i++], "%d", stripes) != 1 ||
+ (*stripes != -1 &&
+ !range_ok(*stripes, STRIPES_MIN,
+ STRIPES_MAX)))
+ TI_ERR("Invalid number of stripes: must "
+ "be >= 8 and <= 8192");
+ }
+
+ *stripes_parm = *stripes;
+ if (*stripes == -1)
+ *stripes = STRIPES;
+
+ /*
+ * In case we've got 3 or more variable raid
+ * parameters, the io size is the third one.
+ */
+ if (*raid_parms > 2) {
+ if (sscanf(argv[i++], "%d", io_size) != 1 ||
+ (*io_size != -1 &&
+ (!POWER_OF_2(*io_size) ||
+ !range_ok(*io_size, IO_SIZE_MIN,
+ min(BIO_MAX_SECTORS / 2,
+ *chunk_size)))))
+ TI_ERR("Invalid io size; must "
+ "be 2^^n and less equal "
+ "min(BIO_MAX_SECTORS/2, chunk size)");
+ } else
+ /* No io size given: io size follows the chunk size. */
+ *io_size = *chunk_size;
+
+ *io_size_parm = *io_size;
+ if (*io_size == -1)
+ *io_size = *chunk_size;
+
+ /*
+ * In case we've got 4 variable raid parameters,
+ * the recovery stripe io_size is the fourth one
+ */
+ if (*raid_parms > 3) {
+ if (sscanf(argv[i++], "%d", recover_io_size) != 1 ||
+ (*recover_io_size != -1 &&
+ (!POWER_OF_2(*recover_io_size) ||
+ !range_ok(*recover_io_size, RECOVER_IO_SIZE_MIN,
+ BIO_MAX_SECTORS / 2))))
+ TI_ERR("Invalid recovery io size; must be "
+ "2^^n and less equal BIO_MAX_SECTORS/2");
+ }
+
+ *recover_io_size_parm = *recover_io_size;
+ if (*recover_io_size == -1)
+ *recover_io_size = RECOVER_IO_SIZE;
+
+ /*
+ * In case we've got 5 variable raid parameters,
+ * the recovery io bandwidth is the fifth one
+ */
+ if (*raid_parms > 4) {
+ if (sscanf(argv[i++], "%d", bandwidth) != 1 ||
+ (*bandwidth != -1 &&
+ !range_ok(*bandwidth, BANDWIDTH_MIN,
+ BANDWIDTH_MAX)))
+ TI_ERR("Invalid recovery bandwidth "
+ "percentage; must be > 0 and <= 100");
+ }
+
+ *bandwidth_parm = *bandwidth;
+ if (*bandwidth == -1)
+ *bandwidth = BANDWIDTH;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse optional locking parameters.
+ *
+ * Stores the number of locking arguments consumed in *locking_parms
+ * (0 if none got recognized) and the locking type to use in
+ * *locking_type (locking_none unless "cluster" got matched).
+ *
+ * NOTE(review): a match of argv[i] against "none" yields -EINVAL, and the
+ * comparisons are limited to the length of the argument (so an empty
+ * argument matches) - confirm that both are intended.
+ */
+static int
+raid_locking_parms(struct dm_target *ti, char **argv,
+		   unsigned i, int *locking_parms,
+		   struct dm_raid45_locking_type **locking_type)
+{
+	*locking_parms = 0;
+	*locking_type = &locking_none;
+
+	if (!strnicmp(argv[i], "none", strlen(argv[i]))) {
+		*locking_parms = 1;
+	} else {
+		/* Only look at the following argument if "none" failed. */
+		char *arg = argv[i + 1];
+		size_t len = strlen(arg);
+
+		if (!strnicmp(arg, "locking", len)) {
+			*locking_type = &locking_none;
+			*locking_parms = 2;
+		} else if (!strnicmp(arg, "cluster", len)) {
+			*locking_type = &locking_cluster;
+			/* FIXME: namespace. */
+			*locking_parms = 3;
+		}
+	}
+
+	if (*locking_parms == 1)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Set backing device information properties of RAID set:
+ * read-ahead of the mapped device (@stripes stripes) and of each
+ * component device (@chunks chunks) plus the congestion callback.
+ */
+static void rs_set_bdi(struct raid_set *rs, unsigned stripes, unsigned chunks)
+{
+	unsigned p = rs->set.raid_devs, ra_pages;
+	struct mapped_device *md = dm_table_get_md(rs->ti->table);
+	struct backing_dev_info *bdi = &dm_disk(md)->queue->backing_dev_info;
+
+	/* Set read-ahead for the RAID set and the component devices. */
+	bdi->ra_pages = stripes * stripe_pages(rs, rs->set.io_size);
+	ra_pages = chunks * chunk_pages(rs->set.io_size);
+
+	while (p--)
+		bdev_get_queue(rs->dev[p].dev->bdev)->
+			backing_dev_info.ra_pages = ra_pages;
+
+	/* Set congested function and data. */
+	bdi->congested_fn = raid_set_congested;
+	bdi->congested_data = rs;
+
+	dm_put(md);	/* Drop the reference from dm_table_get_md(). */
+}
+
+/*
+ * Get backing device information properties of RAID set:
+ * read-ahead of the mapped device in units of stripes and read-ahead
+ * of the first component device in units of chunks.
+ */
+static void rs_get_ra(struct raid_set *rs, unsigned *stripes, unsigned *chunks)
+{
+	struct mapped_device *md = dm_table_get_md(rs->ti->table);
+	struct request_queue *set_q = dm_disk(md)->queue;
+	struct request_queue *dev_q = bdev_get_queue(rs->dev->dev->bdev);
+
+	*stripes = set_q->backing_dev_info.ra_pages /
+		   stripe_pages(rs, rs->set.io_size);
+	*chunks = dev_q->backing_dev_info.ra_pages /
+		  chunk_pages(rs->set.io_size);
+
+	dm_put(md);
+}
+
+/*
+ * Construct a RAID4/5 mapping:
+ *
+ * log_type #log_params <log_params> \
+ * raid_type [#parity_dev] #raid_variable_params <raid_params> \
+ * [locking "none"/"cluster"]
+ * #raid_devs #dev_to_initialize [<dev_path> <offset>]{3,}
+ *
+ * log_type = "core"/"disk",
+ * #log_params = 1-3 (1-2 for core dirty log type, 3 for disk dirty log only)
+ * log_params = [dirty_log_path] region_size [[no]sync])
+ *
+ * raid_type = "raid4", "raid5_la", "raid5_ra", "raid5_ls", "raid5_rs"
+ *
+ * #parity_dev = N if raid_type = "raid4"
+ * o N = -1: pick default = last device
+ * o N >= 0 and < #raid_devs: parity device index
+ *
+ * #raid_variable_params = 0-5; raid_params (-1 = default):
+ * [chunk_size [#stripes [io_size [recover_io_size [%recovery_bandwidth]]]]]
+ * o chunk_size (unit to calculate drive addresses; must be 2^^n, > 8
+ * and <= CHUNK_SIZE_MAX)
+ * o #stripes is number of stripes allocated to stripe cache
+ * (must be > 1 and < STRIPES_MAX)
+ * o io_size (io unit size per device in sectors; must be 2^^n and > 8)
+ * o recover_io_size (io unit size per device for recovery in sectors;
+ * must be 2^^n, > SECTORS_PER_PAGE and <= region_size)
+ * o %recovery_bandwidth is the maximum amount spent for recovery during
+ * application io (1-100%)
+ * If raid_variable_params = 0, defaults will be used.
+ * Any raid_variable_param can be set to -1 to apply a default
+ *
+ * #raid_devs = N (N >= 3)
+ *
+ * #dev_to_initialize = N
+ * -1: initialize parity on all devices
+ * >= 0 and < #raid_devs: initialize raid_path; used to force reconstruction
+ * of a failed devices content after replacement
+ *
+ * <dev_path> = device_path (eg, /dev/sdd1)
+ * <offset> = begin at offset on <dev_path>
+ *
+ */
+#define MIN_PARMS 13
+/*
+ * Constructor: parse and check the table line, allocate the RAID set,
+ * acquire the component devices, select the xor algorithm and create the
+ * work queue.
+ *
+ * Returns 0 with ti->private pointing to the RAID set on success, an
+ * error code otherwise (with all resources released again).
+ */
+static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	int bandwidth = BANDWIDTH, bandwidth_parm = -1,
+	    chunk_size = CHUNK_SIZE, chunk_size_parm = -1,
+	    dev_to_init, dl_parms, locking_parms, parity_parm, pi = -1,
+	    i, io_size = IO_SIZE, io_size_parm = -1,
+	    r, raid_devs, raid_parms,
+	    recover_io_size = RECOVER_IO_SIZE, recover_io_size_parm = -1,
+	    stripes = STRIPES, stripes_parm = -1;
+	unsigned speed;
+	sector_t tmp, sectors_per_dev;
+	struct dm_raid45_locking_type *locking;
+	struct raid_set *rs;
+	struct raid_type *raid_type;
+
+	/* Ensure minimum number of parameters. */
+	if (argc < MIN_PARMS)
+		TI_ERR("Not enough parameters");
+
+	/* Fetch # of dirty log parameters. */
+	if (sscanf(argv[1], "%d", &dl_parms) != 1
+	    || !range_ok(dl_parms, 1, 4711))
+		TI_ERR("Bad dirty log parameters number");
+
+	/*
+	 * The arguments up to #dev_to_initialize, which sits at index
+	 * dl_parms + 5 or beyond, are read unconditionally below, so make
+	 * sure they exist before indexing argv with dl_parms.
+	 */
+	if (argc < (unsigned) dl_parms + 6)
+		TI_ERR("Not enough parameters");
+
+	/* Check raid_type. */
+	raid_type = get_raid_type(argv[dl_parms + 2]);
+	if (!raid_type)
+		TI_ERR("Bad raid type");
+
+	/* In case of RAID4, parity drive is selectable. */
+	parity_parm = !!(raid_type->level == raid4);
+
+	/* Handle variable number of RAID parameters. */
+	r = raid_variable_parms(ti, argv, dl_parms + parity_parm + 3,
+				&raid_parms,
+				&chunk_size, &chunk_size_parm,
+				&stripes, &stripes_parm,
+				&io_size, &io_size_parm,
+				&recover_io_size, &recover_io_size_parm,
+				&bandwidth, &bandwidth_parm);
+	if (r)
+		return r;
+
+	r = raid_locking_parms(ti, argv,
+			       dl_parms + parity_parm + raid_parms + 4,
+			       &locking_parms, &locking);
+	if (r)
+		return r;
+
+	/* # of raid devices. */
+	i = dl_parms + parity_parm + raid_parms + locking_parms + 4;
+
+	/* argv[i] and argv[i + 1] are read next; make sure they exist. */
+	if (argc < (unsigned) i + 2)
+		TI_ERR("Not enough parameters");
+
+	if (sscanf(argv[i], "%d", &raid_devs) != 1 ||
+	    raid_devs < raid_type->minimal_devs)
+		TI_ERR("Invalid number of raid devices");
+
+	/* In case of RAID4, check parity drive index is in limits. */
+	if (raid_type->level == raid4) {
+		/* Fetch index of parity device. */
+		if (sscanf(argv[dl_parms + 3], "%d", &pi) != 1 ||
+		    !range_ok(pi, 0, raid_devs - 1))
+			TI_ERR("Invalid RAID4 parity device index");
+	}
+
+	/*
+	 * Index of device to initialize starts at 0
+	 *
+	 * o -1 -> don't initialize a particular device,
+	 * o 0..raid_devs-1 -> initialize respective device
+	 *   (used for reconstruction of a replaced device)
+	 */
+	if (sscanf(argv[i + 1], "%d", &dev_to_init) != 1
+	    || !range_ok(dev_to_init, -1, raid_devs - 1))
+		TI_ERR("Invalid number for raid device to initialize");
+
+	/* Check # of raid device arguments. */
+	if (argc - dl_parms - parity_parm - raid_parms - 6 !=
+	    2 * raid_devs)
+		TI_ERR("Wrong number of raid device/offset arguments");
+
+	/*
+	 * Check that the table length is divisible
+	 * w/o rest by (raid_devs - parity_devs)
+	 */
+	if (!multiple(ti->len, raid_devs - raid_type->parity_devs,
+		      &sectors_per_dev))
+		TI_ERR
+		    ("Target length not divisable by number of data devices");
+
+	/*
+	 * Check that the device size is
+	 * divisible w/o rest by chunk size
+	 */
+	if (!multiple(sectors_per_dev, chunk_size, &tmp))
+		TI_ERR("Device length not divisable by chunk_size");
+
+	/****************************************************************
+	 * Now that we checked the constructor arguments ->
+	 * let's allocate the RAID set
+	 ****************************************************************/
+	r = context_alloc(&rs, raid_type, stripes, chunk_size, io_size,
+			  recover_io_size, raid_devs, sectors_per_dev,
+			  ti, dl_parms, argv);
+	if (r)
+		return r;
+
+	/*
+	 * Set these here in order to avoid passing
+	 * too many arguments to context_alloc()
+	 */
+	rs->set.dev_to_init_parm = dev_to_init;
+	rs->set.dev_to_init = dev_to_init;
+	rs->set.pi_parm = pi;
+	rs->set.pi = (pi == -1) ? rs->set.data_devs : pi;
+	rs->set.raid_parms = raid_parms;
+	rs->set.chunk_size_parm = chunk_size_parm;
+	rs->set.io_size_parm = io_size_parm;
+	rs->sc.stripes_parm = stripes_parm;
+	rs->recover.io_size_parm = recover_io_size_parm;
+	rs->recover.bandwidth_parm = bandwidth_parm;
+	recover_set_bandwidth(rs, bandwidth);
+
+	/* Use locking type to lock stripe access. */
+	rs->locking = locking;
+
+	/* Get the device/offset tupels. */
+	argv += dl_parms + 6 + parity_parm + raid_parms;
+	/* On return i holds the number of devices to put on error. */
+	r = dev_parms(ti, rs, argv, &i);
+	if (r)
+		goto err;
+
+	/* Initialize recovery. */
+	rs->recover.start_jiffies = jiffies;
+	rs->recover.end_jiffies = 0;
+	recovery_region_reset(rs);
+
+	/* Allow for recovery of any nosync regions. */
+	SetRSRecover(rs);
+
+	/* Set backing device information (eg. read ahead). */
+	rs_set_bdi(rs, chunk_size * 2, io_size * 4);
+	SetRSCheckOverwrite(rs); /* Allow chunk overwrite checks. */
+
+	speed = xor_optimize(rs); /* Select best xor algorithm. */
+
+	/* Initialize work queue to handle this RAID set's io. */
+	r = rs_workqueue_init(rs);
+	if (r)
+		goto err;
+
+	raid_set_log(rs, speed); /* Log information about RAID set. */
+
+	/*
+	 * Make sure that dm core only hands maximum io size
+	 * length down and pays attention to io boundaries.
+	 */
+	ti->split_io = rs->set.io_size;
+	ti->private = rs;
+	return 0;
+
+err:
+	context_free(rs, ti, i);
+	return r;
+}
+
+/*
+ * Destruct a raid mapping
+ */
+/*
+ * Destructor: switches recovery off, waits for the ios in process and
+ * only then destroys the work queue and frees the RAID set including all
+ * of its component devices.
+ */
+static void raid_dtr(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+
+ /* Indicate recovery end so that ios in flight drain. */
+ ClearRSRecover(rs);
+
+ wake_do_raid(rs); /* Wake daemon. */
+ wait_ios(rs); /* Wait for any io still being processed. */
+ /* The daemon must be gone before the set it works on is freed. */
+ destroy_workqueue(rs->io.wq);
+ context_free(rs, ti, rs->set.raid_devs);
+}
+
+/*
+ * Queues ios to RAID sets: appends @bio to the input list under the
+ * input lock and wakes the daemon if the list was empty before.
+ */
+static inline void queue_bio(struct raid_set *rs, struct bio *bio)
+{
+	int was_empty;
+
+	spin_lock_irq(&rs->io.in_lock);
+	was_empty = bio_list_empty(&rs->io.in);
+	bio_list_add(&rs->io.in, bio);
+	spin_unlock_irq(&rs->io.in_lock);
+
+	/* Wake daemon if input list was empty. */
+	if (was_empty)
+		wake_do_raid(rs);
+}
+
+/*
+ * Raid mapping function.
+ *
+ * Read-ahead bios are refused with -EIO; any other bio gets remapped
+ * relative to the target start and is handed to the daemon, in which
+ * case DM_MAPIO_SUBMITTED is returned.
+ */
+static int raid_map(struct dm_target *ti, struct bio *bio,
+		    union map_info *map_context)
+{
+	struct raid_set *rs;
+
+	/* I don't want to waste stripe cache capacity. */
+	if (bio_rw(bio) == READA)
+		return -EIO;
+
+	rs = ti->private;
+
+	/* REMOVEME: statistics. */
+	if (bio_data_dir(bio) == WRITE)
+		atomic_inc(rs->stats + S_BIOS_WRITE);
+	else
+		atomic_inc(rs->stats + S_BIOS_READ);
+
+	/*
+	 * Get io reference to be waiting for to drop
+	 * to zero on device suspension/destruction.
+	 */
+	io_get(rs);
+	bio->bi_sector -= ti->begin;	/* Remap sector. */
+	queue_bio(rs, bio);		/* Queue to the daemon. */
+
+	return DM_MAPIO_SUBMITTED;	/* Handle later. */
+}
+
+/*
+ * Device suspend: flag the set suspended, stop recovery (or just wake
+ * the daemon), wait for all ios in process and suspend the dirty log.
+ */
+static void raid_postsuspend(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+	struct dm_dirty_log *log = rs->recover.dl;
+
+	SetRSSuspended(rs);
+
+	if (!RSRecover(rs))
+		wake_do_raid(rs);
+	else
+		dm_rh_stop_recovery(rs->recover.rh); /* Wakes do_raid(). */
+
+	wait_ios(rs);	/* Wait for completion of all ios being processed. */
+
+	/* Suspend dirty log, if its type provides a postsuspend method. */
+	if (!log->type->postsuspend)
+		return;
+
+	/* FIXME: need better error handling. */
+	if (log->type->postsuspend(log))
+		DMWARN("log suspend failed");
+}
+
+/*
+ * Device resume.
+ *
+ * Resumes the dirty log (if its type has a resume method), recomputes
+ * the number of regions still to recover from the log's sync count,
+ * clears the suspended flag and restarts either recovery or the
+ * worker. A failing log resume is only reported with a warning.
+ */
+static void raid_resume(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+ struct recover *rec = &rs->recover;
+ struct dm_dirty_log *dl = rec->dl;
+
+ if (dl->type->resume && dl->type->resume(dl))
+ /* Resume dirty log. */
+ /* FIXME: need better error handling. */
+ DMWARN("log resume failed");
+
+ /* Regions to recover = all regions minus those the log reports in sync. */
+ rec->nr_regions_to_recover =
+ rec->nr_regions - dl->type->get_sync_count(dl);
+
+ ClearRSSuspended(rs);
+
+ /* Reset any unfinished recovery. */
+ if (RSRecover(rs)) {
+ recovery_region_reset(rs);
+ dm_rh_start_recovery(rec->rh);/* Calls wake_do_raid(). */
+ } else
+ wake_do_raid(rs);
+}
+
+/*
+ * Stripe cache size, converted with to_sector().
+ *
+ * Per cached stripe (atomic count rs->sc.stripes) this adds up
+ * struct stripe, struct stripe_set, one struct page_list and
+ * io_size (converted with to_bytes()) times raid_devs. While
+ * rs->recover.end_jiffies is zero, raid_devs times the recovery
+ * io size (converted with to_bytes()) is added on top.
+ * NOTE(review): end_jiffies == 0 presumably means recovery has not
+ * finished yet - confirm against the recovery code.
+ */
+static INLINE unsigned sc_size(struct raid_set *rs)
+{
+ return to_sector(atomic_read(&rs->sc.stripes) *
+ (sizeof(struct stripe) +
+ (sizeof(struct stripe_set) +
+ (sizeof(struct page_list) +
+ to_bytes(rs->set.io_size) *
+ rs->set.raid_devs)) +
+ (rs->recover.
+ end_jiffies ? 0 : to_bytes(rs->set.raid_devs *
+ rs->recover.
+ io_size))));
+}
+
+/* REMOVEME: status output for development. */
+/*
+ * Emit development statistics for the RAID set in ti->private.
+ *
+ * @ti:     target whose private data is the RAID set
+ * @result: output buffer used by DMEMIT
+ * @size:   in: current output offset, out: offset after the statistics
+ * @maxlen: size of @result
+ *
+ * NOTE(review): DMEMIT is assumed to append at result + sz, bounded by
+ * maxlen, and to advance the local 'sz' - confirm against its definition.
+ */
+static void
+raid_devel_stats(struct dm_target *ti, char *result,
+		 unsigned *size, unsigned maxlen)
+{
+	unsigned chunks, stripes, sz = *size;
+	unsigned long j;
+	char buf[BDEVNAME_SIZE];
+	struct stats_map *sm, *sm_end = ARRAY_END(stats_map);
+	struct raid_set *rs = ti->private;
+	struct recover *rec = &rs->recover;
+	struct timespec ts;
+
+	DMEMIT("%s ", version);
+	DMEMIT("io_inprocess=%d ", atomic_read(&rs->io.in_process));
+	DMEMIT("io_inprocess_max=%d ", atomic_read(&rs->io.in_process_max));
+
+	for (sm = stats_map; sm < sm_end; sm++)
+		DMEMIT("%s%d", sm->str, atomic_read(rs->stats + sm->type));
+
+	DMEMIT(" overwrite=%s ", RSCheckOverwrite(rs) ? "on" : "off");
+	DMEMIT("sc=%u/%u/%u/%u/%u ", rs->set.chunk_size, rs->set.io_size,
+	       atomic_read(&rs->sc.stripes), rs->sc.hash.buckets,
+	       sc_size(rs));
+
+	/* Recovery run time: up to now while end_jiffies is still unset. */
+	j = (rec->end_jiffies ? rec->end_jiffies : jiffies) -
+	    rec->start_jiffies;
+	jiffies_to_timespec(j, &ts);
+
+	/*
+	 * Print seconds with two decimals. The fraction has to be
+	 * zero-padded hundredths of a second (10000000 ns each);
+	 * printing the raw nanoseconds and cutting the string after
+	 * two characters turns e.g. 5 ms into ".50".
+	 */
+	snprintf(buf, sizeof(buf), "%ld.%02ld",
+		 ts.tv_sec, ts.tv_nsec / 10000000);
+
+	DMEMIT("rg=%llu%s/%llu/%llu/%u %s ",
+	       (unsigned long long) rec->nr_regions_recovered,
+	       RSRegionGet(rs) ? "+" : "",
+	       (unsigned long long) rec->nr_regions_to_recover,
+	       (unsigned long long) rec->nr_regions, rec->bandwidth, buf);
+
+	rs_get_ra(rs, &stripes, &chunks);
+	DMEMIT("ra=%u/%u ", stripes, chunks);
+
+	/* Hand the advanced output offset back to the caller. */
+	*size = sz;
+}
+
+/*
+ * Status interface.
+ *
+ * STATUSTYPE_INFO emits (after the optional development statistics)
+ * the number of devices, each device as formatted by format_dev_t(),
+ * the literal "1 " and one character per device: 'A' if the device is
+ * operational, 'D' otherwise, followed by 'p' for the raid4 parity
+ * device and 'i' for the device to initialize.
+ *
+ * STATUSTYPE_TABLE emits the dirty log's status first, then the raid
+ * type name, the number of raid parameters, the parameters given at
+ * construction (as many as raid_parms says), the number of devices,
+ * the device to initialize and a "device start" pair per device.
+ *
+ * Always returns 0. DMEMIT relies on the locals sz, result and maxlen.
+ */
+static int
+raid_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned maxlen)
+{
+ unsigned i, sz = 0;
+ char buf[BDEVNAME_SIZE];
+ struct raid_set *rs = ti->private;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ /* REMOVEME: statistics. */
+ if (RSDevelStats(rs))
+ raid_devel_stats(ti, result, &sz, maxlen);
+
+ DMEMIT("%u ", rs->set.raid_devs);
+
+ for (i = 0; i < rs->set.raid_devs; i++)
+ DMEMIT("%s ",
+ format_dev_t(buf, rs->dev[i].dev->bdev->bd_dev));
+
+ DMEMIT("1 ");
+ for (i = 0; i < rs->set.raid_devs; i++) {
+ DMEMIT("%c", dev_operational(rs, i) ? 'A' : 'D');
+
+ if (rs->set.raid_type->level == raid4 &&
+ i == rs->set.pi)
+ DMEMIT("p");
+
+ if (rs->set.dev_to_init == i)
+ DMEMIT("i");
+ }
+
+ break;
+
+ case STATUSTYPE_TABLE:
+ /* The dirty log writes its part first and returns the new offset. */
+ sz = rs->recover.dl->type->status(rs->recover.dl, type,
+ result, maxlen);
+ DMEMIT("%s %u ", rs->set.raid_type->name,
+ rs->set.raid_parms);
+
+ if (rs->set.raid_type->level == raid4)
+ DMEMIT("%d ", rs->set.pi_parm);
+
+ /* Only the first raid_parms optional parameters are emitted. */
+ if (rs->set.raid_parms)
+ DMEMIT("%d ", rs->set.chunk_size_parm);
+
+ if (rs->set.raid_parms > 1)
+ DMEMIT("%d ", rs->sc.stripes_parm);
+
+ if (rs->set.raid_parms > 2)
+ DMEMIT("%d ", rs->set.io_size_parm);
+
+ if (rs->set.raid_parms > 3)
+ DMEMIT("%d ", rs->recover.io_size_parm);
+
+ if (rs->set.raid_parms > 4)
+ DMEMIT("%d ", rs->recover.bandwidth_parm);
+
+ DMEMIT("%u %d ", rs->set.raid_devs, rs->set.dev_to_init);
+
+ for (i = 0; i < rs->set.raid_devs; i++)
+ DMEMIT("%s %llu ",
+ format_dev_t(buf,
+ rs->dev[i].dev->bdev->bd_dev),
+ (unsigned long long) rs->dev[i].start);
+ }
+
+ return 0;
+}
+
+/*
+ * Message interface
+ *
+ * The enumerators are used as bit numbers in the message's action
+ * word (see the test_bit() calls in the handlers). The first group
+ * names the message, the second group the option; the set/grow/shrink
+ * names are aliases sharing the bit numbers of on/off/reset.
+ */
+enum raid_msg_actions {
+ act_bw, /* Recovery bandwidth switch. */
+ act_dev, /* Device failure switch. */
+ act_overwrite, /* Stripe overwrite check. */
+ act_read_ahead, /* Set read ahead. */
+ act_stats, /* Development statistics switch. */
+ act_sc, /* Stripe cache switch. */
+
+ act_on, /* Set entity on. */
+ act_off, /* Set entity off. */
+ act_reset, /* Reset entity. */
+
+ act_set = act_on, /* Set # absolute. */
+ act_grow = act_off, /* Grow # by an amount. */
+ act_shrink = act_reset, /* Shrink # by an amount. */
+};
+
+/*
+ * Turn a delta into an absolute value.
+ *
+ * @action: action bits of the message (set, grow or shrink)
+ * @act:    current value
+ * @r:      value given by the user
+ *
+ * Returns @r for 'set', @act + @r for 'grow', @act - @r for 'shrink'
+ * and -EINVAL if none of the three bits is set.
+ */
+static int _absolute(unsigned long action, int act, int r)
+{
+	if (test_bit(act_set, &action))
+		return r;
+
+	if (test_bit(act_grow, &action))
+		return r + act;
+
+	if (test_bit(act_shrink, &action))
+		return act - r;
+
+	return -EINVAL;
+}
+
+/*
+ * Change recovery io bandwidth.
+ *
+ * Both the argument as given and the resulting absolute bandwidth
+ * have to be within BANDWIDTH_MIN..BANDWIDTH_MAX. Otherwise the
+ * message is flagged as having an invalid argument and -EINVAL is
+ * returned.
+ */
+static int bandwidth_change(struct dm_msg *msg, void *context)
+{
+	struct raid_set *rs = context;
+	int current_bw = rs->recover.bandwidth;
+	int bw = DM_MSG_INT_ARG(msg);
+
+	if (!range_ok(bw, BANDWIDTH_MIN, BANDWIDTH_MAX))
+		goto bad_arg;
+
+	/* Convert a grow/shrink delta into the absolute bandwidth. */
+	bw = _absolute(msg->action, current_bw, bw);
+	if (!range_ok(bw, BANDWIDTH_MIN, BANDWIDTH_MAX))
+		goto bad_arg;
+
+	recover_set_bandwidth(rs, bw);
+	return 0;
+
+bad_arg:
+	set_bit(dm_msg_ret_arg, &msg->ret);
+	set_bit(dm_msg_ret_inval, &msg->ret);
+	return -EINVAL;
+}
+
+/* Change state of a device (running/offline). */
+/* FIXME: this only works while recovering!. */
+/*
+ * The device is looked up by major:minor if the argument contains a
+ * ':' and by name otherwise. The requested state is logged, prefixed
+ * with "is already " if the device is in that state already, and the
+ * set is degraded (offline) or upgraded (running) accordingly.
+ */
+static int device_state(struct dm_msg *msg, void *context)
+{
+	int dev, offline;
+	const char *already = "is already ";
+	union dev_lookup dl = { .dev_name = DM_MSG_STR_ARG(msg) };
+	struct raid_set *rs = context;
+
+	dev = raid_dev_lookup(rs, strchr(dl.dev_name, ':') ?
+			      bymajmin : byname, &dl);
+	if (dev == -ENODEV) {
+		DMERR("device %s is no member of this set", dl.dev_name);
+		return dev;
+	}
+
+	offline = test_bit(act_off, &msg->action);
+
+	/* Drop the prefix if the request actually changes the state. */
+	if (offline ? dev_operational(rs, dev) : !dev_operational(rs, dev))
+		already = "";
+
+	DMINFO("/dev/%s %s%s", dl.dev_name, already,
+	       offline ? "offline" : "running");
+
+	if (offline)
+		return raid_set_check_and_degrade(rs, NULL, dev);
+
+	return raid_set_check_and_upgrade(rs, dev);
+}
+
+/*
+ * Set/reset development feature flags.
+ *
+ * 'on' sets the flag selected by msg->spec->parm and fails with -EPERM
+ * if it was set already; 'off' clears it and fails with -EPERM if it
+ * was not set. 'reset' sets the flag for the statistics and overwrite
+ * messages (resetting the statistics first for the former). Anything
+ * else yields -EINVAL.
+ */
+static int devel_flags(struct dm_msg *msg, void *context)
+{
+	struct raid_set *rs = context;
+
+	if (test_bit(act_on, &msg->action))
+		return test_and_set_bit(msg->spec->parm,
+					&rs->io.flags) ? -EPERM : 0;
+
+	if (test_bit(act_off, &msg->action))
+		return test_and_clear_bit(msg->spec->parm,
+					  &rs->io.flags) ? 0 : -EPERM;
+
+	if (!test_bit(act_reset, &msg->action))
+		return -EINVAL;
+
+	if (test_bit(act_stats, &msg->action))
+		stats_reset(rs);
+	else if (!test_bit(act_overwrite, &msg->action))
+		return -EINVAL;
+
+	set_bit(msg->spec->parm, &rs->io.flags);
+	return 0;
+}
+
+/*
+ * Set stripe and chunk read ahead pages.
+ *
+ * Both arguments have to be within 1..512; otherwise the message is
+ * flagged as having an invalid argument and -EINVAL is returned.
+ */
+static int read_ahead_set(struct dm_msg *msg, void *context)
+{
+	int ra_stripes = DM_MSG_INT_ARGS(msg, 0);
+	int ra_chunks = DM_MSG_INT_ARGS(msg, 1);
+
+	if (!range_ok(ra_stripes, 1, 512) ||
+	    !range_ok(ra_chunks, 1, 512)) {
+		set_bit(dm_msg_ret_arg, &msg->ret);
+		set_bit(dm_msg_ret_inval, &msg->ret);
+		return -EINVAL;
+	}
+
+	rs_set_bdi(context, ra_stripes, ra_chunks);
+	return 0;
+}
+
+/*
+ * Resize the stripe cache.
+ *
+ * Returns -EPERM while a previous shrink request is still pending,
+ * -EINVAL (with the message flagged) for a non-positive argument or a
+ * resulting size outside STRIPES_MIN..STRIPES_MAX, the result of
+ * sc_grow() when growing and 0 otherwise.
+ */
+static int stripecache_resize(struct dm_msg *msg, void *context)
+{
+	int cur, wanted;
+	struct raid_set *rs = context;
+
+	/* Deny permission in case the daemon is still shrinking!. */
+	if (atomic_read(&rs->sc.stripes_to_shrink))
+		return -EPERM;
+
+	wanted = DM_MSG_INT_ARG(msg);
+	if (wanted <= 0)
+		goto bad_arg;
+
+	cur = atomic_read(&rs->sc.stripes);
+
+	/* Make delta stripes absolute. */
+	wanted = _absolute(msg->action, cur, wanted);
+	if (!range_ok(wanted, STRIPES_MIN, STRIPES_MAX))
+		goto bad_arg;
+
+	/*
+	 * Growing can be done right here; shrinking is left
+	 * to the worker for synchronization. An unchanged
+	 * size is a successful no-op.
+	 */
+	if (wanted > cur)
+		return sc_grow(&rs->sc, wanted - cur, SC_GROW);
+
+	if (wanted < cur) {
+		atomic_set(&rs->sc.stripes_to_shrink, cur - wanted);
+		wake_do_raid(rs);
+	}
+
+	return 0;
+
+bad_arg:
+	set_bit(dm_msg_ret_arg, &msg->ret);
+	set_bit(dm_msg_ret_inval, &msg->ret);
+	return -EINVAL;
+}
+
+/* Parse the RAID message action. */
+/*
+ * 'ba[ndwidth] {se[t],g[row],sh[rink]} #'		# e.g. 'ba se 50'
+ * 'de[vice] o[ffline]/r[unning] DevName/maj:min'	# e.g. 'device o /dev/sda'
+ * 'o[verwrite] {on,of[f],r[eset]}'			# e.g. 'o of'
+ * 'r[ead_ahead] set #stripes #chunks'			# e.g. 'r se 3 2'
+ * 'sta[tistics] {on,of[f],r[eset]}'			# e.g. 'stat of'
+ * 'str[ipecache] {se[t],g[row],sh[rink]} #'		# e.g. 'stripe set 1024'
+ *
+ * Builds the message specification table and hands the message to
+ * dm_message_parse() with ti->private as handler context; returns
+ * whatever the parser returns.
+ *
+ * NOTE(review): the parsed arguments live in function-static storage,
+ * so concurrent messages would share them - confirm that callers
+ * serialize messages.
+ */
+static int
+raid_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	/* Variables to store the parsed parameters in. */
+	static int i[2];
+	/*
+	 * Index the int array first and cast afterwards: a cast binds
+	 * tighter than '+', so "(unsigned long *) i + 1" advances by
+	 * sizeof(unsigned long) and points past i[1] wherever long is
+	 * wider than int.
+	 * NOTE(review): assumes the parser stores dm_msg_int_t values
+	 * as int through these pointers - confirm in dm-message.
+	 */
+	static unsigned long *i_arg[] = {
+		(unsigned long *) (i + 0),
+		(unsigned long *) (i + 1),
+	};
+	static char *p;
+	static unsigned long *p_arg[] = { (unsigned long *) &p };
+
+	/* Declare all message option strings. */
+	static char *str_sgs[] = { "set", "grow", "shrink" };
+	static char *str_dev[] = { "running", "offline" };
+	static char *str_oor[] = { "on", "off", "reset" };
+
+	/* Declare all actions. */
+	static unsigned long act_sgs[] = { act_set, act_grow, act_shrink };
+	static unsigned long act_oor[] = { act_on, act_off, act_reset };
+
+	/* Bandwidth option. */
+	static struct dm_message_option bw_opt = { 3, str_sgs, act_sgs };
+	static struct dm_message_argument bw_args = {
+		1, i_arg, { dm_msg_int_t }
+	};
+
+	/* Device option: "running" maps to act_on, "offline" to act_off. */
+	static struct dm_message_option dev_opt = { 2, str_dev, act_oor };
+	static struct dm_message_argument dev_args = {
+		1, p_arg, { dm_msg_base_t }
+	};
+
+	/* Read ahead option: only the first entry ("set") is accepted. */
+	static struct dm_message_option ra_opt = { 1, str_sgs, act_sgs };
+	static struct dm_message_argument ra_args = {
+		2, i_arg, { dm_msg_int_t, dm_msg_int_t }
+	};
+
+	static struct dm_message_argument null_args = {
+		0, NULL, { dm_msg_int_t }
+	};
+
+	/* Overwrite and statistics option. */
+	static struct dm_message_option ovr_stats_opt = { 3, str_oor, act_oor };
+
+	/* Stripecache option. */
+	static struct dm_message_option stripe_opt = { 3, str_sgs, act_sgs };
+
+	/* Declare messages. */
+	static struct dm_msg_spec specs[] = {
+		{ "bandwidth", act_bw, &bw_opt, &bw_args,
+		  0, bandwidth_change },
+		{ "device", act_dev, &dev_opt, &dev_args,
+		  0, device_state },
+		{ "overwrite", act_overwrite, &ovr_stats_opt, &null_args,
+		  RS_CHECK_OVERWRITE, devel_flags },
+		{ "read_ahead", act_read_ahead, &ra_opt, &ra_args,
+		  0, read_ahead_set },
+		{ "statistics", act_stats, &ovr_stats_opt, &null_args,
+		  RS_DEVEL_STATS, devel_flags },
+		{ "stripecache", act_sc, &stripe_opt, &bw_args,
+		  0, stripecache_resize },
+	};
+
+	/* The message for the parser. */
+	struct dm_msg msg = {
+		.num_specs = ARRAY_SIZE(specs),
+		.specs = specs,
+	};
+
+	return dm_message_parse(TARGET, &msg, ti->private, argc, argv);
+}
+/*
+ * END message interface
+ */
+
+/*
+ * Target type registered with device-mapper under the name "raid45";
+ * wires up the constructor/destructor, map, suspend/resume, status
+ * and message methods defined in this file.
+ */
+static struct target_type raid_target = {
+ .name = "raid45",
+ .version = {1, 0, 0},
+ .module = THIS_MODULE,
+ .ctr = raid_ctr,
+ .dtr = raid_dtr,
+ .map = raid_map,
+ .postsuspend = raid_postsuspend,
+ .resume = raid_resume,
+ .status = raid_status,
+ .message = raid_message,
+};
+
+/*
+ * Log the outcome of a target (un)registration: an error carrying
+ * @bad_msg and the error code if @r is non-zero, otherwise an info
+ * line made of @good_msg and the module version.
+ */
+static void init_exit(const char *bad_msg, const char *good_msg, int r)
+{
+	if (!r) {
+		DMINFO("%s %s", good_msg, version);
+		return;
+	}
+
+	DMERR("Failed to %sregister target [%d]", bad_msg, r);
+}
+
+/* Module init: register the raid45 target and log the result. */
+static int __init dm_raid_init(void)
+{
+	int ret = dm_register_target(&raid_target);
+
+	init_exit("", "initialized", ret);
+	return ret;
+}
+
+/* Module exit: unregister the raid45 target and log the exit. */
+static void __exit dm_raid_exit(void)
+{
+	dm_unregister_target(&raid_target);
+
+	/* Unregistering yields no status here, so success is logged. */
+	init_exit("un", "exit", 0);
+}
+
+/* Module hooks. */
+module_init(dm_raid_init);
+module_exit(dm_raid_exit);
+
+MODULE_DESCRIPTION(DM_NAME " raid4/5 target");
+MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
+MODULE_LICENSE("GPL");
__u8 *data;
int ret;
- if ((video->dev->quirks & UVC_QUIRK_HUE_EPIPE) &&
++ if ((chain->dev->quirks & UVC_QUIRK_HUE_EPIPE) &&
+ (v4l2_ctrl->id == V4L2_CID_HUE))
+ return -EINVAL;
+
- ctrl = uvc_find_control(video, v4l2_ctrl->id, &mapping);
+ ctrl = uvc_find_control(chain, v4l2_ctrl->id, &mapping);
if (ctrl == NULL)
return -EINVAL;
#define UVC_QUIRK_STREAM_NO_FID 0x00000010
#define UVC_QUIRK_IGNORE_SELECTOR_UNIT 0x00000020
#define UVC_QUIRK_FIX_BANDWIDTH 0x00000080
- #define UVC_QUIRK_HUE_EPIPE 0x00000100
+ #define UVC_QUIRK_PROBE_DEF 0x00000100
++#define UVC_QUIRK_HUE_EPIPE 0x00000200
/* Format flags */
#define UVC_FMT_FLAG_COMPRESSED 0x00000001
#include "suncore.h"
#endif
- #include <linux/kdb.h>
+#ifdef CONFIG_KDB
++#include <linux/kdb.h>
+/*
+ * kdb_serial_line records the serial line number of the first serial console.
+ * NOTE: The kernel ignores characters on the serial line unless a user space
+ * program has opened the line first. To enter kdb before user space has opened
+ * the serial line, you can use the 'kdb=early' flag to lilo and set the
+ * appropriate breakpoints.
+ */
+
+static int kdb_serial_line = -1;
+static const char *kdb_serial_ptr = kdb_serial_str;
++#else
++#define KDB_8250() 0
+#endif /* CONFIG_KDB */
+
/*
* Configuration:
* share_irqs - whether we pass IRQF_SHARED to request_irq(). This option
ch.ChanNo = 1;
hal_set_current_channel(&priv->sHwData, ch);
-- hal_set_beacon_period(&priv->sHwData, conf->beacon_int);
hal_set_accept_broadcast(&priv->sHwData, 1);
hal_set_accept_promiscuous(&priv->sHwData, 1);
hal_set_accept_multicast(&priv->sHwData, 1);
#include <asm/unaligned.h>
#include <linux/platform_device.h>
#include <linux/workqueue.h>
++#ifdef CONFIG_KDB_USB
++#include <linux/kdb.h>
++#endif
#include <linux/usb.h>
}
EXPORT_SYMBOL_GPL(usb_hcd_platform_shutdown);
+#ifdef CONFIG_KDB_USB
++/*
++ * Return the KDB poll function of the host controller driver that
++ * owns udev's bus (as an untyped pointer), or NULL if the bus has no
++ * HCD or the HCD has no driver.
++ */
+void *
+usb_hcd_get_kdb_poll_func(struct usb_device *udev)
+{
- struct usb_hcd *hcd = bus_to_hcd(udev->bus);
++ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+
+ if (hcd && hcd->driver)
+ return (void *)(hcd->driver->kdb_poll_char);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL (usb_hcd_get_kdb_poll_func);
++
++/*
++ * Return the KDB completion function of the host controller driver
++ * that owns udev's bus (as an untyped pointer), or NULL if the bus
++ * has no HCD or the HCD has no driver.
++ */
++void *
++usb_hcd_get_kdb_completion_func(struct usb_device *udev)
++{
++	struct usb_hcd *hcd = bus_to_hcd(udev->bus);
++
++	if (!hcd || !hcd->driver)
++		return NULL;
++
++	return (void *)(hcd->driver->kdb_completion);
++}
++EXPORT_SYMBOL_GPL (usb_hcd_get_kdb_completion_func);
++
++/*
++ * Return 1 if the host controller driver of udev's bus describes
++ * itself as "uhci_hcd", 0 otherwise (including when the bus has no
++ * HCD or the HCD has no driver).
++ */
++int
++usb_hcd_check_uhci(struct usb_device *udev)
++{
++	struct usb_hcd *hcd = bus_to_hcd(udev->bus);
++
++	if (!hcd || !hcd->driver)
++		return 0;
++
++	if (strcmp(hcd->driver->description, "uhci_hcd") != 0)
++		return 0;
++
++	return 1;
++}
++EXPORT_SYMBOL_GPL (usb_hcd_check_uhci);
++
++/*
++ * Return the KDB keyboard attach hook of the host controller driver
++ * that owns udev's bus, or 0 if the bus has no HCD or the HCD has no
++ * driver.
++ */
++kdb_hc_keyboard_attach_t
++usb_hcd_get_hc_keyboard_attach(struct usb_device *udev)
++{
++	struct usb_hcd *hcd = bus_to_hcd(udev->bus);
++
++	if (!hcd || !hcd->driver)
++		return 0;
++
++	return hcd->driver->kdb_hc_keyboard_attach;
++}
++EXPORT_SYMBOL_GPL (usb_hcd_get_hc_keyboard_attach);
++
++/*
++ * Return the KDB keyboard detach hook of the host controller driver
++ * that owns udev's bus, or 0 if the bus has no HCD or the HCD has no
++ * driver.
++ */
++kdb_hc_keyboard_detach_t
++usb_hcd_get_hc_keyboard_detach(struct usb_device *udev)
++{
++	struct usb_hcd *hcd = bus_to_hcd(udev->bus);
++
++	if (!hcd || !hcd->driver)
++		return 0;
++
++	return hcd->driver->kdb_hc_keyboard_detach;
++}
++EXPORT_SYMBOL_GPL (usb_hcd_get_hc_keyboard_detach);
++
++
+#endif /* CONFIG_KDB_USB */
+
/*-------------------------------------------------------------------------*/
#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
#ifdef __KERNEL__
#include <linux/rwsem.h>
++#ifdef CONFIG_KDB_USB
++#include <linux/kdb.h>
++#endif
#define MAX_TOPO_LEVEL 6
void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *);
/* Returns the hardware-chosen device address */
int (*address_device)(struct usb_hcd *, struct usb_device *udev);
+ /* Notifies the HCD after a hub descriptor is fetched.
+ * Will block.
+ */
+ int (*update_hub_device)(struct usb_hcd *, struct usb_device *hdev,
+ struct usb_tt *tt, gfp_t mem_flags);
++
+#ifdef CONFIG_KDB_USB
+ /* KDB poll function for this HC */
+ int (*kdb_poll_char)(struct urb *urb);
++ void (*kdb_completion)(struct urb *urb);
++ kdb_hc_keyboard_attach_t kdb_hc_keyboard_attach;
++ kdb_hc_keyboard_detach_t kdb_hc_keyboard_detach;
+#endif /* CONFIG_KDB_USB */
};
extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
.port_handed_over = ehci_port_handed_over,
.clear_tt_buffer_complete = ehci_clear_tt_buffer_complete,
+
+#ifdef CONFIG_KDB_USB
- .kdb_poll_char = ehci_kdb_poll_char,
++ .kdb_poll_char = ehci_kdb_poll_char,
+#endif
};
/*-------------------------------------------------------------------------*/
#include "uhci-hcd.h"
#include "pci-quirks.h"
++#ifdef CONFIG_KDB_USB
++#include <linux/kdb.h>
++#include <linux/kdbprivate.h>
++#endif
++
/*
* Version Information
*/
return IRQ_HANDLED;
}
++#ifdef CONFIG_KDB_USB
++/*
++ * Unlink the KDB QH from the hardware and software schedule.
++ *
++ * The controller is derived from the bus of urb's device; its lock is
++ * held (interrupts disabled) around the unlink and the removal of the
++ * QH from its list.
++ */
++static void kdb_unlink_uhci_qh(struct urb *urb, struct uhci_qh *qh)
++{
++	struct uhci_hcd *uhci =
++		(struct uhci_hcd *) hcd_to_uhci(bus_to_hcd(urb->dev->bus));
++	unsigned long irq_flags;
++
++	spin_lock_irqsave(&uhci->lock, irq_flags);
++	unlink_interrupt(NULL, qh);
++	list_del(&(qh->node));
++	spin_unlock_irqrestore(&uhci->lock, irq_flags);
++}
++
++/*
++ * KDB poll hook: check the KDB QH that belongs to urb. Returns -1 if
++ * there is no URB (no keyboard attached), otherwise the result of
++ * uhci_check_kdb_uhci_qh().
++ */
++static int uhci_kdb_poll_char(struct urb *urb)
++{
++	struct uhci_qh *kdb_qh;
++
++	if (urb == NULL)	/* can happen if no keyboard attached */
++		return -1;
++
++	kdb_qh = kdb_uhci_keyboard_get_qh(urb);
++	return uhci_check_kdb_uhci_qh(kdb_qh);
++}
++
++/* Only 1 UHCI Keyboard supported */
++/*
++ * Fill in the fields of an interrupt URB for KDB use; the interval is
++ * stored as given and start_frame is set to -1.
++ */
++static inline void kdb_usb_fill_int_urb (struct urb *urb,
++					 struct usb_device *dev,
++					 unsigned int pipe,
++					 void *transfer_buffer,
++					 int buffer_length,
++					 usb_complete_t complete_fn,
++					 void *context,
++					 int interval)
++{
++	/* Target device and endpoint. */
++	urb->dev = dev;
++	urb->pipe = pipe;
++
++	/* Data buffer. */
++	urb->transfer_buffer = transfer_buffer;
++	urb->transfer_buffer_length = buffer_length;
++
++	/* Completion handling. */
++	urb->complete = complete_fn;
++	urb->context = context;
++
++	/* Scheduling. */
++	urb->interval = interval;
++	urb->start_frame = -1;
++}
++
++/*
++ * Attach a UHCI keyboard to KDB.
++ *
++ * @i:              slot in kdb_usb_kbds[]; its .urb is the HID input URB
++ * @usbhid_bufsize: size of the DMA buffer allocated for KDB
++ *
++ * Remembers the HID URB in the slot, allocates a separate buffer and
++ * URB for KDB that mirror the HID URB's device, pipe, length,
++ * completion, context and interval, stores them in the slot and
++ * submits the KDB URB. Returns 0 on success and -1 on failure.
++ *
++ * NOTE(review): the buffer is allocated with GFP_ATOMIC but the URB
++ * with GFP_KERNEL - confirm that this path may sleep.
++ * NOTE(review): the buffer is usbhid_bufsize bytes while the URB is
++ * set up with hid_inurb->transfer_buffer_length - confirm the latter
++ * never exceeds the former.
++ */
++static int kdb_uhci_keyboard_attach(int i, unsigned int usbhid_bufsize)
++{
++ struct urb *kdb_urb;
++ unsigned char *kdb_buffer;
++ dma_addr_t uhci_inbuf_dma;
++ struct urb *hid_inurb = kdb_usb_kbds[i].urb;
++ int ret = -1;
++
++ kdb_usb_kbds[i].hid_urb = hid_inurb;
++
++ kdb_urb = NULL;
++ kdb_buffer = NULL;
++ if (!(kdb_buffer = usb_buffer_alloc(hid_inurb->dev,
++ usbhid_bufsize, GFP_ATOMIC,
++ &uhci_inbuf_dma)))
++ goto out;
++
++ if (!(kdb_urb = usb_alloc_urb(0, GFP_KERNEL)))
++ goto out;
++
++ kdb_usb_fill_int_urb(kdb_urb,
++ hid_inurb->dev,
++ hid_inurb->pipe,
++ kdb_buffer,
++ hid_inurb->transfer_buffer_length,
++ hid_inurb->complete,
++ hid_inurb->context,
++ hid_inurb->interval
++ );
++
++ /* The buffer is already DMA-mapped, so tell the core not to map it. */
++ (kdb_urb)->transfer_dma = uhci_inbuf_dma;
++ (kdb_urb)->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
++
++ /* From here on the slot refers to the KDB URB, not the HID URB. */
++ kdb_usb_kbds[i].urb = kdb_urb;
++ kdb_usb_kbds[i].buffer = kdb_buffer;
++
++ if (usb_submit_urb(kdb_urb, GFP_ATOMIC)){
++ /*
++ * NOTE(review): the cleanup below frees kdb_urb and kdb_buffer,
++ * which the slot still points to unless
++ * kdb_usb_keyboard_detach() clears them - confirm.
++ */
++ kdb_usb_keyboard_detach(hid_inurb);
++ goto out;
++ }
++ /* Remove KDB special URB from endpoint queue to
++ * prevent hang during hid_disconnect().
++ */
++ list_del(&(kdb_urb->urb_list));
++
++ ret = 0;
++ return ret;
++out:
++ /* Some Error Cleanup */
++ ret = -1;
++ printk("KDB: Error, UHCI Keyboard HID won't work!\n");
++
++ if (kdb_buffer)
++ usb_buffer_free(hid_inurb->dev,
++ usbhid_bufsize, kdb_buffer,
++ uhci_inbuf_dma);
++
++ if (kdb_urb)
++ usb_free_urb(kdb_urb);
++
++ return ret;
++}
++
++/*
++ * Detach the UHCI keyboard in slot i from KDB.
++ *
++ * @urb: HID URB the keyboard was attached with
++ * @i:   slot in kdb_usb_kbds[]
++ *
++ * If the slot has a QH and was attached with @urb, the slot's KDB QH
++ * is unlinked and 0 is returned; otherwise nothing is done and -1 is
++ * returned. (The success value used to be overwritten with -1
++ * unconditionally.)
++ */
++static int kdb_uhci_keyboard_detach(struct urb *urb, int i)
++{
++	if (!kdb_usb_kbds[i].qh || kdb_usb_kbds[i].hid_urb != urb)
++		return -1;
++
++	/* UHCI keyboard */
++	kdb_unlink_uhci_qh(kdb_usb_kbds[i].urb, kdb_usb_kbds[i].qh);
++	return 0;
++}
++
++/*
++ * Check if URB is managed by KDB code: returns the index of the
++ * kdb_usb_kbds[] slot whose URB is urb, or -1 if there is none
++ * (empty slots never match).
++ */
++static int kdb_uhci_keyboard_urb(struct urb *urb)
++{
++	int slot = 0;
++
++	while (slot < KDB_USB_NUM_KEYBOARDS) {
++		if (kdb_usb_kbds[slot].urb && kdb_usb_kbds[slot].urb == urb)
++			return slot;
++		slot++;
++	}
++
++	return -1;
++}
++
++/*
++ * Check if UHCI QH is managed by KDB code: returns the index of the
++ * first kdb_usb_kbds[] slot that has a URB and whose QH is qh, or -1
++ * if there is none.
++ */
++static int kdb_uhci_keyboard_check_uhci_qh(struct uhci_qh *qh)
++{
++	int slot = 0;
++
++	while (slot < KDB_USB_NUM_KEYBOARDS) {
++		if (kdb_usb_kbds[slot].urb && kdb_usb_kbds[slot].qh == qh)
++			return slot;
++		slot++;
++	}
++
++	return -1;
++}
++
++/*
++ * Set UHCI QH using URB pointer: stores qh in the slot that owns urb.
++ * Does nothing if urb is not managed by KDB. Always returns 0.
++ */
++static int kdb_uhci_keyboard_set_qh(struct urb *urb, struct uhci_qh *qh)
++{
++	int slot = kdb_uhci_keyboard_urb(urb);
++
++	if (slot == -1)
++		return 0;
++
++	kdb_usb_kbds[slot].qh = qh;
++	return 0;
++}
++
++/*
++ * Get UHCI QH using URB pointer: returns the QH stored in the slot
++ * that owns urb, or NULL if urb is not managed by KDB.
++ */
++static struct uhci_qh *kdb_uhci_keyboard_get_qh(struct urb *urb)
++{
++	int slot = kdb_uhci_keyboard_urb(urb);
++
++	if (slot == -1)
++		return NULL;
++
++	return kdb_usb_kbds[slot].qh;
++}
++
++/*
++ * Set UHCI hid_event using URB pointer: stores hid_event in the slot
++ * that owns urb. Does nothing if urb is not managed by KDB. Always
++ * returns 0.
++ */
++static int kdb_uhci_keyboard_set_hid_event(struct urb *urb, int hid_event)
++{
++	int slot = kdb_uhci_keyboard_urb(urb);
++
++	if (slot == -1)
++		return 0;
++
++	kdb_usb_kbds[slot].kdb_hid_event = hid_event;
++	return 0;
++}
++/*
++ * Get UHCI hid_event using URB pointer: returns the hid_event stored
++ * in the slot that owns urb, or 0 if urb is not managed by KDB.
++ */
++static int kdb_uhci_keyboard_get_hid_event(struct urb *urb)
++{
++	int slot = kdb_uhci_keyboard_urb(urb);
++
++	if (slot == -1)
++		return 0;
++
++	return kdb_usb_kbds[slot].kdb_hid_event;
++}
++
++/*
++ * Set UHCI hid_event using UHCI QH pointer: stores hid_event in the
++ * first slot that has a URB and whose QH is qh and returns that
++ * slot's index, or -1 if no slot matches.
++ */
++static int kdb_uhci_keyboard_set_hid_event_qh(struct uhci_qh *qh, int hid_event)
++{
++	int slot = 0;
++
++	while (slot < KDB_USB_NUM_KEYBOARDS) {
++		if (kdb_usb_kbds[slot].urb && kdb_usb_kbds[slot].qh == qh) {
++			kdb_usb_kbds[slot].kdb_hid_event = hid_event;
++			return slot;
++		}
++		slot++;
++	}
++
++	return -1;
++}
++#endif
++
/*
* Store the current frame number in uhci->frame_number if the controller
* is runnning. Expand from 11 bits (of which we use only 10) to a
.hub_status_data = uhci_hub_status_data,
.hub_control = uhci_hub_control,
++#ifdef CONFIG_KDB_USB
++ .kdb_poll_char = uhci_kdb_poll_char,
++ .kdb_completion = kdb_uhci_urb_complete,
++ .kdb_hc_keyboard_attach = kdb_uhci_keyboard_attach,
++ .kdb_hc_keyboard_detach = kdb_uhci_keyboard_detach,
++#endif
};
static const struct pci_device_id uhci_pci_ids[] = { {
* games with the FSBR code to make sure we get the correct order in all
* the cases. I don't think it's worth the effort
*/
++#ifdef CONFIG_KDB_USB
++/* KDB HID QH, managed by KDB code */
++static int kdb_uhci_keyboard_check_uhci_qh(struct uhci_qh *qh);
++static int kdb_uhci_keyboard_set_qh(struct urb *urb, struct uhci_qh *qh);
++static struct uhci_qh *kdb_uhci_keyboard_get_qh(struct urb *urb);
++static int kdb_uhci_keyboard_set_hid_event(struct urb *urb, int hid_event);
++static int kdb_uhci_keyboard_get_hid_event(struct urb *urb);
++static int kdb_uhci_keyboard_set_hid_event_qh(struct uhci_qh *qh, int hid_event);
++static int kdb_uhci_keyboard_urb(struct urb *urb);
++#endif
++
static void uhci_set_next_interrupt(struct uhci_hcd *uhci)
{
if (uhci->is_stopped)
return qh;
}
++#ifdef CONFIG_KDB_USB
++/*
++ * Same as uhci_alloc_qh except it doesn't change hep->hcpriv.
++ *
++ * Allocates a zeroed QH from uhci->qh_pool with GFP_ATOMIC, with
++ * terminated element/link pointers and empty queue/node lists.
++ * With a device, the QH gets the endpoint's transfer type, a dummy TD
++ * (except for isochronous endpoints), state IDLE and, for interrupt
++ * and isochronous endpoints, its bus load. Without a device it is set
++ * up as an active skeleton QH of type -1.
++ *
++ * Returns the QH, or NULL if the QH or its dummy TD cannot be
++ * allocated.
++ */
++static struct uhci_qh *kdb_uhci_alloc_qh(struct uhci_hcd *uhci,
++ struct usb_device *udev, struct usb_host_endpoint *hep)
++{
++ dma_addr_t dma_handle;
++ struct uhci_qh *qh;
++
++ qh = dma_pool_alloc(uhci->qh_pool, GFP_ATOMIC, &dma_handle);
++ if (!qh)
++ return NULL;
++
++ memset(qh, 0, sizeof(*qh));
++ qh->dma_handle = dma_handle;
++
++ qh->element = UHCI_PTR_TERM;
++ qh->link = UHCI_PTR_TERM;
++
++ INIT_LIST_HEAD(&qh->queue);
++ INIT_LIST_HEAD(&qh->node);
++
++ if (udev) { /* Normal QH */
++ qh->type = hep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK;
++ if (qh->type != USB_ENDPOINT_XFER_ISOC) {
++ qh->dummy_td = uhci_alloc_td(uhci);
++ if (!qh->dummy_td) {
++ /* Give the QH back so that nothing leaks on failure. */
++ dma_pool_free(uhci->qh_pool, qh, dma_handle);
++ return NULL;
++ }
++ }
++ qh->state = QH_STATE_IDLE;
++ qh->hep = hep;
++ qh->udev = udev;
++
++ /* Only periodic (interrupt/isochronous) QHs carry a bus load. */
++ if (qh->type == USB_ENDPOINT_XFER_INT ||
++ qh->type == USB_ENDPOINT_XFER_ISOC)
++ qh->load = usb_calc_bus_time(udev->speed,
++ usb_endpoint_dir_in(&hep->desc),
++ qh->type == USB_ENDPOINT_XFER_ISOC,
++ le16_to_cpu(hep->desc.wMaxPacketSize))
++ / 1000 + 1;
++
++ } else { /* Skeleton QH */
++ qh->state = QH_STATE_ACTIVE;
++ qh->type = -1;
++ }
++ return qh;
++}
++#endif
++
static void uhci_free_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
{
WARN_ON(qh->state != QH_STATE_IDLE && qh->udev);
if (!urbp)
goto done;
++#ifdef CONFIG_KDB_USB
++ /* Always allocate new QH for KDB URB.
++ * KDB HQ will be managed by KDB poll code not by
++ * UHCI HCD Driver.
++ */
++ if (kdb_uhci_keyboard_urb(urb) != -1){
++ /* KDB urb will be enqued only once */
++ kdb_uhci_keyboard_set_qh(urb, NULL);
++ qh = kdb_uhci_alloc_qh(uhci, urb->dev, urb->ep);
++ if (!qh)
++ goto err_no_qh;
++ kdb_uhci_keyboard_set_qh(urb, qh);
++ } else
++#endif
++
if (urb->ep->hcpriv)
qh = urb->ep->hcpriv;
else {
int ret = 1;
unsigned status;
++#ifdef CONFIG_KDB_USB
++ /* Don't manage KDB QH */
++ if(kdb_uhci_keyboard_check_uhci_qh(qh) != -1){
++ ret = 0;
++ goto done;
++ }
++#endif
++
if (qh->type == USB_ENDPOINT_XFER_ISOC)
goto done;
uhci->next_qh = list_entry(qh->node.next,
struct uhci_qh, node);
++#ifdef CONFIG_KDB_USB
++ /* Don't manage KDB QH */
++ if(kdb_uhci_keyboard_check_uhci_qh(qh) != -1)
++ continue;
++#endif
if (uhci_advance_check(uhci, qh)) {
uhci_scan_qh(uhci, qh);
if (qh->state == QH_STATE_ACTIVE) {
else
uhci_set_next_interrupt(uhci);
}
++
++#ifdef CONFIG_KDB_USB
++/*
++ * Activate KDB UHCI QH, called by KDB poll code.
++ *
++ * Re-arms every TD of the first URB queued on qh: only the top byte
++ * of the status word is kept, the active bit is set, the
++ * interrupt-on-complete bit is cleared and the data toggle in the
++ * token is inverted. Afterwards the QH's element pointer is pointed
++ * at the URB's first TD again.
++ *
++ * NOTE(review): qh is dereferenced unchecked and its queue is assumed
++ * to be non-empty - confirm that callers guarantee both.
++ */
++static void kdb_activate_uhci_qh(struct uhci_qh *qh)
++{
++ struct urb_priv *urbp;
++ struct uhci_td *td;
++ __le32 status, token;
++
++ urbp = list_entry(qh->queue.next, struct urb_priv, node);
++
++ list_for_each_entry(td, &urbp->td_list, list){
++ status = td->status;
++ token = td->token;
++ barrier();
++ /* Clear Status and ActLen */
++ status &= cpu_to_le32(0xff000000);
++ /* Make TD Active */
++ status |= cpu_to_le32(TD_CTRL_ACTIVE);
++ /* Clear TD Interrupt */
++ status &= cpu_to_le32(~TD_CTRL_IOC);
++ /* Toggle Data Synchronization Bit */
++ if (token & cpu_to_le32(TD_TOKEN_TOGGLE))
++ token &= cpu_to_le32(~TD_TOKEN_TOGGLE);
++ else
++ token |= cpu_to_le32(TD_TOKEN_TOGGLE);
++
++ /* The token is written before the status that makes the TD active. */
++ td->token = token;
++ td->status = status;
++ barrier();
++ }
++ /* Activate KDB UHCI Keyboard HID QH */
++ td = list_entry(urbp->td_list.next, struct uhci_td, list);
++ qh->element = LINK_TO_TD(td);
++ barrier();
++}
++
++/*
++ * Called when KDB finishes processing a key press/release event.
++ *
++ * If a HID event is pending for urb, the KDB QH belonging to it is
++ * re-activated and the pending flag is cleared; otherwise nothing
++ * happens.
++ */
++static void
++kdb_uhci_urb_complete (struct urb *urb)
++{
++	if (kdb_uhci_keyboard_get_hid_event(urb)) {
++		/* Activate KDB TD */
++		kdb_activate_uhci_qh(kdb_uhci_keyboard_get_qh(urb));
++		kdb_uhci_keyboard_set_hid_event(urb, 0);
++	}
++}
++
++/*
++ * Check if state of KDB URB changed (key was pressed/released).
++ *
++ * Looks at the first TD of the first URB queued on qh. If the
++ * controller has cleared the TD's active bit, the slot owning qh is
++ * marked as having a pending HID event and 0 is returned; -1 is
++ * returned while the TD is still active.
++ *
++ * The poll hook passes in the result of a QH lookup, which is NULL
++ * when no QH is recorded for the URB, so a missing QH or an empty
++ * queue is reported as "no event" (-1) instead of being dereferenced.
++ */
++static int uhci_check_kdb_uhci_qh(struct uhci_qh *qh)
++{
++	struct urb_priv *urbp;
++	struct uhci_td *td;
++	unsigned status;
++
++	if (!qh || list_empty(&qh->queue))
++		return -1;
++
++	urbp = list_entry(qh->queue.next, struct urb_priv, node);
++	if (list_empty(&urbp->td_list))
++		return -1;
++
++	td = list_entry(urbp->td_list.next, struct uhci_td, list);
++	status = td_status(td);
++	if (status & TD_CTRL_ACTIVE)
++		return -1;
++
++	/* We're okay, the queue has advanced */
++	kdb_uhci_keyboard_set_hid_event_qh(qh, 1);
++	return 0;
++}
++#endif
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext3_permission,
+ .check_acl = ext3_check_acl,
.fiemap = ext3_fiemap,
+ .may_create = ext3_may_create,
+ .may_delete = ext3_may_delete,
};
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext3_permission,
+ .check_acl = ext3_check_acl,
+ .may_create = ext3_may_create,
+ .may_delete = ext3_may_delete,
};
const struct inode_operations ext3_special_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext3_permission,
+ .check_acl = ext3_check_acl,
+ .may_create = ext3_may_create,
+ .may_delete = ext3_may_delete,
};
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
+#include <linux/nfs4acl.h>
#include <linux/log2.h>
- #include <linux/precache.h>
#include <asm/uaccess.h>
return -EACCES;
}
+ if (!IS_WITHAPPEND(inode))
+ submask &= ~MAY_APPEND;
+
if (inode->i_op->permission)
- retval = inode->i_op->permission(inode, mask);
+ retval = inode->i_op->permission(inode, submask);
else
- retval = generic_permission(inode, mask, NULL);
+ retval = generic_permission(inode, mask, inode->i_op->check_acl);
if (retval)
return retval;
#include <linux/pid_namespace.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
+#include <linux/utrace.h>
+ #include <linux/swapops.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
+
if (flags & MS_RDONLY)
acct_auto_close(sb);
- shrink_dcache_sb(sb);
+ if (rflags & REMOUNT_SHRINK_DCACHE)
+ shrink_dcache_sb(sb);
sync_filesystem(sb);
/* If we are remounting RDONLY and current sb is read/write,
#include <linux/dcache.h>
- static struct vm_operations_struct xfs_file_vm_ops;
+ static const struct vm_operations_struct xfs_file_vm_ops;
+#ifdef HAVE_DMAPI
+static struct vm_operations_struct xfs_dmapi_file_vm_ops;
+#endif
STATIC ssize_t
xfs_file_aio_read(
*/
STATIC int
xfs_file_fsync(
- struct file *filp,
- struct dentry *dentry,
- int datasync)
+ struct file *file,
+ struct dentry *dentry,
+ int datasync)
{
- xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
- return -xfs_fsync(XFS_I(dentry->d_inode));
+ struct inode *inode = dentry->d_inode;
+ struct xfs_inode *ip = XFS_I(inode);
+ int error;
+
+ /* capture size updates in I/O completion before writing the inode. */
+ error = filemap_fdatawait(inode->i_mapping);
+ if (error)
+ return error;
+
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ return -xfs_fsync(ip);
}
+#ifdef HAVE_DMAPI
+/*
+ * Page fault handler for DMAPI mounts: sends the mmap event for the
+ * faulting vma and returns VM_FAULT_SIGBUS if that fails; otherwise
+ * the fault is handled by filemap_fault().
+ */
+STATIC int
+xfs_vm_fault(
+	struct vm_area_struct	*vma,
+	struct vm_fault		*vmf)
+{
+	struct xfs_mount	*mp;
+
+	mp = XFS_M(vma->vm_file->f_path.dentry->d_inode->i_sb);
+
+	ASSERT_ALWAYS(mp->m_flags & XFS_MOUNT_DMAPI);
+
+	if (XFS_SEND_MMAP(mp, vma, 0) != 0)
+		return VM_FAULT_SIGBUS;
+
+	return filemap_fault(vma, vmf);
+}
+#endif /* HAVE_DMAPI */
+
STATIC int
xfs_file_readdir(
struct file *filp,
void *data,
struct vfsmount *mnt)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
+ int error;
+
+ error = get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
mnt);
+ if (!error) {
+ xfs_mount_t *mp = XFS_M(mnt->mnt_sb);
+ mp->m_vfsmount = mnt;
+ }
+
+ return error;
}
- static struct super_operations xfs_super_operations = {
+ static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
.write_inode = xfs_fs_write_inode,
void *dibuff,
int *stat);
+int
+xfs_internal_inum(
- xfs_mount_t *mp,
- xfs_ino_t ino);
++ xfs_mount_t *mp,
++ xfs_ino_t ino);
+
typedef int (*inumbers_fmt_pf)(
void __user *ubuffer, /* buffer to write to */
const xfs_inogrp_t *buffer, /* buffer to read from */
struct xfs_buf **bpp);
extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
xfs_buf_t *bp, xfs_daddr_t blkno);
-
+/*
+ * Prototypes for functions in xfs_vnodeops.c.
+ */
+extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
- int flags);
++ int flags);
#endif /* __XFS_RW_H__ */
#define TIOCSRS485 0x542F
#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */
++#define TIOCGDEV _IOR('T', 0x32, unsigned int) /* Get real dev no below /dev/console */
#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */
#define TCSETX 0x5433
#define TCSETXF 0x5434
#endif
enum km_type {
- D(0) KM_BOUNCE_READ,
- D(1) KM_SKB_SUNRPC_DATA,
- D(2) KM_SKB_DATA_SOFTIRQ,
- D(3) KM_USER0,
- D(4) KM_USER1,
- D(5) KM_BIO_SRC_IRQ,
- D(6) KM_BIO_DST_IRQ,
- D(7) KM_PTE0,
- D(8) KM_PTE1,
- D(9) KM_IRQ0,
- D(10) KM_IRQ1,
- D(11) KM_SOFTIRQ0,
- D(12) KM_SOFTIRQ1,
- D(13) KM_SYNC_ICACHE,
- D(14) KM_SYNC_DCACHE,
- D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */
- D(16) KM_IRQ_PTE,
- D(17) KM_NMI,
- D(18) KM_NMI_PTE,
- D(19) KM_KDB,
- D(20) KM_TYPE_NR
+ KMAP_D(0) KM_BOUNCE_READ,
+ KMAP_D(1) KM_SKB_SUNRPC_DATA,
+ KMAP_D(2) KM_SKB_DATA_SOFTIRQ,
+ KMAP_D(3) KM_USER0,
+ KMAP_D(4) KM_USER1,
+ KMAP_D(5) KM_BIO_SRC_IRQ,
+ KMAP_D(6) KM_BIO_DST_IRQ,
+ KMAP_D(7) KM_PTE0,
+ KMAP_D(8) KM_PTE1,
+ KMAP_D(9) KM_IRQ0,
+ KMAP_D(10) KM_IRQ1,
+ KMAP_D(11) KM_SOFTIRQ0,
+ KMAP_D(12) KM_SOFTIRQ1,
+ KMAP_D(13) KM_SYNC_ICACHE,
+ KMAP_D(14) KM_SYNC_DCACHE,
+ /* UML specific, for copy_*_user - used in do_op_one_page */
+ KMAP_D(15) KM_UML_USERCOPY,
+ KMAP_D(16) KM_IRQ_PTE,
+ KMAP_D(17) KM_NMI,
+ KMAP_D(18) KM_NMI_PTE,
-KMAP_D(19) KM_TYPE_NR
++KMAP_D(19) KM_KDB,
++KMAP_D(20) KM_TYPE_NR
};
- #undef D
+ #undef KMAP_D
#endif
void (*put_link) (struct dentry *, struct nameidata *, void *);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
+ int (*check_acl)(struct inode *, int);
+ int (*may_create) (struct inode *, int);
+ int (*may_delete) (struct inode *, struct inode *);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
--- /dev/null
+#ifndef _KDB_H
+#define _KDB_H
+
+/*
+ * Kernel Debugger Architecture Independent Global Headers
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+
+#ifdef CONFIG_KDB
+/* These are really private, but they must be defined before including
+ * asm-$(ARCH)/kdb.h, so make them public and put them here.
+ */
+extern int kdb_getuserarea_size(void *, unsigned long, size_t);
+extern int kdb_putuserarea_size(unsigned long, void *, size_t);
+
+#include <asm/kdb.h>
+#endif
+
+#define KDB_MAJOR_VERSION 4
+#define KDB_MINOR_VERSION 4
+#define KDB_TEST_VERSION ""
+
+/*
+ * kdb_initial_cpu is initialized to -1, and is set to the cpu
+ * number whenever the kernel debugger is entered.
+ */
+extern volatile int kdb_initial_cpu;
+extern atomic_t kdb_event;
+extern atomic_t kdb_8250;
+#ifdef CONFIG_KDB
+#define KDB_IS_RUNNING() (kdb_initial_cpu != -1)
+#define KDB_8250() (atomic_read(&kdb_8250) != 0)
+#else
+#define KDB_IS_RUNNING() (0)
+#define KDB_8250() (0)
+#endif /* CONFIG_KDB */
+
+/*
+ * kdb_on
+ *
+ * Defines whether kdb is on or not. Default value
+ * is set by CONFIG_KDB_OFF. Boot with kdb=on/off/on-nokey
+ * or echo "[012]" > /proc/sys/kernel/kdb to change it.
+ */
+extern int kdb_on;
+
+#if defined(CONFIG_SERIAL_8250_CONSOLE) || defined(CONFIG_SERIAL_SGI_L1_CONSOLE)
+/*
+ * kdb_serial.iobase is initialized to zero, and is set to the I/O
+ * address of the serial port when the console is setup in
+ * serial_console_setup.
+ */
+extern struct kdb_serial {
+ int io_type;
+ unsigned long iobase;
+ unsigned long ioreg_shift;
+} kdb_serial;
+#endif
+
+/*
+ * kdb_diemsg
+ *
+ * Contains a pointer to the last string supplied to the
+ * kernel 'die' panic function.
+ */
+extern const char *kdb_diemsg;
+
+#define KDB_FLAG_EARLYKDB (1 << 0) /* set from boot parameter kdb=early */
+#define KDB_FLAG_CATASTROPHIC (1 << 1) /* A catastrophic event has occurred */
+#define KDB_FLAG_CMD_INTERRUPT (1 << 2) /* Previous command was interrupted */
+#define KDB_FLAG_NOIPI (1 << 3) /* Do not send IPIs */
+#define KDB_FLAG_ONLY_DO_DUMP (1 << 4) /* Only do a dump, used when kdb is off */
+#define KDB_FLAG_NO_CONSOLE (1 << 5) /* No console is available, kdb is disabled */
+#define KDB_FLAG_NO_VT_CONSOLE (1 << 6) /* No VT console is available, do not use keyboard */
+#define KDB_FLAG_NO_I8042 (1 << 7) /* No i8042 chip is available, do not use keyboard */
+#define KDB_FLAG_RECOVERY (1 << 8) /* kdb is being entered for an error which has been recovered */
+
+extern volatile int kdb_flags; /* Global flags, see kdb_state for per cpu state */
+
+extern void kdb_save_flags(void);
+extern void kdb_restore_flags(void);
+
+#define KDB_FLAG(flag) (kdb_flags & KDB_FLAG_##flag)
+#define KDB_FLAG_SET(flag) ((void)(kdb_flags |= KDB_FLAG_##flag))
+#define KDB_FLAG_CLEAR(flag) ((void)(kdb_flags &= ~KDB_FLAG_##flag))
+
+/*
+ * External entry point for the kernel debugger. The pt_regs
+ * at the time of entry are supplied along with the reason for
+ * entry to the kernel debugger.
+ */
+
+typedef enum {
+ KDB_REASON_ENTER=1, /* KDB_ENTER() trap/fault - regs valid */
+ KDB_REASON_ENTER_SLAVE, /* KDB_ENTER_SLAVE() trap/fault - regs valid */
+ KDB_REASON_BREAK, /* Breakpoint inst. - regs valid */
+ KDB_REASON_DEBUG, /* Debug Fault - regs valid */
+ KDB_REASON_OOPS, /* Kernel Oops - regs valid */
+ KDB_REASON_SWITCH, /* CPU switch - regs valid */
+ KDB_REASON_KEYBOARD, /* Keyboard entry - regs valid */
+ KDB_REASON_NMI, /* Non-maskable interrupt; regs valid */
+ KDB_REASON_RECURSE, /* Recursive entry to kdb; regs probably valid */
+ KDB_REASON_CPU_UP, /* Add one cpu to kdb; regs invalid */
+ KDB_REASON_SILENT, /* Silent entry/exit to kdb; regs invalid - internal only */
+} kdb_reason_t;
+
+#ifdef CONFIG_KDB
+extern int kdb(kdb_reason_t, int, struct pt_regs *);
+#else
+#define kdb(reason,error_code,frame) (0)
+#endif
+
+/* Mainly used by kdb code, but this function is sometimes used
+ * by hacked debug code so make it generally available, not private.
+ */
+extern void kdb_printf(const char *,...)
+ __attribute__ ((format (printf, 1, 2)));
+typedef void (*kdb_printf_t)(const char *, ...)
+ __attribute__ ((format (printf, 1, 2)));
+extern void kdb_init(void);
+
+#if defined(CONFIG_SMP)
+/*
+ * Kernel debugger non-maskable IPI handler.
+ */
+extern int kdb_ipi(struct pt_regs *, void (*ack_interrupt)(void));
+extern void smp_kdb_stop(void);
+#else /* CONFIG_SMP */
+#define smp_kdb_stop()
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_KDB_USB
+
+#include <linux/usb.h>
+
- extern int kdb_usb_keyboard_attach(struct urb *urb, unsigned char *buffer, void *poll_func);
++typedef int (*kdb_hc_keyboard_attach_t)(int i, unsigned int bufsize);
++typedef int (*kdb_hc_keyboard_detach_t)(struct urb *urb, int i);
++
++extern int kdb_usb_keyboard_attach(struct urb *urb, unsigned char *buffer,
++ void *poll_func, void *compl_func,
++ kdb_hc_keyboard_attach_t kdb_hc_keyboard_attach,
++ kdb_hc_keyboard_detach_t kdb_hc_keyboard_detach,
++ unsigned int bufsize,
++ struct urb *hid_urb);
++
+extern int kdb_usb_keyboard_detach(struct urb *urb);
+
+#endif /* CONFIG_KDB_USB */
+
+/*
+ * Return the cpu number recorded in p's thread_info, or 0 when that value
+ * is not a valid cpu index. Valid indices are 0 .. NR_CPUS-1, so the
+ * bound check must reject NR_CPUS itself; otherwise the result could index
+ * one element past arrays dimensioned by NR_CPUS.
+ */
+static inline
+int kdb_process_cpu(const struct task_struct *p)
+{
+ unsigned int cpu = task_thread_info(p)->cpu;
+ if (cpu >= NR_CPUS)
+ cpu = 0;
+ return cpu;
+}
+
+extern const char kdb_serial_str[];
+
+#ifdef CONFIG_KDB_KDUMP
+/* Define values for kdb_kdump_state */
+extern int kdb_kdump_state; /* KDB kdump state */
+#define KDB_KDUMP_RESET 0
+#define KDB_KDUMP_KDUMP 1
+
+void kdba_kdump_prepare(struct pt_regs *);
+void machine_crash_shutdown(struct pt_regs *);
+void machine_crash_shutdown_begin(void);
+void machine_crash_shutdown_end(struct pt_regs *);
+
+#endif /* CONFIG_KDB_KDUMP */
+
+#endif /* !_KDB_H */
--- /dev/null
+#ifndef _KDBPRIVATE_H
+#define _KDBPRIVATE_H
+
+/*
+ * Kernel Debugger Architecture Independent Private Headers
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+
+#include <linux/dis-asm.h>
+#include <asm/kdbprivate.h>
+#include <asm/bfd.h>
+
+ /*
+ * Kernel Debugger Error codes. Must not overlap with command codes.
+ */
+
+#define KDB_NOTFOUND (-1)
+#define KDB_ARGCOUNT (-2)
+#define KDB_BADWIDTH (-3)
+#define KDB_BADRADIX (-4)
+#define KDB_NOTENV (-5)
+#define KDB_NOENVVALUE (-6)
+#define KDB_NOTIMP (-7)
+#define KDB_ENVFULL (-8)
+#define KDB_ENVBUFFULL (-9 )
+#define KDB_TOOMANYBPT (-10)
+#define KDB_TOOMANYDBREGS (-11)
+#define KDB_DUPBPT (-12)
+#define KDB_BPTNOTFOUND (-13)
+#define KDB_BADMODE (-14)
+#define KDB_BADINT (-15)
+#define KDB_INVADDRFMT (-16)
+#define KDB_BADREG (-17)
+#define KDB_BADCPUNUM (-18)
+#define KDB_BADLENGTH (-19)
+#define KDB_NOBP (-20)
+#define KDB_BADADDR (-21)
+
+ /*
+ * Kernel Debugger Command codes. Must not overlap with error codes.
+ */
+#define KDB_CMD_GO (-1001)
+#define KDB_CMD_CPU (-1002)
+#define KDB_CMD_SS (-1003)
+#define KDB_CMD_SSB (-1004)
+
+ /*
+ * Internal debug flags
+ */
+/* KDB_DEBUG_FLAG_BT 0x0001 Was Stack traceback debug */
+#define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */
+#define KDB_DEBUG_FLAG_BB_SUMM 0x0004 /* Basic block analysis, summary only */
+#define KDB_DEBUG_FLAG_AR 0x0008 /* Activation record, generic */
+#define KDB_DEBUG_FLAG_ARA 0x0010 /* Activation record, arch specific */
+#define KDB_DEBUG_FLAG_BB 0x0020 /* All basic block analysis */
+#define KDB_DEBUG_FLAG_STATE 0x0040 /* State flags */
+#define KDB_DEBUG_FLAG_MASK 0xffff /* All debug flags */
+#define KDB_DEBUG_FLAG_SHIFT 16 /* Shift factor for dbflags */
+
+#define KDB_DEBUG(flag) (kdb_flags & (KDB_DEBUG_FLAG_##flag << KDB_DEBUG_FLAG_SHIFT))
+#define KDB_DEBUG_STATE(text,value) if (KDB_DEBUG(STATE)) kdb_print_state(text, value)
+
+typedef enum {
+ KDB_REPEAT_NONE = 0, /* Do not repeat this command */
+ KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */
+ KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */
+} kdb_repeat_t;
+
+typedef int (*kdb_func_t)(int, const char **);
+
+ /*
+ * Symbol table format returned by kallsyms.
+ */
+
+typedef struct __ksymtab {
+ unsigned long value; /* Address of symbol */
+ const char *mod_name; /* Module containing symbol or "kernel" */
+ unsigned long mod_start;
+ unsigned long mod_end;
+ const char *sec_name; /* Section containing symbol */
+ unsigned long sec_start;
+ unsigned long sec_end;
+ const char *sym_name; /* Full symbol name, including any version */
+ unsigned long sym_start;
+ unsigned long sym_end;
+ } kdb_symtab_t;
+extern int kallsyms_symbol_next(char *prefix_name, int flag);
+extern int kallsyms_symbol_complete(char *prefix_name, int max_len);
+
+ /*
+ * Exported Symbols for kernel loadable modules to use.
+ */
+extern int kdb_register(char *, kdb_func_t, char *, char *, short);
+extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, short, kdb_repeat_t);
+extern int kdb_unregister(char *);
+
+extern int kdb_getarea_size(void *, unsigned long, size_t);
+extern int kdb_putarea_size(unsigned long, void *, size_t);
+
+/* Like get_user and put_user, kdb_getarea and kdb_putarea take variable
+ * names, not pointers. The underlying *_size functions take pointers.
+ */
+#define kdb_getarea(x,addr) kdb_getarea_size(&(x), addr, sizeof((x)))
+#define kdb_putarea(addr,x) kdb_putarea_size(addr, &(x), sizeof((x)))
+
+extern int kdb_getphysword(unsigned long *word,
+ unsigned long addr, size_t size);
+extern int kdb_getword(unsigned long *, unsigned long, size_t);
+extern int kdb_putword(unsigned long, unsigned long, size_t);
+
+extern int kdbgetularg(const char *, unsigned long *);
+extern char *kdbgetenv(const char *);
+extern int kdbgetintenv(const char *, int *);
+extern int kdbgetaddrarg(int, const char**, int*, unsigned long *,
+ long *, char **);
+extern int kdbgetsymval(const char *, kdb_symtab_t *);
+extern int kdbnearsym(unsigned long, kdb_symtab_t *);
+extern void kdbnearsym_cleanup(void);
+extern char *kdb_read(char *buffer, size_t bufsize);
+extern char *kdb_strdup(const char *str, gfp_t type);
+extern void kdb_symbol_print(kdb_machreg_t, const kdb_symtab_t *, unsigned int);
+
+ /*
+ * Do we have a set of registers?
+ */
+
+/*
+ * Evaluates to 1 (after logging a message through kdb_printf) when regs is
+ * NULL, and to 0 otherwise. The argument is parenthesised so that any
+ * pointer expression can be passed safely.
+ */
+#define KDB_NULL_REGS(regs) \
+ ((regs) == (struct pt_regs *)NULL ? kdb_printf("%s: null regs - should never happen\n", __FUNCTION__), 1 : 0)
+
+ /*
+ * Routine for debugging the debugger state.
+ */
+
+extern void kdb_print_state(const char *, int);
+
+ /*
+ * Per cpu kdb state. A cpu can be under kdb control but outside kdb,
+ * for example when doing single step.
+ */
+extern volatile int kdb_state[ /*NR_CPUS*/ ]; /* storage class first, matching kdb_flags and kdb_nextline */
+#define KDB_STATE_KDB 0x00000001 /* Cpu is inside kdb */
+#define KDB_STATE_LEAVING 0x00000002 /* Cpu is leaving kdb */
+#define KDB_STATE_CMD 0x00000004 /* Running a kdb command */
+#define KDB_STATE_KDB_CONTROL 0x00000008 /* This cpu is under kdb control */
+#define KDB_STATE_HOLD_CPU 0x00000010 /* Hold this cpu inside kdb */
+#define KDB_STATE_DOING_SS 0x00000020 /* Doing ss command */
+#define KDB_STATE_DOING_SSB 0x00000040 /* Doing ssb command, DOING_SS is also set */
+#define KDB_STATE_SSBPT 0x00000080 /* Install breakpoint after one ss, independent of DOING_SS */
+#define KDB_STATE_REENTRY 0x00000100 /* Valid re-entry into kdb */
+#define KDB_STATE_SUPPRESS 0x00000200 /* Suppress error messages */
+#define KDB_STATE_LONGJMP 0x00000400 /* longjmp() data is available */
+#define KDB_STATE_GO_SWITCH 0x00000800 /* go is switching back to initial cpu */
+#define KDB_STATE_PRINTF_LOCK 0x00001000 /* Holds kdb_printf lock */
+#define KDB_STATE_WAIT_IPI 0x00002000 /* Waiting for kdb_ipi() NMI */
+#define KDB_STATE_RECURSE 0x00004000 /* Recursive entry to kdb */
+#define KDB_STATE_IP_ADJUSTED 0x00008000 /* Restart IP has been adjusted */
+#define KDB_STATE_GO1 0x00010000 /* go only releases one cpu */
+#define KDB_STATE_KEYBOARD 0x00020000 /* kdb entered via keyboard on this cpu */
+#define KDB_STATE_KEXEC 0x00040000 /* kexec issued */
+#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch specific use */
+
+#define KDB_STATE_CPU(flag,cpu) (kdb_state[cpu] & KDB_STATE_##flag)
+#define KDB_STATE_SET_CPU(flag,cpu) ((void)(kdb_state[cpu] |= KDB_STATE_##flag))
+#define KDB_STATE_CLEAR_CPU(flag,cpu) ((void)(kdb_state[cpu] &= ~KDB_STATE_##flag))
+
+#define KDB_STATE(flag) KDB_STATE_CPU(flag,smp_processor_id())
+#define KDB_STATE_SET(flag) KDB_STATE_SET_CPU(flag,smp_processor_id())
+#define KDB_STATE_CLEAR(flag) KDB_STATE_CLEAR_CPU(flag,smp_processor_id())
+
+ /*
+ * kdb_nextline
+ *
+ * Contains the current line number on the screen. Used
+ * to handle the built-in pager (LINES env variable)
+ */
+extern volatile int kdb_nextline;
+
+ /*
+ * Breakpoint state
+ *
+ * Each active and inactive breakpoint is represented by
+ * an instance of the following data structure.
+ */
+
+typedef struct _kdb_bp {
+ bfd_vma bp_addr; /* Address breakpoint is present at */
+ kdb_machinst_t bp_inst; /* Replaced instruction */
+
+ unsigned int bp_free:1; /* This entry is available */
+
+ unsigned int bp_enabled:1; /* Breakpoint is active in register */
+ unsigned int bp_global:1; /* Global to all processors */
+
+ unsigned int bp_hardtype:1; /* Uses hardware register */
+ unsigned int bp_forcehw:1; /* Force hardware register */
+ unsigned int bp_installed:1; /* Breakpoint is installed */
+ unsigned int bp_delay:1; /* Do delayed bp handling */
+ unsigned int bp_delayed:1; /* Delayed breakpoint */
+
+ int bp_cpu; /* Cpu # (if bp_global == 0) */
+ kdbhard_bp_t bp_template; /* Hardware breakpoint template */
+ kdbhard_bp_t *bp_hard[NR_CPUS]; /* Hardware breakpoint structure */
+ int bp_adjust; /* Adjustment to PC for real instruction */
+} kdb_bp_t;
+
+ /*
+ * Breakpoint handling subsystem global variables
+ */
+extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */];
+
+ /*
+ * Breakpoint architecture dependent functions. Must be provided
+ * in some form for all architectures.
+ */
+extern void kdba_initbp(void);
+extern void kdba_printbp(kdb_bp_t *);
+extern void kdba_alloc_hwbp(kdb_bp_t *bp, int *diagp);
+extern void kdba_free_hwbp(kdb_bp_t *bp);
+extern int kdba_parsebp(int, const char**, int *, kdb_bp_t*);
+extern char *kdba_bptype(kdbhard_bp_t *);
+extern void kdba_setsinglestep(struct pt_regs *);
+extern void kdba_clearsinglestep(struct pt_regs *);
+
+ /*
+ * Adjust instruction pointer architecture dependent function. Must be
+ * provided in some form for all architectures.
+ */
+extern void kdba_adjust_ip(kdb_reason_t, int, struct pt_regs *);
+
+ /*
+ * KDB-only global function prototypes.
+ */
+extern void kdb_id1(unsigned long);
+extern void kdb_id_init(void);
+
+ /*
+ * Initialization functions.
+ */
+extern void kdba_init(void);
+extern void kdb_io_init(void);
+
+ /*
+ * Architecture specific function to read a string.
+ */
+typedef int (*get_char_func)(void);
+extern get_char_func poll_funcs[];
+
+#ifndef CONFIG_IA64
+ /*
+ * Data for a single activation record on stack.
+ */
+
+struct kdb_stack_info {
+ kdb_machreg_t physical_start;
+ kdb_machreg_t physical_end;
+ kdb_machreg_t logical_start;
+ kdb_machreg_t logical_end;
+ kdb_machreg_t next;
+ const char * id;
+};
+
+typedef struct { DECLARE_BITMAP(bits, KDBA_MAXARGS); } valid_t;
+
+struct kdb_activation_record {
+ struct kdb_stack_info stack; /* information about current stack */
+ int args; /* number of arguments detected */
+ kdb_machreg_t arg[KDBA_MAXARGS]; /* -> arguments */
+ valid_t valid; /* is argument n valid? */
+};
+#endif
+
+ /*
+ * Architecture specific Stack Traceback functions.
+ */
+
+struct task_struct;
+
+extern int kdba_bt_address(kdb_machreg_t, int);
+extern int kdba_bt_process(const struct task_struct *, int);
+
+ /*
+ * KDB Command Table
+ */
+
+typedef struct _kdbtab {
+ char *cmd_name; /* Command name */
+ kdb_func_t cmd_func; /* Function to execute command */
+ char *cmd_usage; /* Usage String for this command */
+ char *cmd_help; /* Help message for this command */
+ short cmd_flags; /* Parsing flags */
+ short cmd_minlen; /* Minimum legal # command chars required */
+ kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */
+} kdbtab_t;
+
+ /*
+ * External command function declarations
+ */
+
+extern int kdb_id(int, const char **);
+extern int kdb_bt(int, const char **);
+
+ /*
+ * External utility function declarations
+ */
+extern char* kdb_getstr(char *, size_t, char *);
+
+ /*
+ * Register contents manipulation
+ */
+extern int kdba_getregcontents(const char *, struct pt_regs *, kdb_machreg_t *);
+extern int kdba_setregcontents(const char *, struct pt_regs *, kdb_machreg_t);
+extern int kdba_dumpregs(struct pt_regs *, const char *, const char *);
+extern int kdba_setpc(struct pt_regs *, kdb_machreg_t);
+extern kdb_machreg_t kdba_getpc(struct pt_regs *);
+
+ /*
+ * Debug register handling.
+ */
+extern void kdba_installdbreg(kdb_bp_t*);
+extern void kdba_removedbreg(kdb_bp_t*);
+
+ /*
+ * Breakpoint handling - External interfaces
+ */
+extern void kdb_initbptab(void);
+extern void kdb_bp_install_global(struct pt_regs *);
+extern void kdb_bp_install_local(struct pt_regs *);
+extern void kdb_bp_remove_global(void);
+extern void kdb_bp_remove_local(void);
+
+ /*
+ * Breakpoint handling - Internal to kdb_bp.c/kdba_bp.c
+ */
+extern int kdba_installbp(struct pt_regs *regs, kdb_bp_t *);
+extern int kdba_removebp(kdb_bp_t *);
+
+
+typedef enum {
+ KDB_DB_BPT, /* Breakpoint */
+ KDB_DB_SS, /* Single-step trap */
+ KDB_DB_SSB, /* Single step to branch */
+ KDB_DB_SSBPT, /* Single step over breakpoint */
+ KDB_DB_NOBPT /* Spurious breakpoint */
+} kdb_dbtrap_t;
+
+extern kdb_dbtrap_t kdba_db_trap(struct pt_regs *, int); /* DEBUG trap/fault handler */
+extern kdb_dbtrap_t kdba_bp_trap(struct pt_regs *, int); /* Breakpoint trap/fault hdlr */
+
+ /*
+ * Interrupt Handling
+ */
+typedef unsigned long kdb_intstate_t;
+
+extern void kdba_disableint(kdb_intstate_t *);
+extern void kdba_restoreint(kdb_intstate_t *);
+
+ /*
+ * SMP and process stack manipulation routines.
+ */
+extern int kdba_ipi(struct pt_regs *, void (*)(void));
+extern int kdba_main_loop(kdb_reason_t, kdb_reason_t, int, kdb_dbtrap_t, struct pt_regs *);
+extern int kdb_main_loop(kdb_reason_t, kdb_reason_t, int, kdb_dbtrap_t, struct pt_regs *);
+
+ /*
+ * General Disassembler interfaces
+ */
+extern int kdb_dis_fprintf(PTR, const char *, ...) __attribute__ ((format (printf, 2, 3)));
+extern int kdb_dis_fprintf_dummy(PTR, const char *, ...) __attribute__ ((format (printf, 2, 3)));
+extern disassemble_info kdb_di;
+
+ /*
+ * Architecture Dependent Disassembler interfaces
+ */
+extern int kdba_id_printinsn(kdb_machreg_t, disassemble_info *);
+extern int kdba_id_parsemode(const char *, disassemble_info*);
+extern void kdba_id_init(disassemble_info *);
+extern void kdba_check_pc(kdb_machreg_t *);
+
+ /*
+ * Miscellaneous functions and data areas
+ */
+extern char *kdb_cmds[];
- extern void debugger_syslog_data(char *syslog_data[]);
++extern void kdb_syslog_data(char *syslog_data[]);
+extern unsigned long kdb_task_state_string(const char *);
+extern char kdb_task_state_char (const struct task_struct *);
+extern unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask);
+extern void kdb_ps_suppressed(void);
+extern void kdb_ps1(const struct task_struct *p);
+extern int kdb_parse(const char *cmdstr);
+extern void kdb_print_nameval(const char *name, unsigned long val);
+extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info, int seqno);
+#ifdef CONFIG_SWAP
+extern void kdb_si_swapinfo(struct sysinfo *);
+#else
+#include <linux/swap.h>
+#define kdb_si_swapinfo(x) si_swapinfo(x)
+#endif
+extern void kdb_meminfo_proc_show(void);
+#ifdef CONFIG_HUGETLB_PAGE
+extern void kdb_hugetlb_report_meminfo(void);
+#endif /* CONFIG_HUGETLB_PAGE */
+extern const char *kdb_walk_kallsyms(loff_t *pos);
+
+ /*
+ * Architecture Dependent Local Processor setup & cleanup interfaces
+ */
+extern void kdba_local_arch_setup(void);
+extern void kdba_local_arch_cleanup(void);
+
+ /*
+ * Defines for kdb_symbol_print.
+ */
+#define KDB_SP_SPACEB 0x0001 /* Space before string */
+#define KDB_SP_SPACEA 0x0002 /* Space after string */
+#define KDB_SP_PAREN 0x0004 /* Parenthesis around string */
+#define KDB_SP_VALUE 0x0008 /* Print the value of the address */
+#define KDB_SP_SYMSIZE 0x0010 /* Print the size of the symbol */
+#define KDB_SP_NEWLINE 0x0020 /* Newline after string */
+#define KDB_SP_DEFAULT (KDB_SP_VALUE|KDB_SP_PAREN)
+
+/* Save data about running processes */
+
+struct kdb_running_process {
+ struct task_struct *p;
+ struct pt_regs *regs;
+ int seqno; /* kdb sequence number */
+ int irq_depth; /* irq count */
+ struct kdba_running_process arch; /* arch dependent save data */
+};
+
+extern struct kdb_running_process kdb_running_process[/* NR_CPUS */];
+
+extern int kdb_save_running(struct pt_regs *, kdb_reason_t, kdb_reason_t, int, kdb_dbtrap_t);
+extern void kdb_unsave_running(struct pt_regs *);
+extern struct task_struct *kdb_curr_task(int);
+
+/* Incremented each time the main kdb loop is entered on the initial cpu,
+ * it gives some indication of how old the saved data is.
+ */
+extern int kdb_seqno;
+
+#define kdb_task_has_cpu(p) (task_curr(p))
+extern void kdb_runqueue(unsigned long cpu, kdb_printf_t xxx_printf);
+
+/* Simplify coexistence with NPTL */
+#define kdb_do_each_thread(g, p) do_each_thread(g, p)
+#define kdb_while_each_thread(g, p) while_each_thread(g, p)
+
+#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
+
+extern void *debug_kmalloc(size_t size, gfp_t flags);
+extern void debug_kfree(void *);
+extern void debug_kusage(void);
+
+extern void kdba_set_current_task(const struct task_struct *);
+extern const struct task_struct *kdb_current_task;
+extern struct pt_regs *kdb_current_regs;
+
+/* Functions to safely read and write kernel areas. The {to,from}_xxx
+ * addresses are not necessarily valid, these functions must check for
+ * validity. If the arch already supports get and put routines with suitable
+ * validation and/or recovery on invalid addresses then use those routines,
+ * otherwise check it yourself.
+ */
+
+extern int kdba_putarea_size(unsigned long to_xxx, void *from, size_t size);
+extern int kdba_getarea_size(void *to, unsigned long from_xxx, size_t size);
+extern int kdba_verify_rw(unsigned long addr, size_t size);
+
+#ifndef KDB_RUNNING_PROCESS_ORIGINAL
+#define KDB_RUNNING_PROCESS_ORIGINAL kdb_running_process
+#endif
+
+extern int kdb_wait_for_cpus_secs;
+extern void kdba_cpu_up(void);
+extern char kdb_prompt_str[];
+
+#define KDB_WORD_SIZE ((int)sizeof(kdb_machreg_t))
+
+#ifdef CONFIG_KDB_USB
+#include <linux/usb.h>
+
++/* support up to 8 USB keyboards (probably excessive, but...) */
++#define KDB_USB_NUM_KEYBOARDS 8
++
+struct kdb_usb_kbd_info {
+ struct urb *urb; /* pointer to the URB */
+ unsigned char *buffer; /* pointer to the kbd char buffer */
+ int (*poll_func)(struct urb *urb); /* poll function to retrieve chars */
+ int poll_ret; /* return val from poll_func */
+ int caps_lock; /* state of the caps lock for this keyboard */
++ struct uhci_qh *qh;
++ int kdb_hid_event;
++ struct urb *hid_urb; /* pointer to the HID URB */
++ /* USB Host Controller specific callbacks */
++ kdb_hc_keyboard_attach_t kdb_hc_keyboard_attach;
++ kdb_hc_keyboard_detach_t kdb_hc_keyboard_detach;
++ int (*kdb_hc_urb_complete)(struct urb *urb); /* called when URB int is
++ processed */
++
+};
++
++extern struct kdb_usb_kbd_info kdb_usb_kbds[KDB_USB_NUM_KEYBOARDS];
++
+#endif /* CONFIG_KDB_USB */
+
+#ifdef CONFIG_KDB_KDUMP
+#define KDUMP_REASON_RESET 0
+extern void kdba_kdump_shutdown_slave(struct pt_regs *);
+#endif /* CONFIG_KDB_KDUMP */
+
+#endif /* !_KDBPRIVATE_H */
1 << PG_private | 1 << PG_private_2 | \
1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- 1 << PG_unevictable | __PG_MLOCKED | __PG_XEN | 1 << PG_waiters)
- 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
++ 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | 1 << PG_waiters)
/*
* Flags checked when a page is prepped for return by the page allocator.
/* bitmask of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
+ unsigned long stack_start;
+ u64 instrumentation;
};
/* Future-safe accessor for struct task_struct's cpus_allowed. */
*/
static inline int tracehook_notify_jctl(int notify, int why)
{
+ if (task_utrace_flags(current) & UTRACE_EVENT(JCTL))
+ utrace_report_jctl(notify, why);
- return notify || (current->ptrace & PT_PTRACED);
+ return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
+ }
+
+ /**
+ * tracehook_finish_jctl - report about return from job control stop
+ *
+ * This is called by do_signal_stop() after wakeup.
+ */
+ static inline void tracehook_finish_jctl(void)
+ {
}
#define DEATH_REAP -1
--- /dev/null
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+
+CCVERSION := $(shell $(CC) -v 2>&1 | sed -ne '$$p')
+obj-y := kdb_bt.o kdb_bp.o kdb_id.o kdbsupport.o gen-kdb_cmds.o kdbmain.o kdb_io.o kdbdereference.o
+CFLAGS_kdbmain.o += -DCCVERSION="$(CCVERSION)"
+
+subdir-$(CONFIG_KDB_MODULES) := modules
+obj-y += $(addsuffix /built-in.o, $(subdir-y))
+
+clean-files := gen-kdb_cmds.c
+
+override CFLAGS := $(CFLAGS:%-pg=% )
+
+# define architecture dependent kdb_cmds
+ifeq ($(CONFIG_IA64),y)
+ KDB_CMDS = ia64/kdb/kdb_cmds
+else
+ ifeq ($(CONFIG_X86_64),y)
+ KDB_CMDS = x86/kdb/kdb_cmds_64
+ else
+ ifeq ($(CONFIG_X86_32),y)
+ KDB_CMDS = x86/kdb/kdb_cmds_32
+ endif
+ endif
+endif
+
+quiet_cmd_gen-kdb = GENKDB $@
+ cmd_gen-kdb = $(AWK) 'BEGIN {print "\#include <linux/stddef.h>"; print "\#include <linux/init.h>"} \
+ /^\#/{next} \
+ /^[ \t]*$$/{next} \
+ {gsub(/"/, "\\\"", $$0); \
+ print "static __initdata char kdb_cmd" cmds++ "[] = \"" $$0 "\\n\";"} \
+ END {print "extern char *kdb_cmds[]; char __initdata *kdb_cmds[] = {"; for (i = 0; i < cmds; ++i) {print " kdb_cmd" i ","}; print(" NULL\n};");}' \
+ $(filter-out %/Makefile,$^) > $@
+
- $(obj)/gen-kdb_cmds.c: $(src)/kdb_cmds $(wildcard $(srctree)/arch/$(KDB_CMDS)) $(src)/Makefile
++$(obj)/gen-kdb_cmds.c: $(src)/kdb_cmds $(wildcard $(TOPDIR)/arch/$(KDB_CMDS)) $(src)/Makefile
+ $(call cmd,gen-kdb)
--- /dev/null
+# Initial commands for kdb, alter to suit your needs.
+# These commands are executed in kdb_init() context, no SMP, no
+# processes. Commands that require process data (including stack or
+# registers) are not reliable this early. set and bp commands should
+# be safe. Global breakpoint commands affect each cpu as it is booted.
+
+# Standard debugging information for first level support, just type archkdb
+# or archkdbcpu or archkdbshort at the kdb prompt.
+
+defcmd archkdb "" "First line arch debugging"
+ set BTSYMARG 1
+ set BTARGS 9
+ pid R
+ -archkdbcommon
++ r
+ -bta
+endefcmd
+
+defcmd archkdbcpu "" "archkdb with only tasks on cpus"
+ set BTSYMARG 1
+ set BTARGS 9
+ pid R
+ -archkdbcommon
+ -btc
+endefcmd
+
+defcmd archkdbshort "" "archkdb with less detailed backtrace"
+ set BTSYMARG 0
+ set BTARGS 0
+ pid R
+ -archkdbcommon
+ -bta
+endefcmd
--- /dev/null
+/*
+ * Kernel Debugger Architecture Independent Main Code
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Xscale (R) modifications copyright (C) 2003 Intel Corporation.
+ */
+
+/*
+ * Updated for Xscale (R) architecture support
+ * Eddie Dong <eddie.dong@intel.com> 8 Jan 03
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/sysrq.h>
+#include <linux/smp.h>
+#include <linux/utsname.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/nmi.h>
+#include <linux/ptrace.h>
+#include <linux/sysctl.h>
+#if defined(CONFIG_LKCD_DUMP) || defined(CONFIG_LKCD_DUMP_MODULE)
+#include <linux/dump.h>
+#endif
+#include <linux/cpu.h>
+#include <linux/kdebug.h>
+#ifdef CONFIG_KDB_KDUMP
+#include <linux/kexec.h>
+#endif
+
+#include <acpi/acpi_bus.h>
+
+#include <asm/system.h>
+#include <asm/kdebug.h>
+#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+char kdb_debug_info_filename[256] = {""};
+EXPORT_SYMBOL(kdb_debug_info_filename);
+#define GREP_LEN 256
+char kdb_grep_string[GREP_LEN];
+int kdb_grepping_flag;
+EXPORT_SYMBOL(kdb_grepping_flag);
+int kdb_grep_leading;
+int kdb_grep_trailing;
+
+/*
+ * Kernel debugger state flags
+ */
+volatile int kdb_flags;
+atomic_t kdb_event;
+atomic_t kdb_8250;
+
+/*
+ * kdb_lock protects updates to kdb_initial_cpu. Used to
+ * single thread processors through the kernel debugger.
+ */
+static DEFINE_SPINLOCK(kdb_lock);
+volatile int kdb_initial_cpu = -1; /* cpu number that owns kdb */
+int kdb_seqno = 2; /* how many times kdb has been entered */
+
+volatile int kdb_nextline = 1;
+static volatile int kdb_new_cpu; /* Which cpu to switch to */
+
+volatile int kdb_state[NR_CPUS]; /* Per cpu state */
+
+const struct task_struct *kdb_current_task;
+EXPORT_SYMBOL(kdb_current_task);
+struct pt_regs *kdb_current_regs;
+
+#ifdef CONFIG_KDB_OFF
+int kdb_on = 0; /* Default is off */
+#else
+int kdb_on = 1; /* Default is on */
+#endif /* CONFIG_KDB_OFF */
+
+const char *kdb_diemsg;
+static int kdb_go_count;
+#ifdef CONFIG_KDB_CONTINUE_CATASTROPHIC
+static unsigned int kdb_continue_catastrophic = CONFIG_KDB_CONTINUE_CATASTROPHIC;
+#else
+static unsigned int kdb_continue_catastrophic = 0;
+#endif
+
+#ifdef kdba_setjmp
+ /*
+ * Must have a setjmp buffer per CPU. Switching cpus will
+ * cause the jump buffer to be setup for the new cpu, and
+ * subsequent switches (and pager aborts) will use the
+ * appropriate per-processor values.
+ */
+kdb_jmp_buf *kdbjmpbuf;
+#endif /* kdba_setjmp */
+
+ /*
+ * kdb_commands describes the available commands.
+ */
+static kdbtab_t *kdb_commands;
+static int kdb_max_commands;
+
+typedef struct _kdbmsg {
+ int km_diag; /* kdb diagnostic */
+ char *km_msg; /* Corresponding message text */
+} kdbmsg_t;
+
+#define KDBMSG(msgnum, text) \
+ { KDB_##msgnum, text }
+
+static kdbmsg_t kdbmsgs[] = {
+ KDBMSG(NOTFOUND,"Command Not Found"),
+ KDBMSG(ARGCOUNT, "Improper argument count, see usage."),
+ KDBMSG(BADWIDTH, "Illegal value for BYTESPERWORD use 1, 2, 4 or 8, 8 is only allowed on 64 bit systems"),
+ KDBMSG(BADRADIX, "Illegal value for RADIX use 8, 10 or 16"),
+ KDBMSG(NOTENV, "Cannot find environment variable"),
+ KDBMSG(NOENVVALUE, "Environment variable should have value"),
+ KDBMSG(NOTIMP, "Command not implemented"),
+ KDBMSG(ENVFULL, "Environment full"),
+ KDBMSG(ENVBUFFULL, "Environment buffer full"),
+ KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
+#ifdef CONFIG_CPU_XSCALE
+ KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
+#else
+ KDBMSG(TOOMANYDBREGS, "More breakpoints than db registers defined"),
+#endif
+ KDBMSG(DUPBPT, "Duplicate breakpoint address"),
+ KDBMSG(BPTNOTFOUND, "Breakpoint not found"),
+ KDBMSG(BADMODE, "Invalid IDMODE"),
+ KDBMSG(BADINT, "Illegal numeric value"),
+ KDBMSG(INVADDRFMT, "Invalid symbolic address format"),
+ KDBMSG(BADREG, "Invalid register name"),
+ KDBMSG(BADCPUNUM, "Invalid cpu number"),
+ KDBMSG(BADLENGTH, "Invalid length field"),
+ KDBMSG(NOBP, "No Breakpoint exists"),
+ KDBMSG(BADADDR, "Invalid address"),
+};
+#undef KDBMSG
+
+static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
+
+
+/*
+ * Initial environment. This is all kept static and local to
+ * this file. We don't want to rely on the memory allocation
+ * mechanisms in the kernel, so we use a very limited allocate-only
+ * heap for new and altered environment variables. The entire
+ * environment is limited to a fixed number of entries (add more
+ * to __env[] if required) and a fixed amount of heap (add more to
+ * KDB_ENVBUFSIZE if required).
+ */
+
+static char *__env[] = {
+#if defined(CONFIG_SMP)
+ "PROMPT=[%d]kdb> ",
+ "MOREPROMPT=[%d]more> ",
+#else
+ "PROMPT=kdb> ",
+ "MOREPROMPT=more> ",
+#endif
+ "RADIX=16",
+ "LINES=24",
+ "COLUMNS=80",
+ "MDCOUNT=8", /* lines of md output */
+ "BTARGS=9", /* 9 possible args in bt */
+ KDB_PLATFORM_ENV,
+ "DTABCOUNT=30",
+ "NOSECT=1",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+};
+
+static const int __nenv = (sizeof(__env) / sizeof(char *));
+
+/* external commands: */
+int kdb_debuginfo_print(int argc, const char **argv);
+int kdb_pxhelp(int argc, const char **argv);
+int kdb_walkhelp(int argc, const char **argv);
+int kdb_walk(int argc, const char **argv);
+
+/*
+ * kdb_serial_str is the sequence that the user must enter on a serial
+ * console to invoke kdb. It can be a single character such as "\001"
+ * (control-A) or multiple characters such as "\eKDB". NOTE: All except the
+ * last character are passed through to the application reading from the serial
+ * console.
+ *
+ * I tried to make the sequence a CONFIG_ option but most of CML1 cannot cope
+ * with '\' in strings. CML2 would have been able to do it but we lost CML2.
+ * KAO.
+ */
+const char kdb_serial_str[] = "\eKDB";
+EXPORT_SYMBOL(kdb_serial_str);
+
+struct task_struct *
+kdb_curr_task(int cpu)
+{
+#ifdef _TIF_MCA_INIT
+	struct task_struct *running = curr_task(cpu);
+	struct kdb_running_process *krp = &kdb_running_process[cpu];
+
+	/*
+	 * When the task currently on the cpu carries _TIF_MCA_INIT and kdb
+	 * has recorded a task for this cpu, report the recorded task instead.
+	 */
+	if (krp->p && (task_thread_info(running)->flags & _TIF_MCA_INIT))
+		return krp->p;
+	return running;
+#else
+	return curr_task(cpu);
+#endif
+}
+
+/*
+ * kdbgetenv
+ *
+ * This function will return the character string value of
+ * an environment variable.
+ *
+ * Parameters:
+ * match A character string representing an environment variable.
+ * Outputs:
+ * None.
+ * Returns:
+ * NULL No environment variable matches 'match'
+ * char* Pointer to string value of environment variable.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+char *
+kdbgetenv(const char *match)
+{
+	int matchlen = strlen(match);
+	int slot;
+
+	for (slot = 0; slot < __nenv; slot++) {
+		char *entry = __env[slot];
+		char *eq;
+
+		/* unused slots are NULL */
+		if (!entry)
+			continue;
+		if (strncmp(match, entry, matchlen) != 0)
+			continue;
+		/* the name must end exactly here: "NAME" or "NAME=..." */
+		if (entry[matchlen] != '\0' && entry[matchlen] != '=')
+			continue;
+
+		/* a variable without '=' has the empty string as its value */
+		eq = strchr(entry, '=');
+		return eq ? eq + 1 : "";
+	}
+	return NULL;
+}
+
+/*
+ * kdballocenv
+ *
+ * This function is used to allocate bytes for environment entries.
+ *
+ * Parameters:
+ * bytes Number of bytes to reserve from the static environment buffer.
+ * Outputs:
+ * None.
+ * Returns:
+ * Pointer to the start of the reserved space inside envbuffer, or
+ * NULL if fewer than 'bytes' bytes remain in the buffer.
+ * Locking:
+ * No locking considerations required. Must be called with all
+ * processors halted.
+ * Remarks:
+ * We use a static environment buffer (envbuffer) to hold the values
+ * of dynamically generated environment variables (see kdb_set). Buffer
+ * space once allocated is never free'd, so over time, the amount of space
+ * (currently 512 bytes) will be exhausted if env variables are changed
+ * frequently.
+ */
+static char *
+kdballocenv(size_t bytes)
+{
+#define KDB_ENVBUFSIZE 512
+ static char envbuffer[KDB_ENVBUFSIZE];
+ /* number of bytes of envbuffer already handed out; only ever grows */
+ static int envbufsize;
+ char *ep = NULL;
+
+ /* hand out the next 'bytes' bytes only if that many are still unused */
+ if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
+ ep = &envbuffer[envbufsize];
+ envbufsize += bytes;
+ }
+ return ep;
+}
+
+/*
+ * kdbgetulenv
+ *
+ * This function will return the value of an unsigned long-valued
+ * environment variable.
+ *
+ * Parameters:
+ * match A character string representing a numeric value
+ * Outputs:
+ * *value the unsigned long representation of the env variable 'match'
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+
+static int
+kdbgetulenv(const char *match, unsigned long *value)
+{
+	char *text = kdbgetenv(match);
+
+	/* variable is not in the environment at all */
+	if (text == NULL)
+		return KDB_NOTENV;
+	/* variable exists but has an empty value */
+	if (text[0] == '\0')
+		return KDB_NOENVVALUE;
+
+	/* base 0: simple_strtoul picks the radix from the prefix */
+	*value = simple_strtoul(text, NULL, 0);
+	return 0;
+}
+
+/*
+ * kdbgetintenv
+ *
+ * This function will return the value of an integer-valued
+ * environment variable.
+ *
+ * Parameters:
+ * match A character string representing an integer-valued env variable
+ * Outputs:
+ * *value the integer representation of the environment variable 'match'
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+
+int
+kdbgetintenv(const char *match, int *value)
+{
+	unsigned long ulval;
+	int diag = kdbgetulenv(match, &ulval);
+
+	/* *value is left untouched when the lookup fails */
+	if (diag)
+		return diag;
+
+	*value = (int) ulval;
+	return 0;
+}
+
+/*
+ * kdbgetularg
+ *
+ * This function will convert a numeric string
+ * into an unsigned long value.
+ *
+ * Parameters:
+ * arg A character string representing a numeric value
+ * Outputs:
+ * *value the unsigned long representation of arg.
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+
+int
+kdbgetularg(const char *arg, unsigned long *value)
+{
+	/*
+	 * First let simple_strtoul choose the radix (base 0); if that
+	 * consumes nothing, retry as bare hexadecimal for users who leave
+	 * off the leading 0x.
+	 */
+	static const unsigned int bases[] = { 0, 16 };
+	unsigned int attempt;
+
+	for (attempt = 0; attempt < sizeof(bases) / sizeof(bases[0]); attempt++) {
+		char *end;
+		unsigned long parsed = simple_strtoul(arg, &end, bases[attempt]);
+
+		if (end != arg) {
+			*value = parsed;
+			return 0;
+		}
+	}
+
+	/* no digits were consumed in either radix */
+	return KDB_BADINT;
+}
+
+/*
+ * kdb_set
+ *
+ * This function implements the 'set' command. Alter an existing
+ * environment variable or create a new one.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_set(int argc, const char **argv)
+{
+	const char *var, *val;
+	char *entry;
+	size_t varlen, vallen;
+	int slot;
+
+	/*
+	 * Two spellings reach us:
+	 *   set var=value     argv[1]="var", argv[2]="value"
+	 *   set var = value   argv[1]="var", argv[2]="=", argv[3]="value"
+	 * Fold the second into the first by dropping the "=" token.
+	 */
+	if (argc == 3) {
+		argv[2] = argv[3];
+		argc--;
+	}
+	if (argc != 2)
+		return KDB_ARGCOUNT;
+
+	var = argv[1];
+	val = argv[2];
+
+	/* KDBDEBUG is an internal variable kept in kdb_flags, not in __env */
+	if (strcmp(var, "KDBDEBUG") == 0) {
+		char *end;
+		unsigned int debugflags = simple_strtoul(val, &end, 0);
+
+		if (end == val || (debugflags & ~KDB_DEBUG_FLAG_MASK)) {
+			kdb_printf("kdb: illegal debug flags '%s'\n", val);
+			return 0;
+		}
+		kdb_flags = (kdb_flags & ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
+			| (debugflags << KDB_DEBUG_FLAG_SHIFT);
+		return 0;
+	}
+
+	/*
+	 * The tokenizer squashed the '=' sign, so rebuild "var=value" in
+	 * freshly reserved environment-buffer space ('=' plus NUL = 2 extra).
+	 */
+	varlen = strlen(var);
+	vallen = strlen(val);
+	entry = kdballocenv(varlen + vallen + 2);
+	if (!entry)
+		return KDB_ENVBUFFULL;
+
+	sprintf(entry, "%s=%s", var, val);
+	entry[varlen + vallen + 1] = '\0';
+
+	/* An existing variable of the same name is replaced in place. */
+	for (slot = 0; slot < __nenv; slot++) {
+		char *cur = __env[slot];
+
+		if (!cur)
+			continue;
+		if (strncmp(cur, var, varlen) != 0)
+			continue;
+		if (cur[varlen] != '\0' && cur[varlen] != '=')
+			continue;
+		__env[slot] = entry;
+		return 0;
+	}
+
+	/* New variable: take the first empty slot; the final slot is never used. */
+	for (slot = 0; slot < __nenv - 1; slot++) {
+		if (!__env[slot]) {
+			__env[slot] = entry;
+			return 0;
+		}
+	}
+
+	return KDB_ENVFULL;
+}
+
+/* Return 0 when kdb_current_regs is set, else complain and return KDB_BADREG. */
+static int
+kdb_check_regs(void)
+{
+	if (kdb_current_regs)
+		return 0;
+
+	kdb_printf("No current kdb registers."
+		   " You may need to select another task\n");
+	return KDB_BADREG;
+}
+
+/*
+ * kdbgetaddrarg
+ *
+ * This function is responsible for parsing an
+ * address-expression and returning the value of
+ * the expression, symbol name, and offset to the caller.
+ *
+ * The argument may consist of a numeric value (decimal or
+ * hexadecimal), a symbol name, a register name (preceded
+ * by the percent sign), an environment variable with a numeric
+ * value (preceded by a dollar sign) or a simple arithmetic
+ * expression consisting of a symbol name, +/-, and a numeric
+ * constant value (offset).
+ *
+ * Parameters:
+ * argc - count of arguments in argv
+ * argv - argument vector
+ * *nextarg - index to next unparsed argument in argv[]
+ * regs - Register state at time of KDB entry
+ * Outputs:
+ * *value - receives the value of the address-expression
+ * *offset - receives the offset specified, if any
+ * *name - receives the symbol name, if any
+ * *nextarg - index to next unparsed argument in argv[]
+ *
+ * Returns:
+ * zero is returned on success, a kdb diagnostic code is
+ * returned on error.
+ *
+ * Locking:
+ * No locking requirements.
+ *
+ * Remarks:
+ *
+ */
+
+int
+kdbgetaddrarg(int argc, const char **argv, int *nextarg,
+ kdb_machreg_t *value, long *offset,
+ char **name)
+{
+ kdb_machreg_t addr;
+ unsigned long off = 0;
+ int positive;
+ int diag;
+ int found = 0;
+ char *symname;
+ char symbol = '\0';
+ char *cp;
+ kdb_symtab_t symtab;
+
+ /*
+ * Process arguments which follow the following syntax:
+ *
+ * symbol | numeric-address [+/- numeric-offset]
+ * %register
+ * $environment-variable
+ */
+
+ if (*nextarg > argc) {
+ return KDB_ARGCOUNT;
+ }
+
+ /* const is cast away: the token may be split in place just below */
+ symname = (char *)argv[*nextarg];
+
+ /*
+ * If there is no whitespace between the symbol
+ * or address and the '+' or '-' symbols, we
+ * remember the character and replace it with a
+ * null so the symbol/value can be properly parsed
+ */
+ if ((cp = strpbrk(symname, "+-")) != NULL) {
+ symbol = *cp;
+ *cp++ = '\0';
+ }
+
+ /* resolve the base address: $env variable, %register, symbol, or number */
+ if (symname[0] == '$') {
+ diag = kdbgetulenv(&symname[1], &addr);
+ if (diag)
+ return diag;
+ } else if (symname[0] == '%') {
+ if ((diag = kdb_check_regs()))
+ return diag;
+ diag = kdba_getregcontents(&symname[1], kdb_current_regs, &addr);
+ if (diag)
+ return diag;
+ } else {
+ /* try the token as a symbol name first, then as a number */
+ found = kdbgetsymval(symname, &symtab);
+ if (found) {
+ addr = symtab.sym_start;
+ } else {
+ diag = kdbgetularg(argv[*nextarg], &addr);
+ if (diag)
+ return diag;
+ }
+ }
+
+ /* not resolved by name: look up the symbol nearest to the address */
+ if (!found)
+ found = kdbnearsym(addr, &symtab);
+
+ (*nextarg)++;
+
+ if (name)
+ *name = symname;
+ if (value)
+ *value = addr;
+ /*
+ * NOTE(review): *offset is only assigned here when 'name' was also
+ * supplied; otherwise the '*offset += off' below adds to whatever the
+ * caller left in *offset - confirm callers initialise it.
+ */
+ if (offset && name && *name)
+ *offset = addr - symtab.sym_start;
+
+ /* no more arguments and no embedded +/-: the expression is complete */
+ if ((*nextarg > argc)
+ && (symbol == '\0'))
+ return 0;
+
+ /*
+ * check for +/- and offset
+ */
+
+ if (symbol == '\0') {
+ if ((argv[*nextarg][0] != '+')
+ && (argv[*nextarg][0] != '-')) {
+ /*
+ * Not our argument. Return.
+ */
+ return 0;
+ } else {
+ positive = (argv[*nextarg][0] == '+');
+ (*nextarg)++;
+ }
+ } else
+ positive = (symbol == '+');
+
+ /*
+ * Now there must be an offset!
+ */
+ if ((*nextarg > argc)
+ && (symbol == '\0')) {
+ return KDB_INVADDRFMT;
+ }
+
+ /*
+ * With an embedded +/-, cp already points just past the operator
+ * inside the original token; otherwise the offset is the next argument.
+ */
+ if (!symbol) {
+ cp = (char *)argv[*nextarg];
+ (*nextarg)++;
+ }
+
+ diag = kdbgetularg(cp, &off);
+ if (diag)
+ return diag;
+
+ /* 'off' is unsigned, so negation wraps; the additions below then subtract */
+ if (!positive)
+ off = -off;
+
+ if (offset)
+ *offset += off;
+
+ if (value)
+ *value += off;
+
+ return 0;
+}
+
+/*
+ * Print the message text for a kdb diagnostic code.  Codes >= 0 are not
+ * errors; codes with no entry in kdbmsgs[] are reported as unknown.
+ */
+static void
+kdb_cmderror(int diag)
+{
+	const kdbmsg_t *msg;
+	const kdbmsg_t *end = kdbmsgs + __nkdb_err;
+
+	if (diag >= 0) {
+		kdb_printf("no error detected (diagnostic is %d)\n", diag);
+		return;
+	}
+
+	for (msg = kdbmsgs; msg < end; msg++) {
+		if (msg->km_diag != diag)
+			continue;
+		kdb_printf("diag: %d: %s\n", diag, msg->km_msg);
+		return;
+	}
+
+	/* note: the unknown code is printed negated */
+	kdb_printf("Unknown diag %d\n", -diag);
+}
+
+/*
+ * kdb_defcmd, kdb_defcmd2
+ *
+ * This function implements the 'defcmd' command which defines one
+ * command as a set of other commands, terminated by endefcmd.
+ * kdb_defcmd processes the initial 'defcmd' command, kdb_defcmd2
+ * is invoked from kdb_parse for the following commands until
+ * 'endefcmd'.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+/* One user-defined command created by defcmd ... endefcmd. */
+struct defcmd_set {
+ int count; /* number of entries in command[] */
+ int usable; /* cleared when an allocation fails or no commands were given */
+ char *name; /* copy of the defcmd name argument (argv[1]) */
+ char *usage; /* copy of the usage argument (argv[2]), quotes stripped */
+ char *help; /* copy of the help argument (argv[3]), quotes stripped */
+ char **command; /* copies of the command lines making up the body */
+};
+/* table of all defined command sets; regrown by one entry per defcmd */
+static struct defcmd_set *defcmd_set;
+/* number of entries in defcmd_set */
+static int defcmd_set_count;
+/* non-zero between a defcmd and its endefcmd */
+static int defcmd_in_progress;
+
+/* Forward references */
+static int kdb_exec_defcmd(int argc, const char **argv);
+
+/*
+ * Handle one line that follows a 'defcmd': either the terminating
+ * 'endefcmd', which registers the finished command, or a body line which
+ * is appended to the most recently created defcmd_set entry.
+ *
+ * cmdstr	the complete input line, stored verbatim as the body line
+ * argv0	first token of the line, used to recognise 'endefcmd'
+ *
+ * Returns 0 on success, KDB_NOTIMP if the set is unusable or memory could
+ * not be allocated.
+ */
+static int
+kdb_defcmd2(const char *cmdstr, const char *argv0)
+{
+	struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
+	char **old_command = s->command;
+	char **new_command;
+	char *line;
+
+	if (strcmp(argv0, "endefcmd") == 0) {
+		defcmd_in_progress = 0;
+		/* a definition with no body lines is not worth registering */
+		if (!s->count)
+			s->usable = 0;
+		if (s->usable)
+			kdb_register(s->name, kdb_exec_defcmd, s->usage, s->help, 0);
+		return 0;
+	}
+	if (!s->usable)
+		return KDB_NOTIMP;
+
+	/*
+	 * Allocate everything before touching the set.  On failure the
+	 * existing table stays attached to the set so that count and
+	 * command[] remain consistent for the defcmd listing, and nothing
+	 * is leaked.  (kdb_strdup is assumed to return NULL on allocation
+	 * failure - confirm against its definition.)
+	 */
+	line = kdb_strdup(cmdstr, GFP_KDB);
+	new_command = kmalloc((s->count + 1) * sizeof(*new_command), GFP_KDB);
+	if (!line || !new_command) {
+		kdb_printf("Could not allocate new kdb_defcmd table for %s\n", cmdstr);
+		kfree(line);
+		kfree(new_command);
+		s->usable = 0;
+		return KDB_NOTIMP;
+	}
+
+	/* grow by one: copy the old pointers, append the new line */
+	if (s->count)
+		memcpy(new_command, old_command, s->count * sizeof(*new_command));
+	new_command[s->count++] = line;
+	s->command = new_command;
+	kfree(old_command);
+	return 0;
+}
+
+/*
+ * Strip one leading double quote, and the character that then ends the
+ * string (expected to be the closing quote), from str in place.
+ */
+static void
+kdb_defcmd_strip_quotes(char *str)
+{
+	size_t len;
+
+	if (str[0] != '"')
+		return;
+
+	/* source and destination overlap, so memmove rather than strcpy */
+	len = strlen(str);
+	memmove(str, str + 1, len);	/* len bytes: the tail plus its NUL */
+
+	/* a lone '"' leaves an empty string: nothing further to strip */
+	if (len > 1)
+		str[len - 2] = '\0';
+}
+
+/*
+ * Implements 'defcmd'.  With no arguments, list every defined command set.
+ * With three arguments (name, usage, help) start a new command set; the
+ * following input lines are collected by kdb_defcmd2 until 'endefcmd'.
+ *
+ * Returns 0 on success, KDB_ARGCOUNT for a wrong argument count and
+ * KDB_NOTIMP if memory could not be allocated.
+ */
+static int
+kdb_defcmd(int argc, const char **argv)
+{
+	struct defcmd_set *new_set, *s;
+	char *name, *usage, *help;
+
+	if (defcmd_in_progress) {
+		kdb_printf("kdb: nested defcmd detected, assuming missing endefcmd\n");
+		kdb_defcmd2("endefcmd", "endefcmd");
+	}
+	if (argc == 0) {
+		int i;
+		for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
+			kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name, s->usage, s->help);
+			for (i = 0; i < s->count; ++i)
+				kdb_printf("%s", s->command[i]);
+			kdb_printf("endefcmd\n");
+		}
+		return 0;
+	}
+	if (argc != 3)
+		return KDB_ARGCOUNT;
+
+	/*
+	 * Allocate all pieces first so that a failure leaves the existing
+	 * table untouched.  (kdb_strdup is assumed to return NULL on
+	 * allocation failure - confirm against its definition.)
+	 */
+	name = kdb_strdup(argv[1], GFP_KDB);
+	usage = kdb_strdup(argv[2], GFP_KDB);
+	help = kdb_strdup(argv[3], GFP_KDB);
+	new_set = kmalloc((defcmd_set_count + 1) * sizeof(*defcmd_set), GFP_KDB);
+	if (!name || !usage || !help || !new_set) {
+		kdb_printf("Could not allocate new defcmd_set entry for %s\n", argv[1]);
+		kfree(name);
+		kfree(usage);
+		kfree(help);
+		kfree(new_set);
+		return KDB_NOTIMP;
+	}
+
+	/* grow the table by one entry and switch over to the new copy */
+	if (defcmd_set_count)
+		memcpy(new_set, defcmd_set, defcmd_set_count * sizeof(*defcmd_set));
+	kfree(defcmd_set);
+	defcmd_set = new_set;
+
+	s = defcmd_set + defcmd_set_count;
+	memset(s, 0, sizeof(*s));
+	s->usable = 1;
+	s->name = name;
+	s->usage = usage;
+	s->help = help;
+	kdb_defcmd_strip_quotes(s->usage);
+	kdb_defcmd_strip_quotes(s->help);
+
+	++defcmd_set_count;
+	defcmd_in_progress = 1;
+	return 0;
+}
+
+/*
+ * kdb_exec_defcmd
+ *
+ * Execute the set of commands associated with this defcmd name.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_exec_defcmd(int argc, const char **argv)
+{
+	struct defcmd_set *s = defcmd_set;
+	struct defcmd_set *end = defcmd_set + defcmd_set_count;
+	int line, ret;
+
+	/* a defcmd-defined command takes no arguments */
+	if (argc != 0)
+		return KDB_ARGCOUNT;
+
+	/* find the command set whose name matches the invoked command */
+	while (s < end && strcmp(s->name, argv[0]) != 0)
+		++s;
+	if (s == end) {
+		kdb_printf("kdb_exec_defcmd: could not find commands for %s\n", argv[0]);
+		return KDB_NOTIMP;
+	}
+
+	/* Recursive use of kdb_parse, do not use argv after this point */
+	argv = NULL;
+	for (line = 0; line < s->count; ++line) {
+		kdb_printf("[%s]kdb> %s\n", s->name, s->command[line]);
+		ret = kdb_parse(s->command[line]);
+		/* stop at the first body line that reports a non-zero result */
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/* Command history */
+#define KDB_CMD_HISTORY_COUNT 32
+#define CMD_BUFLEN 200 /* kdb_printf: max printline size == 256 */
+static unsigned int cmd_head=0, cmd_tail=0;
+static unsigned int cmdptr;
+static char cmd_hist[KDB_CMD_HISTORY_COUNT][CMD_BUFLEN];
+static char cmd_cur[CMD_BUFLEN];
+
+/*
+ * The "str" argument may point to something like | grep xyz
+ *
+ */
+/*
+ * Parse the "| grep pattern" tail of a command line and, when it is valid,
+ * record the pattern in kdb_grep_string and bump kdb_grepping_flag.
+ * A leading '^' and a trailing '$' are stripped from the pattern and noted
+ * in kdb_grep_leading / kdb_grep_trailing.  The pattern may be enclosed in
+ * double quotes so that it can contain whitespace.
+ *
+ * Note: despite the const qualifier, the string is modified in place
+ * (newline, closing quote and trailing '$' are overwritten with NUL).
+ */
+static void
+parse_grep(const char *str)
+{
+	int len;
+	char *cp = (char *)str, *cp2;
+
+	/* sanity check: we should have been called with the | first */
+	if (*cp != '|')
+		return;
+	cp++;
+	while (isspace(*cp))
+		cp++;
+	if (strncmp(cp,"grep ",5)) {
+		kdb_printf ("invalid 'pipe', see grephelp\n");
+		return;
+	}
+	cp += 5;
+	while (isspace(*cp))
+		cp++;
+	cp2 = strchr(cp, '\n');
+	if (cp2)
+		*cp2 = '\0'; /* remove the trailing newline */
+	len = strlen(cp);
+	if (len == 0) {
+		kdb_printf ("invalid 'pipe', see grephelp\n");
+		return;
+	}
+	/* now cp points to a nonzero length search string */
+	if (*cp == '"') {
+		/* allow it be "x y z" by removing the "'s - there must
+		   be two of them */
+		cp++;
+		cp2 = strchr(cp, '"');
+		if (!cp2) {
+			kdb_printf ("invalid quoted string, see grephelp\n");
+			return;
+		}
+		*cp2 = '\0'; /* end the string where the 2nd " was */
+	}
+	kdb_grep_leading = 0;
+	if (*cp == '^') {
+		kdb_grep_leading = 1;
+		cp++;
+	}
+	len = strlen(cp);
+	kdb_grep_trailing = 0;
+	/*
+	 * Only look at the last character when there is one; with an empty
+	 * pattern (e.g. just "^") cp[len-1] would be the byte before it.
+	 */
+	if (len > 0 && cp[len - 1] == '$') {
+		kdb_grep_trailing = 1;
+		cp[len - 1] = '\0';
+	}
+	len = strlen(cp);
+	if (!len)
+		return;
+	if (len >= GREP_LEN) {
+		kdb_printf ("search string too long\n");
+		return;
+	}
+	strcpy(kdb_grep_string, cp);
+	kdb_grepping_flag++;
+	return;
+}
+
+/*
+ * kdb_parse
+ *
+ * Parse the command line, search the command table for a
+ * matching command and invoke the command function.
+ * This function may be called recursively, if it is, the second call
+ * will overwrite argv and cbuf. It is the caller's responsibility to
+ * save their argv if they recursively call kdb_parse().
+ *
+ * Parameters:
+ * cmdstr The input command line to be parsed.
+ * regs The registers at the time kdb was entered.
+ * Outputs:
+ * None.
+ * Returns:
+ * Zero for success, a kdb diagnostic if failure.
+ * Locking:
+ * None.
+ * Remarks:
+ * Limited to 20 tokens.
+ *
+ * Real rudimentary tokenization. Basically only whitespace
+ * is considered a token delimiter (but special consideration
+ * is taken of the '=' sign as used by the 'set' command).
+ *
+ * The algorithm used to tokenize the input string relies on
+ * there being at least one whitespace (or otherwise useless)
+ * character between tokens as the character immediately following
+ * the token is altered in-place to a null-byte to terminate the
+ * token string.
+ */
+
+#define MAXARGC 20
+
+int
+kdb_parse(const char *cmdstr)
+{
+ /*
+ * argv, argc and cbuf are static so that an empty input line can
+ * re-run the previous command; the cmd_repeat handling below decides
+ * how much of the previous command is kept.
+ */
+ static char *argv[MAXARGC];
+ static int argc = 0;
+ static char cbuf[CMD_BUFLEN+2];
+ char *cp;
+ char *cpp, quoted;
+ kdbtab_t *tp;
+ int i, escaped, ignore_errors = 0, check_grep;
+
+ /*
+ * First tokenize the command string.
+ */
+ cp = (char *)cmdstr;
+ kdb_grepping_flag = check_grep = 0;
+
+ if (KDB_FLAG(CMD_INTERRUPT)) {
+ /* Previous command was interrupted, newline must not repeat the command */
+ KDB_FLAG_CLEAR(CMD_INTERRUPT);
+ argc = 0; /* no repeat */
+ }
+
+ /* a non-empty line replaces the saved argv; an empty one reuses it */
+ if (*cp != '\n' && *cp != '\0') {
+ argc = 0;
+ cpp = cbuf;
+ while (*cp) {
+ /* skip whitespace */
+ while (isspace(*cp)) cp++;
+ /* '#' starts a comment, except while a defcmd body is being collected */
+ if ((*cp == '\0') || (*cp == '\n') || (*cp == '#' && !defcmd_in_progress))
+ break;
+ /* special case: check for | grep pattern */
+ if (*cp == '|') {
+ check_grep++;
+ break;
+ }
+ if (cpp >= cbuf + CMD_BUFLEN) {
+ kdb_printf("kdb_parse: command buffer overflow, command ignored\n%s\n", cmdstr);
+ return KDB_NOTFOUND;
+ }
+ if (argc >= MAXARGC - 1) {
+ kdb_printf("kdb_parse: too many arguments, command ignored\n%s\n", cmdstr);
+ return KDB_NOTFOUND;
+ }
+ argv[argc++] = cpp;
+ escaped = 0;
+ quoted = '\0';
+ /* Copy to next unquoted and unescaped whitespace or '=' */
+ while (*cp && *cp != '\n' && (escaped || quoted || !isspace(*cp))) {
+ if (cpp >= cbuf + CMD_BUFLEN)
+ break;
+ if (escaped) {
+ escaped = 0;
+ *cpp++ = *cp++;
+ continue;
+ }
+ /* the backslash itself is dropped; the next character is copied as is */
+ if (*cp == '\\') {
+ escaped = 1;
+ ++cp;
+ continue;
+ }
+ /* quote characters are tracked but still copied into the token */
+ if (*cp == quoted) {
+ quoted = '\0';
+ } else if (*cp == '\'' || *cp == '"') {
+ quoted = *cp;
+ }
+ if ((*cpp = *cp++) == '=' && !quoted)
+ break;
+ ++cpp;
+ }
+ *cpp++ = '\0'; /* Squash a ws or '=' character */
+ }
+ }
+ if (!argc)
+ return 0;
+ /* cp was left pointing at the '|' by the tokenizer loop */
+ if (check_grep)
+ parse_grep(cp);
+ /* inside defcmd ... endefcmd every line is collected, not executed */
+ if (defcmd_in_progress) {
+ int result = kdb_defcmd2(cmdstr, argv[0]);
+ if (!defcmd_in_progress) {
+ argc = 0; /* avoid repeat on endefcmd */
+ *(argv[0]) = '\0';
+ }
+ return result;
+ }
+ /* a leading '-' not followed by a digit asks for errors to be ignored */
+ if (argv[0][0] == '-' && argv[0][1] && (argv[0][1] < '0' || argv[0][1] > '9')) {
+ ignore_errors = 1;
+ ++argv[0];
+ }
+
+ for(tp=kdb_commands, i=0; i < kdb_max_commands; i++,tp++) {
+ if (tp->cmd_name) {
+ /*
+ * If this command is allowed to be abbreviated,
+ * check to see if this is it.
+ */
+
+ if (tp->cmd_minlen
+ && (strlen(argv[0]) <= tp->cmd_minlen)) {
+ if (strncmp(argv[0],
+ tp->cmd_name,
+ tp->cmd_minlen) == 0) {
+ break;
+ }
+ }
+
+ if (strcmp(argv[0], tp->cmd_name)==0) {
+ break;
+ }
+ }
+ }
+
+ /*
+ * If we don't find a command by this name, see if the first
+ * few characters of this match any of the known commands.
+ * e.g., md1c20 should match md.
+ */
+ if (i == kdb_max_commands) {
+ for(tp=kdb_commands, i=0; i < kdb_max_commands; i++,tp++) {
+ if (tp->cmd_name) {
+ if (strncmp(argv[0],
+ tp->cmd_name,
+ strlen(tp->cmd_name))==0) {
+ break;
+ }
+ }
+ }
+ }
+
+ if (i < kdb_max_commands) {
+ int result;
+ KDB_STATE_SET(CMD);
+ /* the handler receives the argument count without the command name */
+ result = (*tp->cmd_func)(argc-1,
+ (const char**)argv);
+ /* with '-', only results above KDB_CMD_GO are turned into success */
+ if (result && ignore_errors && result > KDB_CMD_GO)
+ result = 0;
+ KDB_STATE_CLEAR(CMD);
+ /* set up what an empty input line will repeat next time */
+ switch (tp->cmd_repeat) {
+ case KDB_REPEAT_NONE:
+ argc = 0;
+ if (argv[0])
+ *(argv[0]) = '\0';
+ break;
+ case KDB_REPEAT_NO_ARGS:
+ argc = 1;
+ if (argv[1])
+ *(argv[1]) = '\0';
+ break;
+ case KDB_REPEAT_WITH_ARGS:
+ break;
+ }
+ return result;
+ }
+
+ /*
+ * If the input with which we were presented does not
+ * map to an existing command, attempt to parse it as an
+ * address argument and display the result. Useful for
+ * obtaining the address of a variable, or the nearest symbol
+ * to an address contained in a register.
+ */
+ {
+ kdb_machreg_t value;
+ char *name = NULL;
+ long offset;
+ int nextarg = 0;
+
+ if (kdbgetaddrarg(0, (const char **)argv, &nextarg,
+ &value, &offset, &name)) {
+ return KDB_NOTFOUND;
+ }
+
+ kdb_printf("%s = ", argv[0]);
+ kdb_symbol_print(value, NULL, KDB_SP_DEFAULT);
+ kdb_printf("\n");
+ return 0;
+ }
+}
+
+
+/*
+ * Handle the history-navigation control characters.  CTRL_P steps cmdptr
+ * back (stopping at cmd_tail), CTRL_N steps it forward (stopping at
+ * cmd_head); in both cases the selected history entry is copied into
+ * cmd_cur.  Returns 1 if *cmd was one of these characters and the history
+ * is not empty, 0 otherwise.
+ */
+static int
+handle_ctrl_cmd(char *cmd)
+{
+#define CTRL_P 16
+#define CTRL_N 14
+
+	/* head == tail: nothing has been recorded yet */
+	if (cmd_head == cmd_tail)
+		return 0;
+
+	if (*cmd == CTRL_P) {
+		if (cmdptr != cmd_tail)
+			cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT;
+	} else if (*cmd == CTRL_N) {
+		if (cmdptr != cmd_head)
+			cmdptr = (cmdptr+1) % KDB_CMD_HISTORY_COUNT;
+	} else {
+		return 0;
+	}
+
+	strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
+	return 1;
+}
+
+/*
+ * kdb_do_dump
+ *
+ * Call the dump() function if the kernel is configured for LKCD.
+ * Inputs:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * None. dump() may or may not return.
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static void
+kdb_do_dump(void)
+{
+ /* compiles to an empty function unless LKCD dump support is configured */
+#if defined(CONFIG_LKCD_DUMP) || defined(CONFIG_LKCD_DUMP_MODULE)
+ kdb_printf("Forcing dump (if configured)\n");
+ console_loglevel = 8; /* to see the dump messages */
+ dump("kdb_do_dump");
+#endif
+}
+
+/*
+ * kdb_reboot
+ *
+ * This function implements the 'reboot' command. Reboot the system
+ * immediately.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Shouldn't return from this function.
+ */
+
+static int
+kdb_reboot(int argc, const char **argv)
+{
+	emergency_restart();
+
+	/* only reached if emergency_restart() came back */
+	kdb_printf("Hmm, kdb_reboot did not reboot, spinning here\n");
+	for (;;)
+		;
+
+	/* NOTREACHED */
+	return 0;
+}
+
+#ifdef CONFIG_KDB_KDUMP
+
+int kdb_kdump_state = KDB_KDUMP_RESET; /* KDB kdump state */
+
+static int kdb_cpu(int argc, const char **argv);
+
+/*
+ * kdb_kdump_check
+ *
+ * This is where the kdump on monarch cpu is handled.
+ *
+ */
+void kdb_kdump_check(struct pt_regs *regs)
+{
+	/* nothing to do unless a kdump has been requested */
+	if (kdb_kdump_state == KDB_KDUMP_RESET)
+		return;
+
+	crash_kexec(regs);
+
+	/* getting back here means crash_kexec() did not take over */
+	kdb_printf("kdb_kdump_check: crash_kexec failed!\n");
+	kdb_printf(" Please check if the kdump kernel has been properly loaded\n");
+	kdb_kdump_state = KDB_KDUMP_RESET;
+}
+
+
+/*
+ * kdb_kdump
+ *
+ * This function implements the 'kdump' command.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * envp environment vector
+ * regs registers at time kdb was entered.
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Shouldn't return from this function.
+ */
+
+/*
+ * Implements the 'kdump' command: mark a kdump as requested and, when not
+ * already running on kdb_initial_cpu, ask kdb_cpu() to switch back to it.
+ * argc and argv are not used.
+ *
+ * Returns KDB_CMD_CPU when already on the initial cpu or the switch was
+ * accepted; otherwise the kdump request is withdrawn and the value
+ * returned by kdb_cpu() is passed back.
+ */
+static int
+kdb_kdump(int argc, const char **argv)
+{
+	char cpu_id[12];	/* any int in decimal, with sign and NUL */
+	const char *cpu_argv[] = {NULL, cpu_id, NULL};
+	int ret;
+
+	kdb_kdump_state = KDB_KDUMP_KDUMP;
+
+	/* already on the initial cpu: no switch needed */
+	if (smp_processor_id() == kdb_initial_cpu)
+		return KDB_CMD_CPU;
+
+	/* Switch back to the initial cpu before process kdump command */
+	snprintf(cpu_id, sizeof(cpu_id), "%d", kdb_initial_cpu);
+	ret = kdb_cpu(1, cpu_argv);
+	if (ret != KDB_CMD_CPU) {
+		kdb_printf("kdump: Failed to switch to initial cpu %d;"
+			   " aborted\n", kdb_initial_cpu);
+		kdb_kdump_state = KDB_KDUMP_RESET;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_KDB_KDUMP */
+
+static int kdb_quiet(int reason)
+{
+	/* True for the two reason codes kdb_local handles as silent entry, silent exit. */
+	return reason == KDB_REASON_SILENT || reason == KDB_REASON_CPU_UP;
+}
+
+/*
+ * kdb_local
+ *
+ *	The main code for kdb.  This routine is invoked on a specific
+ *	processor, it is not global.  The main kdb() routine ensures
+ *	that only one processor at a time is in this routine.  This
+ *	code is called with the real reason code on the first entry
+ *	to a kdb session, thereafter it is called with reason SWITCH,
+ *	even if the user goes back to the original cpu.
+ *
+ * Inputs:
+ *	reason		The reason KDB was invoked
+ *	error		The hardware-defined error code
+ *	regs		The exception frame at time of fault/breakpoint.  NULL
+ *			for reason SILENT or CPU_UP, otherwise valid.
+ *	db_result	Result code from the break or debug point.
+ * Returns:
+ *	0	KDB was invoked for an event which it wasn't responsible
+ *	1	KDB handled the event for which it was invoked.
+ *	KDB_CMD_GO	User typed 'go'.
+ *	KDB_CMD_CPU	User switched to another cpu.
+ *	KDB_CMD_SS	Single step.
+ *	KDB_CMD_SSB	Single step until branch.
+ * Locking:
+ *	none
+ * Remarks:
+ *	kdb_prompt_str is treated as a CMD_BUFLEN byte buffer throughout.
+ */
+
+static int
+kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_dbtrap_t db_result)
+{
+	char *cmdbuf;
+	int diag;
+	struct task_struct *kdb_current = kdb_curr_task(smp_processor_id());
+
+#ifdef CONFIG_KDB_KDUMP
+	kdb_kdump_check(regs);
+#endif
+
+	/* If kdb has been entered for an event which has been/will be
+	 * recovered then silently return.  We have to get this far into kdb in
+	 * order to synchronize all the cpus, typically only one cpu (monarch)
+	 * knows that the event is recoverable but the other cpus (slaves) may
+	 * also be driven into kdb before that decision is made by the monarch.
+	 *
+	 * To pause in kdb even for recoverable events, 'set RECOVERY_PAUSE 1'
+	 */
+	KDB_DEBUG_STATE("kdb_local 1", reason);
+	if (reason == KDB_REASON_ENTER
+	    && KDB_FLAG(RECOVERY)
+	    && !KDB_FLAG(CATASTROPHIC)) {
+		int recovery_pause = 0;
+		kdbgetintenv("RECOVERY_PAUSE", &recovery_pause);
+		if (recovery_pause == 0)
+			reason = KDB_REASON_SILENT;
+		else
+			kdb_printf("%s: Recoverable error detected but"
+				   " RECOVERY_PAUSE is set, staying in KDB\n",
+				   __FUNCTION__);
+	}
+
+	KDB_DEBUG_STATE("kdb_local 2", reason);
+	kdb_go_count = 0;
+	if (kdb_quiet(reason)) {
+		/* no message */
+	} else if (reason == KDB_REASON_DEBUG) {
+		/* special case below */
+	} else {
+		kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+		kdb_printf("on processor %d ", smp_processor_id());
+#endif
+	}
+
+	switch (reason) {
+	case KDB_REASON_DEBUG:
+	{
+		/*
+		 * If re-entering kdb after a single step
+		 * command, don't print the message.
+		 */
+		switch(db_result) {
+		case KDB_DB_BPT:
+			kdb_printf("\nEntering kdb (0x%p, pid %d) ", kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+			kdb_printf("on processor %d ", smp_processor_id());
+#endif
+			kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+			break;
+		case KDB_DB_SSB:
+			/*
+			 * In the midst of ssb command. Just return.
+			 */
+			KDB_DEBUG_STATE("kdb_local 3", reason);
+			return KDB_CMD_SSB;	/* Continue with SSB command */
+
+			break;
+		case KDB_DB_SS:
+			break;
+		case KDB_DB_SSBPT:
+			KDB_DEBUG_STATE("kdb_local 4", reason);
+			return 1;	/* kdba_db_trap did the work */
+		default:
+			kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",
+				   db_result);
+			break;
+		}
+
+	}
+		break;
+	case KDB_REASON_ENTER:
+		if (KDB_STATE(KEYBOARD))
+			kdb_printf("due to Keyboard Entry\n");
+		else {
+			kdb_printf("due to KDB_ENTER()\n");
+		}
+		break;
+	case KDB_REASON_KEYBOARD:
+		KDB_STATE_SET(KEYBOARD);
+		kdb_printf("due to Keyboard Entry\n");
+		break;
+	case KDB_REASON_ENTER_SLAVE:	/* drop through, slaves only get released via cpu switch */
+	case KDB_REASON_SWITCH:
+		kdb_printf("due to cpu switch\n");
+		if (KDB_STATE(GO_SWITCH)) {
+			KDB_STATE_CLEAR(GO_SWITCH);
+			KDB_DEBUG_STATE("kdb_local 5", reason);
+			return KDB_CMD_GO;
+		}
+		break;
+	case KDB_REASON_OOPS:
+		kdb_printf("Oops: %s\n", kdb_diemsg);
+		kdb_printf("due to oops @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+		kdba_dumpregs(regs, NULL, NULL);
+		break;
+	case KDB_REASON_NMI:
+		kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n",
+			   kdba_getpc(regs));
+		kdba_dumpregs(regs, NULL, NULL);
+		break;
+	case KDB_REASON_BREAK:
+		kdb_printf("due to Breakpoint @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+		/*
+		 * Determine if this breakpoint is one that we
+		 * are interested in.
+		 */
+		if (db_result != KDB_DB_BPT) {
+			kdb_printf("kdb: error return from kdba_bp_trap: %d\n", db_result);
+			KDB_DEBUG_STATE("kdb_local 6", reason);
+			return 0;	/* Not for us, dismiss it */
+		}
+		break;
+	case KDB_REASON_RECURSE:
+		kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+		break;
+	case KDB_REASON_CPU_UP:
+	case KDB_REASON_SILENT:
+		KDB_DEBUG_STATE("kdb_local 7", reason);
+		if (reason == KDB_REASON_CPU_UP)
+			kdba_cpu_up();
+		return KDB_CMD_GO;	/* Silent entry, silent exit */
+		break;
+	default:
+		kdb_printf("kdb: unexpected reason code: %d\n", reason);
+		KDB_DEBUG_STATE("kdb_local 8", reason);
+		return 0;	/* Not for us, dismiss it */
+	}
+
+	kdba_local_arch_setup();
+
+	kdba_set_current_task(kdb_current);
+
+	while (1) {
+		/*
+		 * Initialize pager context.
+		 */
+		kdb_nextline = 1;
+		KDB_STATE_CLEAR(SUPPRESS);
+#ifdef kdba_setjmp
+		/*
+		 * Use kdba_setjmp/kdba_longjmp to break out of
+		 * the pager early and to attempt to recover from kdb errors.
+		 */
+		KDB_STATE_CLEAR(LONGJMP);
+		if (kdbjmpbuf) {
+			if (kdba_setjmp(&kdbjmpbuf[smp_processor_id()])) {
+				/* Command aborted (usually in pager) */
+				continue;
+			}
+			else
+				KDB_STATE_SET(LONGJMP);
+		}
+#endif	/* kdba_setjmp */
+
+		cmdbuf = cmd_cur;
+		*cmdbuf = '\0';
+		*(cmd_hist[cmd_head])='\0';
+
+		if (KDB_FLAG(ONLY_DO_DUMP)) {
+			/* kdb is off but a catastrophic error requires a dump.
+			 * Take the dump and reboot.
+			 * Turn on logging so the kdb output appears in the log
+			 * buffer in the dump.
+			 */
+			const char *setargs[] = { "set", "LOGGING", "1" };
+			kdb_set(2, setargs);
+			kdb_do_dump();
+			kdb_reboot(0, NULL);
+			/*NOTREACHED*/
+		}
+
+do_full_getstr:
+#if defined(CONFIG_SMP)
+		snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"), smp_processor_id());
+#else
+		snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));
+#endif
+		if (defcmd_in_progress)	/* strncat's count is the room left, not the buffer size */
+			strncat(kdb_prompt_str, "[defcmd]", CMD_BUFLEN - strlen(kdb_prompt_str) - 1);
+
+		/*
+		 * Fetch command from keyboard
+		 */
+		cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);
+		if (*cmdbuf != '\n') {
+			if (*cmdbuf < 32) {
+				if(cmdptr == cmd_head) {
+					strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
+					*(cmd_hist[cmd_head]+strlen(cmd_hist[cmd_head])-1) = '\0';
+				}
+				if(!handle_ctrl_cmd(cmdbuf))
+					*(cmd_cur+strlen(cmd_cur)-1) = '\0';
+				cmdbuf = cmd_cur;
+				goto do_full_getstr;
+			}
+			else
+				strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
+
+			cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;
+			if (cmd_head == cmd_tail) cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;
+
+		}
+
+		cmdptr = cmd_head;
+		diag = kdb_parse(cmdbuf);
+		if (diag == KDB_NOTFOUND) {
+			kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
+			diag = 0;
+		}
+		if (diag == KDB_CMD_GO
+		    || diag == KDB_CMD_CPU
+		    || diag == KDB_CMD_SS
+		    || diag == KDB_CMD_SSB)
+			break;
+
+		if (diag)
+			kdb_cmderror(diag);
+	}
+
+	kdba_local_arch_cleanup();
+
+	KDB_DEBUG_STATE("kdb_local 9", diag);
+	return diag;
+}
+
+
+/*
+ * kdb_print_state
+ *
+ *	Debugging aid: emit one line with this cpu's kdb state word, the
+ *	initial cpu and a caller supplied tag and value.
+ *
+ * Inputs:
+ *	text	Tag naming the debug point
+ *	value	Integer to print next to the tag, e.g. a reason code.
+ * Returns:
+ *	None.
+ * Locking:
+ *	none
+ */
+
+void kdb_print_state(const char *text, int value)
+{
+	int cpu = smp_processor_id();
+	kdb_printf("state: %s cpu %d value %d initial %d state %x\n",
+		   text, cpu, value, kdb_initial_cpu, kdb_state[cpu]);
+}
+
+/*
+ * kdb_previous_event
+ *
+ *	Count the cpus whose LEAVING state is still set, i.e. those that
+ *	have not yet finished with the previous kdb event.
+ *
+ * Inputs:
+ *	None.
+ * Returns:
+ *	Number of cpus still in the previous event.
+ * Locking:
+ *	none
+ * Remarks:
+ *	Every slot up to NR_CPUS is examined.
+ */
+
+static int
+kdb_previous_event(void)
+{
+	int cpu, still_leaving = 0;
+
+	/* Tally one for each cpu flagged LEAVING. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		still_leaving += KDB_STATE_CPU(LEAVING, cpu) ? 1 : 0;
+	return still_leaving;
+}
+
+/*
+ * kdb_wait_for_cpus
+ *
+ *	Called once by the controlling cpu at the start of a kdb event; waits
+ *	up to kdb_wait_for_cpus_secs seconds for the other cpus to enter kdb.
+ *
+ * Inputs:
+ *	none
+ * Returns:
+ *	none
+ * Locking:
+ *	none
+ * Remarks:
+ *	On SMP, KDB_VECTOR is handed back before returning.
+ */
+
+int kdb_wait_for_cpus_secs;
+
+static void
+kdb_wait_for_cpus(void)
+{
+#ifdef CONFIG_SMP
+	int n_online = 0, n_in_kdb = 0, last_reported = 0, cpu, secs;
+
+	mdelay(100);
+	for (secs = 0; secs < kdb_wait_for_cpus_secs; ++secs) {
+		n_online = n_in_kdb = 0;
+		for_each_online_cpu(cpu) {
+			++n_online;
+			if (kdb_running_process[cpu].seqno >= kdb_seqno - 1)
+				++n_in_kdb;
+		}
+		if (n_online == n_in_kdb)
+			break;
+		if (last_reported != n_in_kdb) {
+			kdb_nextline = 0;	/* no prompt yet */
+			kdb_printf(" %d out of %d cpus in kdb, waiting for the rest, timeout in %d second(s)\n",
+				   n_in_kdb, n_online, kdb_wait_for_cpus_secs - secs);
+			last_reported = n_in_kdb;
+		}
+		touch_nmi_watchdog();
+		mdelay(1000);
+		/* Architectures may want to send a more forceful interrupt */
+		if (secs == min(kdb_wait_for_cpus_secs / 2, 5))
+			kdba_wait_for_cpus();
+		if (!(secs % 4))
+			kdb_printf(".");
+	}
+	if (secs != 0) {
+		int missing = n_online - n_in_kdb;
+		int single = (missing == 1);
+
+		if (missing != 0)
+			kdb_printf("%d cpu%s not in kdb, %s state is unknown\n",
+				   missing, single ? " is" : "s are", single ? "its" : "their");
+		else
+			kdb_printf("All cpus are now in kdb\n");
+	}
+	/* give back the vector we took over in smp_kdb_stop */
+	kdba_giveback_vector(KDB_VECTOR);
+#endif	/* CONFIG_SMP */
+}
+
+/*
+ * kdb_main_loop
+ *
+ * The main kdb loop. After initial setup and assignment of the controlling
+ * cpu, all cpus are in this loop. One cpu is in control and will issue the kdb
+ * prompt, the others will spin until 'go' or cpu switch.
+ *
+ * To get a consistent view of the kernel stacks for all processes, this routine
+ * is invoked from the main kdb code via an architecture specific routine.
+ * kdba_main_loop is responsible for making the kernel stacks consistent for all
+ * processes, there should be no difference between a blocked process and a
+ * running process as far as kdb is concerned.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * reason2 kdb's current reason code. Initially the same as reason but can
+ * change according to kdb state (it becomes SWITCH after a cpu switch).
+ * db_result Result code from break or debug point.
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it
+ * should always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event which it wasn't responsible
+ * 1 KDB handled the event for which it was invoked.
+ * Locking:
+ * none
+ * Remarks:
+ * The value returned is the last kdb_local() result, so KDB_CMD_* codes are passed back too.
+ */
+
+int
+kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+ kdb_dbtrap_t db_result, struct pt_regs *regs)
+{
+ int result = 1; /* returned unchanged if this cpu leaves before ever calling kdb_local() */
+ /* Stay in kdb() until 'go', 'ss[b]' or an error */
+ while (1) {
+ /*
+ * All processors except the one that is in control
+ * will spin here.
+ */
+ KDB_DEBUG_STATE("kdb_main_loop 1", reason);
+ while (KDB_STATE(HOLD_CPU)) {
+ /* state KDB is turned off by kdb_cpu to see if the
+ * other cpus are still live, each cpu in this loop
+ * turns it back on.
+ */
+ if (!KDB_STATE(KDB)) {
+ KDB_STATE_SET(KDB);
+ }
+
+#if defined(CONFIG_KDB_KDUMP)
+ if (KDB_STATE(KEXEC)) { /* held cpu told to shut down for kdump */
+ struct pt_regs r; /* stand-in frame, left uninitialised, used only when regs is NULL */
+ if (regs == NULL)
+ regs = &r;
+
+ kdba_kdump_shutdown_slave(regs);
+ return 0;
+ }
+#endif
+ }
+
+ KDB_STATE_CLEAR(SUPPRESS);
+ KDB_DEBUG_STATE("kdb_main_loop 2", reason);
+ if (KDB_STATE(LEAVING))
+ break; /* Another cpu said 'go' */
+
+ if (!kdb_quiet(reason))
+ kdb_wait_for_cpus();
+ /* Still using kdb, this processor is in control */
+ result = kdb_local(reason2, error, regs, db_result);
+ KDB_DEBUG_STATE("kdb_main_loop 3", result);
+
+ if (result == KDB_CMD_CPU) {
+ /* Cpu switch, hold the current cpu, release the target one. */
+ reason2 = KDB_REASON_SWITCH;
+ KDB_STATE_SET(HOLD_CPU);
+ KDB_STATE_CLEAR_CPU(HOLD_CPU, kdb_new_cpu);
+ continue; /* back to the HOLD_CPU spin at the top of the loop */
+ }
+
+ if (result == KDB_CMD_SS) {
+ KDB_STATE_SET(DOING_SS);
+ break;
+ }
+
+ if (result == KDB_CMD_SSB) {
+ KDB_STATE_SET(DOING_SS); /* SSB sets both single-step flags */
+ KDB_STATE_SET(DOING_SSB);
+ break;
+ }
+
+ if (result && result != 1 && result != KDB_CMD_GO)
+ kdb_printf("\nUnexpected kdb_local return code %d\n", result);
+
+ KDB_DEBUG_STATE("kdb_main_loop 4", reason);
+ break;
+ }
+ if (KDB_STATE(DOING_SS))
+ KDB_STATE_CLEAR(SSBPT);
+ return result;
+}
+
+/* iapc_boot_arch (the FADT boot flags) only exists from ACPI 2.0, FADT
+ * revision 3 onwards; an older FADT is taken to mean an i8042 I/O device
+ * is present.  ACPI initialises after KDB initialises but before KDB is
+ * used, so the FADT is consulted on each entry to KDB.
+ */
+static void kdb_check_i8042(void)
+{
+	KDB_FLAG_CLEAR(NO_I8042);
+#ifdef CONFIG_ACPI
+	if (acpi_gbl_FADT.header.revision < 3)
+		return;
+	if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_8042))
+		KDB_FLAG_SET(NO_I8042);
+#endif	/* CONFIG_ACPI */
+}
+
+/*
+ * kdb
+ *
+ * This function is the entry point for the kernel debugger. It
+ * provides a command parser and associated support functions to
+ * allow examination and control of an active kernel.
+ *
+ * The breakpoint trap code should invoke this function with
+ * one of KDB_REASON_BREAK (int 03) or KDB_REASON_DEBUG (debug register)
+ *
+ * the die_if_kernel function should invoke this function with
+ * KDB_REASON_OOPS.
+ *
+ * In single step mode, one cpu is released to run without
+ * breakpoints. Interrupts and NMI are reset to their original values,
+ * the cpu is allowed to do one instruction which causes a trap
+ * into kdb with KDB_REASON_DEBUG.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it
+ * should always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event which it wasn't responsible
+ * 1 KDB handled the event for which it was invoked.
+ * Locking:
+ * none
+ * Remarks:
+ * No assumptions of system state. This function may be invoked
+ * with arbitrary locks held. It will stop all other processors
+ * in an SMP environment, disable all interrupts and does not use
+ * the operating systems keyboard driver.
+ *
+ * This code is reentrant but only for cpu switch. Any other
+ * reentrancy is an error, although kdb will attempt to recover.
+ *
+ * At the start of a kdb session the initial processor is running
+ * kdb() and the other processors can be doing anything. When the
+ * initial processor calls smp_kdb_stop() the other processors are
+ * driven through kdb_ipi which calls kdb() with reason SWITCH.
+ * That brings all processors into this routine, one with a "real"
+ * reason code, the others with SWITCH.
+ *
+ * Because the other processors are driven via smp_kdb_stop(),
+ * they enter here from the NMI handler. Until the other
+ * processors exit from here and exit from kdb_ipi, they will not
+ * take any more NMI requests. The initial cpu will still take NMI.
+ *
+ * Multiple race and reentrancy conditions, each with different
+ * avoidance mechanisms.
+ *
+ * Two cpus hit debug points at the same time.
+ *
+ * kdb_lock and kdb_initial_cpu ensure that only one cpu gets
+ * control of kdb. The others spin on kdb_initial_cpu until
+ * they are driven through NMI into kdb_ipi. When the initial
+ * cpu releases the others from NMI, they resume trying to get
+ * kdb_initial_cpu to start a new event.
+ *
+ * A cpu is released from kdb and starts a new event before the
+ * original event has completely ended.
+ *
+ * kdb_previous_event() prevents any cpu from entering
+ * kdb_initial_cpu state until the previous event has completely
+ * ended on all cpus.
+ *
+ * An exception occurs inside kdb.
+ *
+ * kdb_initial_cpu detects recursive entry to kdb and attempts
+ * to recover. The recovery uses longjmp() which means that
+ * recursive calls to kdb never return. Beware of assumptions
+ * like
+ *
+ * ++depth;
+ * kdb();
+ * --depth;
+ *
+ * If the kdb call is recursive then longjmp takes over and
+ * --depth is never executed.
+ *
+ * NMI handling.
+ *
+ * NMI handling is tricky. The initial cpu is invoked by some kdb event,
+ * this event could be NMI driven but usually is not. The other cpus are
+ * driven into kdb() via kdb_ipi which uses NMI so at the start the other
+ * cpus will not accept NMI. Some operations such as SS release one cpu
+ * but hold all the others. Releasing a cpu means it drops back to
+ * whatever it was doing before the kdb event, this means it drops out of
+ * kdb_ipi and hence out of NMI status. But the software watchdog uses
+ * NMI and we do not want spurious watchdog calls into kdb. kdba_read()
+ * resets the watchdog counters in its input polling loop, when a kdb
+ * command is running it is subject to NMI watchdog events.
+ *
+ * Another problem with NMI handling is the NMI used to drive the other
+ * cpus into kdb cannot be distinguished from the watchdog NMI. State
+ * flag WAIT_IPI indicates that a cpu is waiting for NMI via kdb_ipi,
+ * if not set then software NMI is ignored by kdb_ipi.
+ *
+ * Cpu switching.
+ *
+ * All cpus are in kdb (or they should be), all but one are
+ * spinning on KDB_STATE(HOLD_CPU). Only one cpu is not in
+ * HOLD_CPU state, only that cpu can handle commands.
+ *
+ * Go command entered.
+ *
+ * If necessary, go will switch to the initial cpu first. If the event
+ * was caused by a software breakpoint (assumed to be global) that
+ * requires single-step to get over the breakpoint then only release the
+ * initial cpu, after the initial cpu has single-stepped the breakpoint
+ * then release the rest of the cpus. If SSBPT is not required then
+ * release all the cpus at once.
+ */
+
+int
+kdb(kdb_reason_t reason, int error, struct pt_regs *regs)
+{ /* Entry point of the kernel debugger; returns 1 if kdb handled the event, 0 if not. */
+ kdb_intstate_t int_state; /* Interrupt state */
+ kdb_reason_t reason2 = reason; /* keeps the original reason; 'reason' itself may be rewritten below */
+ int result = 0; /* Default is kdb did not handle it */
+ int ss_event, old_regs_saved = 0;
+ struct pt_regs *old_regs = NULL;
+ kdb_dbtrap_t db_result=KDB_DB_NOBPT;
+ preempt_disable(); /* paired with preempt_enable() at out: */
+ atomic_inc(&kdb_event); /* dropped again at out: */
+
+ switch(reason) {
+ case KDB_REASON_OOPS:
+ case KDB_REASON_NMI:
+ KDB_FLAG_SET(CATASTROPHIC); /* kernel state is dubious now */
+ break;
+ default:
+ break;
+ }
+ switch(reason) { /* for these reasons, publish regs via set_irq_regs() and remember the old value */
+ case KDB_REASON_ENTER:
+ case KDB_REASON_ENTER_SLAVE:
+ case KDB_REASON_BREAK:
+ case KDB_REASON_DEBUG:
+ case KDB_REASON_OOPS:
+ case KDB_REASON_SWITCH:
+ case KDB_REASON_KEYBOARD:
+ case KDB_REASON_NMI:
+ if (regs && regs != get_irq_regs()) {
+ old_regs = set_irq_regs(regs);
+ old_regs_saved = 1;
+ }
+ break;
+ default:
+ break;
+ }
+ if (kdb_continue_catastrophic > 2) {
+ kdb_printf("kdb_continue_catastrophic is out of range, setting to 2\n");
+ kdb_continue_catastrophic = 2;
+ }
+ if (!kdb_on && KDB_FLAG(CATASTROPHIC) && kdb_continue_catastrophic == 2) {
+ KDB_FLAG_SET(ONLY_DO_DUMP);
+ }
+ if (!kdb_on && !KDB_FLAG(ONLY_DO_DUMP))
+ goto out; /* NOTE(review): this and the later 'goto out' paths skip the set_irq_regs(old_regs) restore and KDB_FLAG_CLEAR(CATASTROPHIC) done before out: - confirm intended */
+
+ KDB_DEBUG_STATE("kdb 1", reason);
+ KDB_STATE_CLEAR(SUPPRESS);
+
+ /* Filter out userspace breakpoints first, no point in doing all
+ * the kdb smp fiddling when it is really a gdb trap.
+ * Save the single step status first, kdba_db_trap clears ss status.
+ * kdba_b[dp]_trap sets SSBPT if required.
+ */
+ ss_event = KDB_STATE(DOING_SS) || KDB_STATE(SSBPT);
+#ifdef CONFIG_CPU_XSCALE
+ if ( KDB_STATE(A_XSC_ICH) ) {
+ /* restore changed I_BIT */
+ KDB_STATE_CLEAR(A_XSC_ICH);
+ kdba_restore_retirq(regs, KDB_STATE(A_XSC_IRQ));
+ if ( !ss_event ) {
+ kdb_printf("Stranger!!! Why IRQ bit is changed====\n");
+ }
+ }
+#endif
+ if (reason == KDB_REASON_BREAK) {
+ db_result = kdba_bp_trap(regs, error); /* Only call this once */
+ }
+ if (reason == KDB_REASON_DEBUG) {
+ db_result = kdba_db_trap(regs, error); /* Only call this once */
+ }
+
+ if ((reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG)
+ && db_result == KDB_DB_NOBPT) {
+ KDB_DEBUG_STATE("kdb 2", reason);
+ goto out; /* Not one of mine */
+ }
+
+ /* Turn off single step if it was being used */
+ if (ss_event) {
+ kdba_clearsinglestep(regs);
+ /* Single step after a breakpoint removes the need for a delayed reinstall */
+ if (reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG)
+ KDB_STATE_CLEAR(SSBPT);
+ }
+
+ /* kdb can validly reenter but only for certain well defined conditions */
+ if (reason == KDB_REASON_DEBUG
+ && !KDB_STATE(HOLD_CPU)
+ && ss_event)
+ KDB_STATE_SET(REENTRY);
+ else
+ KDB_STATE_CLEAR(REENTRY);
+
+ /* Wait for previous kdb event to completely exit before starting
+ * a new event.
+ */
+ while (kdb_previous_event())
+ ;
+ KDB_DEBUG_STATE("kdb 3", reason);
+
+ /*
+ * If kdb is already active, print a message and try to recover.
+ * If recovery is not possible and recursion is allowed or
+ * forced recursion without recovery is set then try to recurse
+ * in kdb. Not guaranteed to work but it makes an attempt at
+ * debugging the debugger.
+ */
+ if (reason != KDB_REASON_SWITCH &&
+ reason != KDB_REASON_ENTER_SLAVE) {
+ if (KDB_IS_RUNNING() && !KDB_STATE(REENTRY)) {
+ int recover = 1; /* cleared by each check below that rules out aborting the command */
+ unsigned long recurse = 0;
+ kdb_printf("kdb: Debugger re-entered on cpu %d, new reason = %d\n",
+ smp_processor_id(), reason);
+ /* Should only re-enter from released cpu */
+
+ if (KDB_STATE(HOLD_CPU)) {
+ kdb_printf(" Strange, cpu %d should not be running\n", smp_processor_id());
+ recover = 0;
+ }
+ if (!KDB_STATE(CMD)) {
+ kdb_printf(" Not executing a kdb command\n");
+ recover = 0;
+ }
+ if (!KDB_STATE(LONGJMP)) {
+ kdb_printf(" No longjmp available for recovery\n");
+ recover = 0;
+ }
+ kdbgetulenv("RECURSE", &recurse);
+ if (recurse > 1) {
+ kdb_printf(" Forced recursion is set\n");
+ recover = 0;
+ }
+ if (recover) {
+ kdb_printf(" Attempting to abort command and recover\n");
+#ifdef kdba_setjmp
+ kdba_longjmp(&kdbjmpbuf[smp_processor_id()], 0);
+#endif /* kdba_setjmp */
+ }
+ if (recurse) {
+ if (KDB_STATE(RECURSE)) {
+ kdb_printf(" Already in recursive mode\n");
+ } else {
+ kdb_printf(" Attempting recursive mode\n");
+ KDB_STATE_SET(RECURSE);
+ KDB_STATE_SET(REENTRY);
+ reason2 = KDB_REASON_RECURSE;
+ recover = 1;
+ }
+ }
+ if (!recover) {
+ kdb_printf(" Cannot recover, allowing event to proceed\n");
+ /*temp*/
+ while (KDB_IS_RUNNING())
+ cpu_relax();
+ goto out;
+ }
+ }
+ } else if (reason == KDB_REASON_SWITCH && !KDB_IS_RUNNING()) {
+ kdb_printf("kdb: CPU switch without kdb running, I'm confused\n");
+ goto out;
+ }
+
+ /*
+ * Disable interrupts, breakpoints etc. on this processor
+ * during kdb command processing
+ */
+ KDB_STATE_SET(KDB);
+ kdba_disableint(&int_state);
+ if (!KDB_STATE(KDB_CONTROL)) {
+ kdb_bp_remove_local();
+ KDB_STATE_SET(KDB_CONTROL);
+ }
+
+ /*
+ * If not entering the debugger due to CPU switch or single step
+ * reentry, serialize access here.
+ * The processors may race getting to this point - if,
+ * for example, more than one processor hits a breakpoint
+ * at the same time. We'll serialize access to kdb here -
+ * other processors will loop here, and the NMI from the stop
+ * IPI will take them into kdb as switch candidates. Once
+ * the initial processor releases the debugger, the rest of
+ * the processors will race for it.
+ *
+ * The above describes the normal state of affairs, where two or more
+ * cpus that are entering kdb at the "same" time are assumed to be for
+ * separate events. However some processes such as ia64 MCA/INIT will
+ * drive all the cpus into error processing at the same time. For that
+ * case, all of the cpus entering kdb at the "same" time are really a
+ * single event.
+ *
+ * That case is handled by the use of KDB_ENTER by one cpu (the
+ * monarch) and KDB_ENTER_SLAVE on the other cpus (the slaves).
+ * KDB_ENTER_SLAVE maps to KDB_REASON_ENTER_SLAVE. The slave events
+ * will be treated as if they had just responded to the kdb IPI, i.e.
+ * as if they were KDB_REASON_SWITCH.
+ *
+ * Because of races across multiple cpus, ENTER_SLAVE can occur before
+ * the main ENTER. Hold up ENTER_SLAVE here until the main ENTER
+ * arrives.
+ */
+
+ if (reason == KDB_REASON_ENTER_SLAVE) {
+ spin_lock(&kdb_lock);
+ while (!KDB_IS_RUNNING()) { /* spin with the lock dropped, re-check once it is retaken */
+ spin_unlock(&kdb_lock);
+ while (!KDB_IS_RUNNING())
+ cpu_relax();
+ spin_lock(&kdb_lock);
+ }
+ reason = KDB_REASON_SWITCH;
+ KDB_STATE_SET(HOLD_CPU);
+ spin_unlock(&kdb_lock);
+ }
+
+ if (reason == KDB_REASON_SWITCH || KDB_STATE(REENTRY))
+ ; /* drop through */
+ else {
+ KDB_DEBUG_STATE("kdb 4", reason);
+ spin_lock(&kdb_lock);
+ while (KDB_IS_RUNNING() || kdb_previous_event()) { /* same drop-lock-and-spin pattern as above */
+ spin_unlock(&kdb_lock);
+ while (KDB_IS_RUNNING() || kdb_previous_event())
+ cpu_relax();
+ spin_lock(&kdb_lock);
+ }
+ KDB_DEBUG_STATE("kdb 5", reason);
+
+ kdb_initial_cpu = smp_processor_id(); /* this cpu now owns the kdb event */
+ ++kdb_seqno;
+ spin_unlock(&kdb_lock);
+ if (!kdb_quiet(reason))
+ notify_die(DIE_KDEBUG_ENTER, "KDEBUG ENTER", regs, error, 0, 0);
+ }
+
+ if (smp_processor_id() == kdb_initial_cpu
+ && !KDB_STATE(REENTRY)) {
+ KDB_STATE_CLEAR(HOLD_CPU);
+ KDB_STATE_CLEAR(WAIT_IPI);
+ kdb_check_i8042();
+ /*
+ * Remove the global breakpoints. This is only done
+ * once from the initial processor on initial entry.
+ */
+ if (!kdb_quiet(reason) || smp_processor_id() == 0)
+ kdb_bp_remove_global();
+
+ /*
+ * If SMP, stop other processors. The other processors
+ * will enter kdb() with KDB_REASON_SWITCH and spin in
+ * kdb_main_loop().
+ */
+ KDB_DEBUG_STATE("kdb 6", reason);
+ if (NR_CPUS > 1 && !kdb_quiet(reason)) {
+ int i;
+ for (i = 0; i < NR_CPUS; ++i) {
+ if (!cpu_online(i))
+ continue;
+ if (i != kdb_initial_cpu) {
+ KDB_STATE_SET_CPU(HOLD_CPU, i);
+ KDB_STATE_SET_CPU(WAIT_IPI, i);
+ }
+ }
+ KDB_DEBUG_STATE("kdb 7", reason);
+ smp_kdb_stop();
+ KDB_DEBUG_STATE("kdb 8", reason);
+ }
+ }
+
+ if (KDB_STATE(GO1)) {
+ kdb_bp_remove_global(); /* They were set for single-step purposes */
+ KDB_STATE_CLEAR(GO1);
+ reason = KDB_REASON_SILENT; /* Now silently go */
+ }
+
+ /* Set up a consistent set of process stacks before talking to the user */
+ KDB_DEBUG_STATE("kdb 9", result);
+ result = kdba_main_loop(reason, reason2, error, db_result, regs);
+ reason = reason2; /* back to original event type */
+
+ KDB_DEBUG_STATE("kdb 10", result);
+ kdba_adjust_ip(reason, error, regs);
+ KDB_STATE_CLEAR(LONGJMP);
+ KDB_DEBUG_STATE("kdb 11", result);
+ /* go which requires single-step over a breakpoint must only release
+ * one cpu.
+ */
+ if (result == KDB_CMD_GO && KDB_STATE(SSBPT))
+ KDB_STATE_SET(GO1);
+
+ if (smp_processor_id() == kdb_initial_cpu &&
+ !KDB_STATE(DOING_SS) &&
+ !KDB_STATE(RECURSE)) {
+ /*
+ * (Re)install the global breakpoints and cleanup the cached
+ * symbol table. This is only done once from the initial
+ * processor on go.
+ */
+ KDB_DEBUG_STATE("kdb 12", reason);
+ if (!kdb_quiet(reason) || smp_processor_id() == 0) {
+ kdb_bp_install_global(regs);
+ kdbnearsym_cleanup();
+ debug_kusage();
+ }
+ if (!KDB_STATE(GO1)) {
+ /*
+ * Release all other cpus which will see KDB_STATE(LEAVING) is set.
+ */
+ int i;
+ for (i = 0; i < NR_CPUS; ++i) {
+ if (KDB_STATE_CPU(KDB, i))
+ KDB_STATE_SET_CPU(LEAVING, i);
+ KDB_STATE_CLEAR_CPU(WAIT_IPI, i);
+ KDB_STATE_CLEAR_CPU(HOLD_CPU, i);
+ }
+ /* Wait until all the other processors leave kdb */
+ while (kdb_previous_event() != 1) /* the single remaining LEAVING cpu is this one, cleared below */
+ ;
+ if (!kdb_quiet(reason))
+ notify_die(DIE_KDEBUG_LEAVE, "KDEBUG LEAVE", regs, error, 0, 0);
+ kdb_initial_cpu = -1; /* release kdb control */
+ KDB_DEBUG_STATE("kdb 13", reason);
+ }
+ }
+
+ KDB_DEBUG_STATE("kdb 14", result);
+ kdba_restoreint(&int_state);
+#ifdef CONFIG_CPU_XSCALE
+ if ( smp_processor_id() == kdb_initial_cpu &&
+ ( KDB_STATE(SSBPT) | KDB_STATE(DOING_SS) )
+ ) {
+ kdba_setsinglestep(regs);
+ // disable IRQ in stack frame
+ KDB_STATE_SET(A_XSC_ICH);
+ if ( kdba_disable_retirq(regs) ) {
+ KDB_STATE_SET(A_XSC_IRQ);
+ }
+ else {
+ KDB_STATE_CLEAR(A_XSC_IRQ);
+ }
+ }
+#endif
+
+ /* Only do this work if we are really leaving kdb */
+ if (!(KDB_STATE(DOING_SS) || KDB_STATE(SSBPT) || KDB_STATE(RECURSE))) {
+ KDB_DEBUG_STATE("kdb 15", result);
+ kdb_bp_install_local(regs);
+ if (old_regs_saved)
+ set_irq_regs(old_regs); /* undo the set_irq_regs(regs) done on entry */
+ KDB_STATE_CLEAR(KDB_CONTROL);
+ }
+
+ KDB_DEBUG_STATE("kdb 16", result);
+ KDB_FLAG_CLEAR(CATASTROPHIC);
+ KDB_STATE_CLEAR(IP_ADJUSTED); /* Re-adjust ip next time in */
+ KDB_STATE_CLEAR(KEYBOARD);
+ KDB_STATE_CLEAR(KDB); /* Main kdb state has been cleared */
+ KDB_STATE_CLEAR(RECURSE);
+ KDB_STATE_CLEAR(LEAVING); /* No more kdb work after this */
+ KDB_DEBUG_STATE("kdb 17", reason);
+out:
+ atomic_dec(&kdb_event);
+ preempt_enable();
+ return result != 0; /* any non-zero code from the main loop is reported as 1 */
+}
+
+/*
+ * kdb_mdr
+ *
+ *	Worker for the 'mdr' command: dump raw bytes as one hex string.
+ *	mdr <addr arg>,<byte count>
+ *
+ * Inputs:
+ *	addr	Address of the first byte
+ *	count	How many bytes to dump
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Always 0.  Any errors are detected and printed by kdb_getarea.
+ * Locking:
+ *	none.
+ * Remarks:
+ *	No trailing newline is printed when kdb_getarea reports an error.
+ */
+
+static int
+kdb_mdr(kdb_machreg_t addr, unsigned int count)
+{
+	unsigned char byte;
+
+	for (; count != 0; --count, ++addr) {
+		if (kdb_getarea(byte, addr))
+			return 0;	/* nonzero from kdb_getarea ends the dump early */
+		kdb_printf("%02x", byte);
+	}
+	kdb_printf("\n");
+	return 0;
+}
+
+/*
+ * kdb_md
+ *
+ * This function implements the 'md', 'md1', 'md2', 'md4', 'md8'
+ * 'mdr' and 'mds' commands.
+ *
+ * md|mds [<addr arg> [<line count> [<radix>]]]
+ * mdWcN [<addr arg> [<line count> [<radix>]]]
+ * where W = is the width (1, 2, 4 or 8) and N is the count.
+ * for eg., md1c20 reads 20 bytes, 1 at a time.
+ * mdr <addr arg>,<byte count>
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static void
+kdb_md_line(const char *fmtstr, kdb_machreg_t addr,
+ int symbolic, int nosect, int bytesperword,
+ int num, int repeat, int phys)
+{
+ /* Print just one line of data: the address, up to num words (fewer if repeat runs out or a read fails), then the ASCII column collected in cbuf. */
+ kdb_symtab_t symtab;
+ char cbuf[32]; /* printable form of the words that had no symbol match */
+ char *c = cbuf; /* next free slot in cbuf */
+ int i;
+ unsigned long word;
+
+ memset(cbuf, '\0', sizeof(cbuf));
+ if (phys)
+ kdb_printf("phys " kdb_machreg_fmt0 " ", addr);
+ else
+ kdb_printf(kdb_machreg_fmt0 " ", addr);
+
+ for (i = 0; i < num && repeat--; i++) {
+ if (phys) {
+ if (kdb_getphysword(&word, addr, bytesperword))
+ break;
+ } else if (kdb_getword(&word, addr, bytesperword))
+ break;
+ kdb_printf(fmtstr, word);
+ if (symbolic)
+ kdbnearsym(word, &symtab);
+ else
+ memset(&symtab, 0, sizeof(symtab)); /* guarantees sym_name is NULL so the else branch below runs */
+ if (symtab.sym_name) {
+ kdb_symbol_print(word, &symtab, 0);
+ if (!nosect) {
+ kdb_printf("\n");
+ kdb_printf(" %s %s "
+ kdb_machreg_fmt " " kdb_machreg_fmt " " kdb_machreg_fmt,
+ symtab.mod_name,
+ symtab.sec_name,
+ symtab.sec_start,
+ symtab.sym_start,
+ symtab.sym_end);
+ }
+ addr += bytesperword;
+ } else {
+ union {
+ u64 word;
+ unsigned char c[8];
+ } wc; /* gives byte-wise access to the value just read */
+ unsigned char *cp;
+#ifdef __BIG_ENDIAN
+ cp = wc.c + 8 - bytesperword; /* the low-order bytes sit at the end of the u64 */
+#else
+ cp = wc.c;
+#endif
+ wc.word = word;
+#define printable_char(c) ({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.';})
+ switch (bytesperword) { /* cases cascade: 8 emits 4+2+1+1 chars, 4 emits 2+1+1, 2 emits 1+1; other widths emit nothing */
+ case 8:
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ addr += 4; /* fallthrough */
+ case 4:
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ addr += 2; /* fallthrough */
+ case 2:
+ *c++ = printable_char(*cp++);
+ addr++; /* fallthrough */
+ case 1:
+ *c++ = printable_char(*cp++);
+ addr++;
+ break;
+ }
+#undef printable_char
+ }
+ }
+ kdb_printf("%*s %s\n", (int)((num-i)*(2*bytesperword + 1)+1), " ", cbuf); /* pad for the words not printed, then the ASCII column */
+}
+
+static int
+kdb_md(int argc, const char **argv)
+{
+ static kdb_machreg_t last_addr; /* the last_* values let an argument-less md continue the previous display */
+ static int last_radix, last_bytesperword, last_repeat;
+ int radix = 16, mdcount = 8, bytesperword = KDB_WORD_SIZE, repeat;
+ int nosect = 0;
+ char fmtchar, fmtstr[64];
+ kdb_machreg_t addr;
+ unsigned long word;
+ long offset = 0;
+ int symbolic = 0;
+ int valid = 0;
+ int phys = 0;
+ /* The environment supplies the defaults; the command name and the arguments below override them. */
+ kdbgetintenv("MDCOUNT", &mdcount);
+ kdbgetintenv("RADIX", &radix);
+ kdbgetintenv("BYTESPERWORD", &bytesperword);
+
+ /* Assume 'md <addr>' and start with environment values */
+ repeat = mdcount * 16 / bytesperword; /* NOTE(review): no zero check on bytesperword is visible before this division - confirm BYTESPERWORD cannot be 0 */
+
+ if (strcmp(argv[0], "mdr") == 0) {
+ if (argc != 2)
+ return KDB_ARGCOUNT;
+ valid = 1;
+ } else if (isdigit(argv[0][2])) { /* mdW or mdWcN */
+ bytesperword = (int)(argv[0][2] - '0');
+ if (bytesperword == 0) { /* 'md0': reuse the previous width, or 4 if there is none */
+ bytesperword = last_bytesperword;
+ if (bytesperword == 0) {
+ bytesperword = 4;
+ }
+ }
+ last_bytesperword = bytesperword;
+ repeat = mdcount * 16 / bytesperword;
+ if (!argv[0][3])
+ valid = 1;
+ else if (argv[0][3] == 'c' && argv[0][4]) {
+ char *p;
+ repeat = simple_strtoul(argv[0]+4, &p, 10); /* N of mdWcN is a word count */
+ mdcount = ((repeat * bytesperword) + 15) / 16;
+ valid = !*p; /* trailing characters after N make the command invalid */
+ }
+ last_repeat = repeat;
+ } else if (strcmp(argv[0], "md") == 0)
+ valid = 1;
+ else if (strcmp(argv[0], "mds") == 0)
+ valid = 1;
+ else if (strcmp(argv[0], "mdp") == 0) {
+ phys = valid = 1; /* mdp reads through kdb_getphysword instead of kdb_getword */
+ }
+ if (!valid)
+ return KDB_NOTFOUND;
+
+ if (argc == 0) { /* no arguments: continue from where the previous md stopped */
+ if (last_addr == 0)
+ return KDB_ARGCOUNT;
+ addr = last_addr;
+ radix = last_radix;
+ bytesperword = last_bytesperword;
+ repeat = last_repeat;
+ mdcount = ((repeat * bytesperword) + 15) / 16;
+ }
+
+ if (argc) {
+ kdb_machreg_t val;
+ int diag, nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ if (argc > nextarg+2)
+ return KDB_ARGCOUNT;
+
+ if (argc >= nextarg) { /* optional line count; a value that does not parse is ignored */
+ diag = kdbgetularg(argv[nextarg], &val);
+ if (!diag) {
+ mdcount = (int) val;
+ repeat = mdcount * 16 / bytesperword;
+ }
+ }
+ if (argc >= nextarg+1) { /* optional radix; a value that does not parse is ignored */
+ diag = kdbgetularg(argv[nextarg+1], &val);
+ if (!diag)
+ radix = (int) val;
+ }
+ }
+
+ if (strcmp(argv[0], "mdr") == 0) {
+ return kdb_mdr(addr, mdcount); /* for mdr the second argument is handed to kdb_mdr as its count */
+ }
+
+ switch (radix) {
+ case 10:
+ fmtchar = 'd';
+ break;
+ case 16:
+ fmtchar = 'x';
+ break;
+ case 8:
+ fmtchar = 'o';
+ break;
+ default:
+ return KDB_BADRADIX;
+ }
+
+ last_radix = radix;
+
+ if (bytesperword > KDB_WORD_SIZE)
+ return KDB_BADWIDTH;
+
+ switch (bytesperword) { /* build the per-word printf format for kdb_md_line */
+ case 8:
+ sprintf(fmtstr, "%%16.16l%c ", fmtchar);
+ break;
+ case 4:
+ sprintf(fmtstr, "%%8.8l%c ", fmtchar);
+ break;
+ case 2:
+ sprintf(fmtstr, "%%4.4l%c ", fmtchar);
+ break;
+ case 1:
+ sprintf(fmtstr, "%%2.2l%c ", fmtchar);
+ break;
+ default:
+ return KDB_BADWIDTH;
+ }
+
+ last_repeat = repeat;
+ last_bytesperword = bytesperword;
+
+ if (strcmp(argv[0], "mds") == 0) {
+ symbolic = 1;
+ /* Do not save these changes as last_*, they are temporary mds
+ * overrides.
+ */
+ bytesperword = KDB_WORD_SIZE;
+ repeat = mdcount; /* mds prints one word per line, so mdcount lines means mdcount words */
+ kdbgetintenv("NOSECT", &nosect);
+ }
+
+ /* Round address down modulo BYTESPERWORD */
+
+ addr &= ~(bytesperword-1);
+
+ while (repeat > 0) {
+ unsigned long a;
+ int n, z, num = (symbolic ? 1 : (16 / bytesperword));
+
+ for (a = addr, z = 0; z < repeat; a += bytesperword, ++z) { /* z = count of readable all-zero words starting at addr */
+ if (phys) {
+ if (kdb_getphysword(&word, a, bytesperword)
+ || word)
+ break;
+ } else if (kdb_getword(&word, a, bytesperword) || word)
+ break;
+ }
+ n = min(num, repeat);
+ kdb_md_line(fmtstr, addr, symbolic, nosect, bytesperword, num, repeat, phys);
+ addr += bytesperword * n;
+ repeat -= n;
+ z = (z + num - 1) / num; /* zero words converted to display lines, rounded up */
+ if (z > 2) { /* one zero line was just printed; replace the next z-2 with a single message */
+ int s = num * (z-2);
+ kdb_printf(kdb_machreg_fmt0 "-" kdb_machreg_fmt0 " zero suppressed\n",
+ addr, addr + bytesperword * s - 1);
+ addr += bytesperword * s;
+ repeat -= s;
+ }
+ }
+ last_addr = addr; /* a following argument-less md resumes here */
+
+ return 0;
+}
+
+/*
+ * kdb_mm
+ *
+ * This function implements the 'mm' (modify memory) command and 'mm<W>'.
+ *
+ * mm address-expression new-value
+ *
+ * Inputs:
+ * argc argument count; at least 2
+ * argv argument vector; argv[0] is the command name as typed
+ * Outputs:
+ * None. The address and the value written are echoed with kdb_printf.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * mm works on machine words, mmW writes W bytes where W is the digit argv[0][2].
+ */
+
+static int
+kdb_mm(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset = 0;
+ unsigned long contents;
+ int nextarg;
+ int width;
+
+ if (argv[0][2] && !isdigit(argv[0][2]))
+ return KDB_NOTFOUND; /* only 'mm' and 'mm<digit>' are handled here */
+
+ if (argc < 2) {
+ return KDB_ARGCOUNT;
+ }
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ return diag;
+
+ if (nextarg > argc)
+ return KDB_ARGCOUNT; /* the address expression used up every argument, no value left */
+
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &contents, NULL, NULL))) /* the new value is parsed as an address expression too */
+ return diag;
+
+ if (nextarg != argc + 1)
+ return KDB_ARGCOUNT; /* arguments left over after the value */
+
+ width = argv[0][2] ? (argv[0][2] - '0') : (KDB_WORD_SIZE);
+ if ((diag = kdb_putword(addr, contents, width)))
+ return diag;
+
+ kdb_printf(kdb_machreg_fmt " = " kdb_machreg_fmt "\n", addr, contents);
+
+ return 0;
+}
+
+/*
+ * kdb_go
+ *
+ * This function implements the 'go' command.
+ *
+ * go [address-expression]
+ *
+ * Inputs:
+ * argc argument count (0 or 1)
+ * argv argument vector; argv[1], if given, is the address to continue at
+ * Outputs:
+ * None.
+ * Returns:
+ * KDB_CMD_GO for success, a kdb diagnostic if error; 0 when a catastrophic
+ * error requires 'go' to be typed a second time
+ * Locking: none.
+ * Remarks: 'go <address>' is refused unless issued on kdb_initial_cpu.
+ */
+
+static int
+kdb_go(int argc, const char **argv)
+{
+ kdb_machreg_t addr;
+ int diag;
+ int nextarg;
+ long offset;
+ struct pt_regs *regs = get_irq_regs();
+
+ if (argc == 1) {
+ if (smp_processor_id() != kdb_initial_cpu) {
+ kdb_printf("go <address> must be issued from the initial cpu, do cpu %d first\n", kdb_initial_cpu);
+ return KDB_ARGCOUNT;
+ }
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg,
+ &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ kdba_setpc(regs, addr); /* continue at the requested address */
+ } else if (argc)
+ return KDB_ARGCOUNT;
+
+ diag = KDB_CMD_GO;
+ if (KDB_FLAG(CATASTROPHIC)) {
+ kdb_printf("Catastrophic error detected\n");
+ kdb_printf("kdb_continue_catastrophic=%d, ",
+ kdb_continue_catastrophic);
+ if (kdb_continue_catastrophic == 0 && kdb_go_count++ == 0) { /* setting 0: only the first 'go' is held back */
+ kdb_printf("type go a second time if you really want to continue\n");
+ return 0;
+ }
+ if (kdb_continue_catastrophic == 2) { /* setting 2: dump, then reboot */
+ kdb_do_dump();
+ kdb_printf("forcing reboot\n");
+ kdb_reboot(0, NULL);
+ }
+ kdb_printf("attempting to continue\n");
+ }
+ if (smp_processor_id() != kdb_initial_cpu) {
+ char buf[80];
+ kdb_printf("go was not issued from initial cpu, switching back to cpu %d\n", kdb_initial_cpu);
+ sprintf(buf, "cpu %d\n", kdb_initial_cpu);
+ /* Recursive use of kdb_parse, do not use argv after this point */
+ argv = NULL;
+ diag = kdb_parse(buf); /* the result of the 'cpu' command replaces KDB_CMD_GO as the return value */
+ if (diag == KDB_CMD_CPU)
+ KDB_STATE_SET_CPU(GO_SWITCH, kdb_initial_cpu);
+ }
+ return diag;
+}
+
+/*
+ * kdb_rd
+ *
+ * This function implements the 'rd' command.
+ *
+ * rd display all general registers.
+ * rd c display all control registers.
+ * rd d display all debug registers.
+ *
+ * Inputs:
+ * argc argument count (0 to 2)
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks: argv[1] and argv[2] are passed to kdba_dumpregs uninterpreted.
+ */
+
+static int
+kdb_rd(int argc, const char **argv)
+{
+ int diag;
+ if (argc == 0) {
+ if ((diag = kdb_check_regs()))
+ return diag;
+ return kdba_dumpregs(kdb_current_regs, NULL, NULL);
+ }
+
+ if (argc > 2) {
+ return KDB_ARGCOUNT;
+ }
+
+ if ((diag = kdb_check_regs()))
+ return diag;
+ return kdba_dumpregs(kdb_current_regs, argv[1], argc==2 ? argv[2]: NULL); /* third parameter is NULL unless two arguments were typed */
+}
+
+/*
+ * kdb_rm
+ *
+ * This function implements the 'rm' (register modify) command.
+ *
+ * rm register-name new-contents
+ *
+ * Inputs:
+ * argc argument count (must be 2)
+ * argv argv[1] is the register name, argv[2] the new contents
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Currently doesn't allow modification of control or
+ * debug registers.
+ */
+
+static int
+kdb_rm(int argc, const char **argv)
+{
+ int diag;
+ int ind = 0;
+ kdb_machreg_t contents;
+
+ if (argc != 2) {
+ return KDB_ARGCOUNT;
+ }
+
+ /*
+ * Allow presence or absence of leading '%' symbol.
+ */
+
+ if (argv[1][0] == '%')
+ ind = 1; /* the name handed to kdba_setregcontents starts after the '%' */
+
+ diag = kdbgetularg(argv[2], &contents);
+ if (diag)
+ return diag;
+
+ if ((diag = kdb_check_regs()))
+ return diag;
+ diag = kdba_setregcontents(&argv[1][ind], kdb_current_regs, contents);
+ if (diag)
+ return diag;
+
+ return 0;
+}
+
+#if defined(CONFIG_MAGIC_SYSRQ)
+/*
+ * kdb_sr
+ *
+ * This function implements the 'sr' (SYSRQ key) command which
+ * interfaces to the soi-disant MAGIC SYSRQ functionality.
+ *
+ * sr <magic-sysrq-code>
+ *
+ * Inputs:
+ * argc argument count (must be 1)
+ * argv argument vector; only the first character of argv[1] is used
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If __sysrq_enabled is zero it is set to 1 here and is not restored.
+ */
+static int
+kdb_sr(int argc, const char **argv)
+{
+ extern int __sysrq_enabled;
+ if (argc != 1) {
+ return KDB_ARGCOUNT;
+ }
+ if (!__sysrq_enabled) {
+ kdb_printf("Auto activating sysrq\n");
+ __sysrq_enabled = 1;
+ }
+
+ handle_sysrq(*argv[1], NULL); /* the sysrq key is the first character of the argument */
+
+ return 0;
+}
+#endif /* CONFIG_MAGIC_SYSRQ */
+
+/*
+ * kdb_ef
+ *
+ * This function implements the 'regs' (display exception frame)
+ * command. This command takes an address and expects to find
+ * an exception frame at that address, formats and prints it.
+ *
+ * regs address-expression
+ *
+ * Inputs:
+ * argc argument count (must be 1)
+ * argv argument vector; argv[1] is the address expression
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Not done yet. The address is cast to struct pt_regs * and passed to kdba_dumpregs; it is not validated here.
+ */
+
+static int
+kdb_ef(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset;
+ int nextarg;
+
+ if (argc == 1) {
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ return kdba_dumpregs((struct pt_regs *)addr, NULL, NULL);
+ }
+
+ return KDB_ARGCOUNT; /* any argument count other than 1 */
+}
+
+#if defined(CONFIG_MODULES)
+extern struct list_head *kdb_modules;
+extern void free_module(struct module *);
+
+/* One entry of a module's modules_which_use_me list. NOTE(review): presumably has to match a definition kept elsewhere in the kernel - confirm. */
+struct module_use
+{
+ struct list_head list; /* link used when walking mod->modules_which_use_me */
+ struct module *module_which_uses; /* its name is printed in the "Used by" column of lsmod */
+};
+
+/*
+ * kdb_lsmod
+ *
+ * This function implements the 'lsmod' command. Lists currently
+ * loaded kernel modules.
+ *
+ * Mostly taken from userland lsmod.
+ *
+ * Inputs:
+ * argc argument count (must be 0)
+ * argv argument vector (unused)
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * One row per module. The use count and "Used by" list need CONFIG_MODULE_UNLOAD.
+ */
+
+static int
+kdb_lsmod(int argc, const char **argv)
+{
+ struct module *mod;
+
+ if (argc != 0)
+ return KDB_ARGCOUNT;
+
+ kdb_printf("Module Size modstruct Used by\n");
+ list_for_each_entry(mod, kdb_modules, list) {
+ kdb_printf("%-20s%8u 0x%p ", mod->name,
+ mod->core_size, (void *)mod);
+#ifdef CONFIG_MODULE_UNLOAD
+ kdb_printf("%4d ", module_refcount(mod));
+#endif
+ if (mod->state == MODULE_STATE_GOING)
+ kdb_printf(" (Unloading)");
+ else if (mod->state == MODULE_STATE_COMING)
+ kdb_printf(" (Loading)");
+ else
+ kdb_printf(" (Live)");
+
+#ifdef CONFIG_MODULE_UNLOAD
+ {
+ struct module_use *use;
+ kdb_printf(" [ ");
+ list_for_each_entry(use, &mod->modules_which_use_me, list)
+ kdb_printf("%s ", use->module_which_uses->name);
+ kdb_printf("]");
+ }
+#endif
+ kdb_printf("\n"); /* end the row here so it is terminated without CONFIG_MODULE_UNLOAD too */
+ }
+
+ return 0;
+}
+
+#endif /* CONFIG_MODULES */
+
+/*
+ * kdb_env
+ *
+ * This function implements the 'env' command. Display the current
+ * environment variables.
+ *
+ * Inputs:
+ * argc argument count (not checked)
+ * argv argument vector (unused)
+ * Outputs:
+ * None.
+ * Returns:
+ * zero; no error path exists in this function
+ * Locking:
+ * none.
+ * Remarks: KDBFLAGS is appended only when KDB_DEBUG(MASK) is non-zero.
+ */
+
+static int
+kdb_env(int argc, const char **argv)
+{
+ int i;
+
+ for(i=0; i<__nenv; i++) {
+ if (__env[i]) { /* NULL slots are skipped */
+ kdb_printf("%s\n", __env[i]);
+ }
+ }
+
+ if (KDB_DEBUG(MASK))
+ kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
+
+ return 0;
+}
+
+/*
+ * kdb_dmesg
+ *
+ * This function implements the 'dmesg' command to display the contents
+ * of the syslog buffer.
+ *
+ * dmesg [lines] [adjust]
+ *
+ * Inputs:
+ * argc argument count (0 to 2)
+ * argv argument vector; argv[1] is lines, argv[2] is adjust (see Remarks)
+ * Outputs: None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking: none.
+ * Remarks:
+ * lines > 0 prints the last <lines> lines, lines < 0 the first <-lines> lines,
+ * 0 or an unparsable value prints everything. adjust first drops that many
+ * lines from the same end (newest for lines > 0, oldest for lines < 0).
+ */
+
+static int
+kdb_dmesg(int argc, const char **argv)
+{
+ char *syslog_data[4], *start, *end, c = '\0', *p;
+ int diag, logging, logsize, lines = 0, adjust = 0, n;
+
+ if (argc > 2)
+ return KDB_ARGCOUNT;
+ if (argc) {
+ char *cp;
+ lines = simple_strtol(argv[1], &cp, 0);
+ if (*cp)
+ lines = 0;
+ if (argc > 1) {
+ adjust = simple_strtoul(argv[2], &cp, 0);
+ if (*cp || adjust < 0)
+ adjust = 0;
+ }
+ }
+
+ /* disable LOGGING if set */
+ diag = kdbgetintenv("LOGGING", &logging);
+ if (!diag && logging) {
+ const char *setargs[] = { "set", "LOGGING", "0" };
+ kdb_set(2, setargs);
+ }
+
+ /* syslog_data[0,1] physical start, end+1. syslog_data[2,3] logical start, end+1. */
- debugger_syslog_data(syslog_data);
++ kdb_syslog_data(syslog_data);
+ if (syslog_data[2] == syslog_data[3])
+ return 0; /* empty log */
+ logsize = syslog_data[1] - syslog_data[0];
+ start = syslog_data[2];
+ end = syslog_data[3];
+#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
+ for (n = 0, p = start; p < end; ++p) { /* n = number of lines in the log */
+ if ((c = *KDB_WRAP(p)) == '\n')
+ ++n;
+ }
+ if (c != '\n')
+ ++n; /* count a final line that has no newline */
+ if (lines < 0) {
+ if (adjust >= n)
+ kdb_printf("buffer only contains %d lines, nothing printed\n", n);
+ else if (adjust - lines >= n)
+ kdb_printf("buffer only contains %d lines, last %d lines printed\n",
+ n, n - adjust);
+ if (adjust) {
+ for (; start < end && adjust; ++start) { /* move start past the first adjust newlines */
+ if (*KDB_WRAP(start) == '\n')
+ --adjust;
+ }
+ if (start < end)
+ ++start;
+ }
+ for (p = start; p < end && lines; ++p) { /* lines is negative here and counts up to zero */
+ if (*KDB_WRAP(p) == '\n')
+ ++lines;
+ }
+ end = p;
+ } else if (lines > 0) {
+ int skip = n - (adjust + lines); /* lines to pass over before printing starts */
+ if (adjust >= n) {
+ kdb_printf("buffer only contains %d lines, nothing printed\n", n);
+ skip = n;
+ } else if (skip < 0) {
+ lines += skip;
+ skip = 0;
+ kdb_printf("buffer only contains %d lines, first %d lines printed\n",
+ n, lines);
+ }
+ for (; start < end && skip; ++start) {
+ if (*KDB_WRAP(start) == '\n')
+ --skip;
+ }
+ for (p = start; p < end && lines; ++p) {
+ if (*KDB_WRAP(p) == '\n')
+ --lines;
+ }
+ end = p;
+ }
+ /* Do a line at a time (max 200 chars) to reduce protocol overhead */
+ c = '\n';
+ while (start != end) { /* NOTE(review): a NUL byte in range ends the inner loop without advancing start; looks like this could spin - confirm */
+ char buf[201];
+ p = buf;
+ while (start < end && (c = *KDB_WRAP(start)) && (p - buf) < sizeof(buf)-1) {
+ ++start;
+ *p++ = c;
+ if (c == '\n')
+ break;
+ }
+ *p = '\0';
+ kdb_printf("%s", buf);
+ }
+ if (c != '\n')
+ kdb_printf("\n"); /* terminate the output if the last character printed was not a newline */
+
+ return 0;
+}
+
+/*
+ * kdb_cpu
+ *
+ * This function implements the 'cpu' command.
+ *
+ * cpu [<cpunum>]
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * KDB_CMD_CPU for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * All cpu's should be spinning in kdb(). However just in case
+ * a cpu did not take the smp_kdb_stop NMI, check that a cpu
+ * entered kdb() before passing control to it.
+ */
+
+static void
+kdb_cpu_status(void)
+{
+ int i, start_cpu, first_print = 1;
+ char state, prev_state = '?';
+ /* Print the current cpu, then every cpu grouped into ranges of equal state; a non-blank state follows its range in parentheses. */
+ kdb_printf("Currently on cpu %d\n", smp_processor_id());
+ kdb_printf("Available cpus: ");
+ for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
+ if (!cpu_online(i))
+ state = 'F'; /* cpu is offline */
+ else {
+ struct kdb_running_process *krp = kdb_running_process+i;
+ if (KDB_STATE_CPU(KDB, i)) {
+ state = ' '; /* cpu is responding to kdb */
+ if (kdb_task_state_char(krp->p) == 'I')
+ state = 'I'; /* running the idle task */
+ } else if (krp->seqno && krp->p && krp->seqno >= kdb_seqno - 1)
+ state = '+'; /* some kdb data, but not responding */
+ else
+ state = '*'; /* no kdb data */
+ }
+ if (state != prev_state) { /* cpu i starts a new range: print the one that just ended */
+ if (prev_state != '?') { /* '?' is the initial value, so nothing to print before cpu 0 */
+ if (!first_print)
+ kdb_printf(", ");
+ first_print = 0;
+ kdb_printf("%d", start_cpu);
+ if (start_cpu < i-1)
+ kdb_printf("-%d", i-1);
+ if (prev_state != ' ')
+ kdb_printf("(%c)", prev_state);
+ }
+ prev_state = state;
+ start_cpu = i;
+ }
+ }
+ /* print the trailing cpus, ignoring them if they are all offline */
+ if (prev_state != 'F') {
+ if (!first_print)
+ kdb_printf(", ");
+ kdb_printf("%d", start_cpu);
+ if (start_cpu < i-1)
+ kdb_printf("-%d", i-1);
+ if (prev_state != ' ')
+ kdb_printf("(%c)", prev_state);
+ }
+ kdb_printf("\n");
+}
+
+static int
+kdb_cpu(int argc, const char **argv)
+{
+ unsigned long cpunum;
+ int diag, i;
+ /* 'cpu' alone prints the cpu status; 'cpu <n>' records n in kdb_new_cpu and returns KDB_CMD_CPU. */
+ /* ask the other cpus if they are still active */
+ for (i=0; i<NR_CPUS; i++) {
+ if (cpu_online(i))
+ KDB_STATE_CLEAR_CPU(KDB, i);
+ }
+ KDB_STATE_SET(KDB);
+ barrier();
+ /* wait for the other cpus to notice and set state KDB again,
+ * see kdb_main_loop
+ */
+ udelay(1000);
+
+ if (argc == 0) {
+ kdb_cpu_status();
+ return 0;
+ }
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ diag = kdbgetularg(argv[1], &cpunum);
+ if (diag)
+ return diag;
+
+ /*
+ * Validate cpunum: valid cpu numbers are 0..NR_CPUS-1, so NR_CPUS itself must be rejected before it is used as an index
+ */
+ if ((cpunum >= NR_CPUS)
+ || !cpu_online(cpunum)
+ || !KDB_STATE_CPU(KDB, cpunum))
+ return KDB_BADCPUNUM;
+
+ kdb_new_cpu = cpunum;
+
+ /*
+ * Switch to other cpu
+ */
+ return KDB_CMD_CPU;
+}
+
+/* The user may not realize that ps/bta with no parameters does not print idle
+ * or sleeping system daemon processes, so tell them how many were suppressed.
+ */
+void
+kdb_ps_suppressed(void)
+{
+ int idle = 0, daemon = 0;
+ unsigned long mask_I = kdb_task_state_string("I"),
+ mask_M = kdb_task_state_string("M");
+ unsigned long cpu;
+ const struct task_struct *p, *g;
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) { /* idle: online cpus whose current task matches state I */
+ if (!cpu_online(cpu))
+ continue;
+ p = kdb_curr_task(cpu);
+ if (kdb_task_state(p, mask_I))
+ ++idle;
+ }
+ kdb_do_each_thread(g, p) { /* daemon: threads matching state M */
+ if (kdb_task_state(p, mask_M))
+ ++daemon;
+ } kdb_while_each_thread(g, p);
+ if (idle || daemon) { /* print nothing at all when both counts are zero */
+ if (idle)
+ kdb_printf("%d idle process%s (state I)%s\n",
+ idle, idle == 1 ? "" : "es",
+ daemon ? " and " : "");
+ if (daemon)
+ kdb_printf("%d sleeping system daemon (state M) process%s",
+ daemon, daemon == 1 ? "" : "es");
+ kdb_printf(" suppressed,\nuse 'ps A' to see all.\n");
+ }
+}
+
+/*
+ * kdb_ps
+ *
+ * This function implements the 'ps' command which shows
+ * a list of the active processes.
+ *
+ * ps [DRSTCZEUIMA] All processes, optionally filtered by state
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+void
+kdb_ps1(const struct task_struct *p)
+{
+ struct kdb_running_process *krp = kdb_running_process + kdb_process_cpu(p); /* kdb_running_process entry for the cpu reported for p */
+ kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n", /* one ps row for task p */
+ (void *)p, p->pid, p->parent->pid,
+ kdb_task_has_cpu(p), kdb_process_cpu(p),
+ kdb_task_state_char(p),
+ (void *)(&p->thread),
+ p == kdb_curr_task(smp_processor_id()) ? '*': ' ', /* '*' marks the task current on the cpu running this code */
+ p->comm);
+ if (kdb_task_has_cpu(p)) { /* cross-check p against the saved per-cpu kdb data */
+ if (!krp->seqno || !krp->p)
+ kdb_printf(" Error: no saved data for this cpu\n");
+ else {
+ if (krp->seqno < kdb_seqno - 1)
+ kdb_printf(" Warning: process state is stale\n");
+ if (krp->p != p)
+ kdb_printf(" Error: does not match running process table (0x%p)\n", krp->p);
+ }
+ }
+}
+
+static int
+kdb_ps(int argc, const char **argv)
+{
+ struct task_struct *g, *p;
+ unsigned long mask, cpu;
+
+ if (argc == 0)
+ kdb_ps_suppressed(); /* with no filter given, report how many tasks are being left out */
+ kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n",
+ (int)(2*sizeof(void *))+2, "Task Addr",
+ (int)(2*sizeof(void *))+2, "Thread");
+ mask = kdb_task_state_string(argc ? argv[1] : NULL); /* NULL when no state filter was typed */
+ /* Run the active tasks first */
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_online(cpu))
+ continue;
+ p = kdb_curr_task(cpu);
+ if (kdb_task_state(p, mask))
+ kdb_ps1(p);
+ }
+ kdb_printf("\n");
+ /* Now the real tasks */
+ kdb_do_each_thread(g, p) {
+ if (kdb_task_state(p, mask))
+ kdb_ps1(p);
+ } kdb_while_each_thread(g, p);
+
+ return 0; /* no error path in this function */
+}
+
+/*
+ * kdb_pid
+ *
+ * This function implements the 'pid' command which switches
+ * the currently active process.
+ *
+ * pid [<pid> | R]
+ *
+ * Inputs:
+ * argc argument count (0 or 1)
+ * argv argument vector; argv[1] is a pid or the letter R
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success (also when no task has the given pid), a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks: with no argument the current kdb process is only reported.
+ */
+
+
+static int
+kdb_pid(int argc, const char **argv)
+{
+ struct task_struct *p;
+ unsigned long val;
+ int diag;
+
+ if (argc > 1)
+ return KDB_ARGCOUNT;
+
+ if (argc) {
+ if (strcmp(argv[1], "R") == 0) {
+ p = KDB_RUNNING_PROCESS_ORIGINAL[kdb_initial_cpu].p; /* 'R': the task recorded for kdb_initial_cpu */
+ } else {
+ diag = kdbgetularg(argv[1], &val);
+ if (diag)
+ return KDB_BADINT; /* any parse failure is reported as KDB_BADINT */
+
+ p = find_task_by_pid_ns((pid_t)val, &init_pid_ns);
+ if (!p) {
+ kdb_printf("No task with pid=%d\n", (pid_t)val);
+ return 0;
+ }
+ }
+
+ kdba_set_current_task(p);
+ }
+
+ kdb_printf("KDB current process is %s(pid=%d)\n", kdb_current_task->comm,
+ kdb_current_task->pid);
+
+ return 0;
+}
+
+/*
+ * kdb_ll
+ *
+ * This function implements the 'll' command which follows a linked
+ * list and executes an arbitrary command for each element.
+ *
+ * Inputs:
+ * argc argument count (must be 3)
+ * argv argv[1] first element address, argv[2] offset of the link field, argv[3] command
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks: the duplicated command string is freed on every path out of the loop.
+ */
+
+static int
+kdb_ll(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset = 0;
+ kdb_machreg_t va;
+ unsigned long linkoffset;
+ int nextarg;
+ const char *command;
+
+ if (argc != 3) {
+ return KDB_ARGCOUNT;
+ }
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ diag = kdbgetularg(argv[2], &linkoffset);
+ if (diag)
+ return diag;
+
+ /*
+ * Using the starting address as
+ * the first element in the list, and assuming that
+ * the list ends with a null pointer.
+ */
+
+ va = addr;
+ if (!(command = kdb_strdup(argv[3], GFP_KDB))) {
+ kdb_printf("%s: cannot duplicate command\n", __FUNCTION__);
+ return 0;
+ }
+ /* Recursive use of kdb_parse, do not use argv after this point */
+ argv = NULL;
+
+ while (va) {
+ char buf[80]; /* NOTE(review): the sprintf below is unbounded; a long command would overflow buf - confirm the caller limits it */
+
+ sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
+ diag = kdb_parse(buf);
+ if (diag)
+ break; /* leave through the kfree below instead of leaking command */
+
+ addr = va + linkoffset;
+ if (kdb_getword(&va, addr, sizeof(va)))
+ break; /* an unreadable link ends the walk; diag is 0 here, as before */
+ }
+ kfree(command);
+
+ return diag; /* 0 unless kdb_parse failed */
+}
+
+/*
+ * kdb_help
+ *
+ * This function implements the 'help' and '?' commands.
+ *
+ * Inputs:
+ * argc argument count (not checked)
+ * argv argument vector (unused)
+ * Outputs:
+ * None.
+ * Returns:
+ * zero; no error path exists in this function
+ * Locking:
+ * none.
+ * Remarks: name and usage are cut to 15 and 20 characters by the format.
+ */
+
+static int
+kdb_help(int argc, const char **argv)
+{
+ kdbtab_t *kt;
+ int i;
+
+ kdb_printf("%-15.15s %-20.20s %s\n", "Command", "Usage", "Description");
+ kdb_printf("----------------------------------------------------------\n");
+ for(i=0, kt=kdb_commands; i<kdb_max_commands; i++, kt++) {
+ if (kt->cmd_name) /* slots with a NULL name are skipped */
+ kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name,
+ kt->cmd_usage, kt->cmd_help);
+ }
+ return 0;
+}
+
+extern int kdb_wake_up_process(struct task_struct * p);
+
+/*
+ * kdb_kill
+ *
+ * This function implements the 'kill' command: kill <-signal> <pid>
+ *
+ * Inputs:
+ * argc argument count (must be 2)
+ * argv argv[1] is the signal, written as a negative number; argv[2] is the pid
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success (also after a printed usage error), a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks: the signal is sent to the group leader of the task that has the pid.
+ */
+
+static int
+kdb_kill(int argc, const char **argv)
+{
+ long sig, pid;
+ char *endp;
+ struct task_struct *p;
+ struct siginfo info;
+
+ if (argc!=2)
+ return KDB_ARGCOUNT;
+
+ sig = simple_strtol(argv[1], &endp, 0);
+ if (*endp)
+ return KDB_BADINT;
+ if (sig >= 0 ) { /* the signal has to be typed with a leading minus */
+ kdb_printf("Invalid signal parameter.<-signal>\n");
+ return 0;
+ }
+ sig=-sig;
+
+ pid = simple_strtol(argv[2], &endp, 0);
+ if (*endp)
+ return KDB_BADINT;
+ if (pid <=0 ) {
+ kdb_printf("Process ID must be large than 0.\n");
+ return 0;
+ }
+
+ /* Find the process. */
+ if (!(p = find_task_by_pid_ns(pid, &init_pid_ns))) {
+ kdb_printf("The specified process isn't found.\n");
+ return 0;
+ }
+ p = p->group_leader;
+ info.si_signo = sig; /* only the five fields set here are initialised in info */
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+ info.si_pid = pid; /* use same capabilities as process being signalled */
+ info.si_uid = 0; /* kdb has root authority */
+ kdb_send_sig_info(p, &info, kdb_seqno);
+ return 0;
+}
+
+struct kdb_tm {
+ int tm_sec; /* seconds */
+ int tm_min; /* minutes */
+ int tm_hour; /* hours */
+ int tm_mday; /* day of the month, 1-based as left by kdb_gmtime */
+ int tm_mon; /* month, 0-based (kdb_summary prints tm_mon+1) */
+ int tm_year; /* year minus 1900 (kdb_summary prints 1900+tm_year) */
+};
+
+static void
+kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
+{
+ /* This will work from 1970-2099, 2100 is not a leap year */
+ static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+ memset(tm, 0, sizeof(*tm));
+ tm->tm_sec = tv->tv_sec % (24 * 60 * 60); /* seconds into the current day, split into h:m:s below */
+ tm->tm_mday = tv->tv_sec / (24 * 60 * 60) + (2 * 365 + 1); /* shift base from 1970 to 1968 */
+ tm->tm_min = tm->tm_sec / 60 % 60;
+ tm->tm_hour = tm->tm_sec / 60 / 60;
+ tm->tm_sec = tm->tm_sec % 60;
+ tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1)); /* whole 4-year blocks of 4*365+1 days since 1968 */
+ tm->tm_mday %= (4*365+1); /* day within the current 4-year block */
+ mon_day[1] = 29; /* mon_day is static and modified below, so reset February: each block starts with a leap year */
+ while (tm->tm_mday >= mon_day[tm->tm_mon]) {
+ tm->tm_mday -= mon_day[tm->tm_mon];
+ if (++tm->tm_mon == 12) {
+ tm->tm_mon = 0;
+ ++tm->tm_year;
+ mon_day[1] = 28; /* the remaining years of the block have a 28-day February */
+ }
+ }
+ ++tm->tm_mday; /* convert the zero-based day to a 1-based day of the month */
+}
+
+/*
+ * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
+ * I cannot call that code directly from kdb, it has an unconditional
+ * cli()/sti() and calls routines that take locks which can stop the debugger.
+ */
+
+static void
+kdb_sysinfo(struct sysinfo *val)
+{
+ struct timespec uptime;
+ do_posix_clock_monotonic_gettime(&uptime);
+ memset(val, 0, sizeof(*val)); /* start from an all-zero structure */
+ val->uptime = uptime.tv_sec; /* whole seconds only */
+ val->loads[0] = avenrun[0];
+ val->loads[1] = avenrun[1];
+ val->loads[2] = avenrun[2];
+ val->procs = nr_threads-1;
+ si_meminfo(val); /* val is then passed to the two helpers */
+ kdb_si_swapinfo(val);
+
+ return;
+}
+
+/*
+ * kdb_summary
+ *
+ * This function implements the 'summary' command.
+ *
+ * Inputs:
+ * argc argument count (must be 0)
+ * argv argument vector (unused)
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks: prints uts names, date, uptime, load averages, then calls kdb_meminfo_proc_show().
+ */
+
+static int
+kdb_summary(int argc, const char **argv)
+{
+ extern struct timespec xtime;
+ extern struct timezone sys_tz;
+ struct kdb_tm tm;
+ struct sysinfo val;
+
+ if (argc)
+ return KDB_ARGCOUNT;
+
+ kdb_printf("sysname %s\n", init_uts_ns.name.sysname);
+ kdb_printf("release %s\n", init_uts_ns.name.release);
+ kdb_printf("version %s\n", init_uts_ns.name.version);
+ kdb_printf("machine %s\n", init_uts_ns.name.machine);
+ kdb_printf("nodename %s\n", init_uts_ns.name.nodename);
+ kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
+ kdb_printf("ccversion %s\n", __stringify(CCVERSION));
+
+ kdb_gmtime(&xtime, &tm);
+ kdb_printf("date %04d-%02d-%02d %02d:%02d:%02d tz_minuteswest %d\n",
+ 1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec,
+ sys_tz.tz_minuteswest);
+
+ kdb_sysinfo(&val);
+ kdb_printf("uptime ");
+ if (val.uptime >= (24*60*60)) { /* >= so that exactly one day prints as "1 day 00:00" rather than "24:00" */
+ int days = val.uptime / (24*60*60);
+ val.uptime %= (24*60*60);
+ kdb_printf("%d day%s ", days, days == 1 ? "" : "s");
+ }
+ kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
+
+ /* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+ kdb_printf("load avg %ld.%02ld %ld.%02ld %ld.%02ld\n",
+ LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
+ LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
+ LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
+ kdb_printf("\n");
+#undef LOAD_INT
+#undef LOAD_FRAC
+
+ kdb_meminfo_proc_show(); /* in fs/proc/meminfo.c */
+
+ return 0;
+}
+
+/*
+ * kdb_per_cpu
+ *
+ * This function implements the 'per_cpu' command.
+ *
+ * Inputs:
+ * argc argument count (1 to 3)
+ * argv argv[1] variable name without the per_cpu__ prefix, argv[2] bytes per word, argv[3] cpu
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks: on SMP, cpus whose value is zero are listed together at the end instead of one per line.
+ */
+
+static int
+kdb_per_cpu(int argc, const char **argv)
+{
+ char buf[256], fmtstr[64];
+ kdb_symtab_t symtab;
+ cpumask_t suppress;
+ int cpu, diag;
+ unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
+
+ if (argc < 1 || argc > 3)
+ return KDB_ARGCOUNT;
+
+ cpus_clear(suppress);
+ snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]);
+ if (!kdbgetsymval(buf, &symtab)) {
+ kdb_printf("%s is not a per_cpu variable\n", argv[1]);
+ return KDB_BADADDR;
+ }
+ if (argc >=2 && (diag = kdbgetularg(argv[2], &bytesperword)))
+ return diag;
+ if (!bytesperword)
+ bytesperword = KDB_WORD_SIZE;
+ else if (bytesperword > KDB_WORD_SIZE)
+ return KDB_BADWIDTH;
+ sprintf(fmtstr, "%%0%dlx ", (int)(2*bytesperword));
+ if (argc >= 3) {
+ if ((diag = kdbgetularg(argv[3], &whichcpu)))
+ return diag;
+ if (whichcpu >= NR_CPUS || !cpu_online(whichcpu)) { /* range-check the user-supplied number before it is used as a cpu index */
+ kdb_printf("cpu %ld is not online\n", whichcpu);
+ return KDB_BADCPUNUM;
+ }
+ }
+
+ /* Most architectures use __per_cpu_offset[cpu], some use
+ * __per_cpu_offset(cpu), smp has no __per_cpu_offset.
+ */
+#ifdef __per_cpu_offset
+#define KDB_PCU(cpu) __per_cpu_offset(cpu)
+#else
+#ifdef CONFIG_SMP
+#define KDB_PCU(cpu) __per_cpu_offset[cpu]
+#else
+#define KDB_PCU(cpu) 0
+#endif
+#endif
+
+ for_each_online_cpu(cpu) {
+ if (whichcpu != ~0UL && whichcpu != cpu)
+ continue; /* ~0UL means no cpu was requested, so every online cpu is shown */
+ addr = symtab.sym_start + KDB_PCU(cpu);
+ if ((diag = kdb_getword(&val, addr, bytesperword))) {
+ kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to read, diag=%d\n",
+ cpu, addr, diag);
+ continue;
+ }
+#ifdef CONFIG_SMP
+ if (!val) {
+ cpu_set(cpu, suppress); /* remember zero-valued cpus for the summary line */
+ continue;
+ }
+#endif /* CONFIG_SMP */
+ kdb_printf("%5d ", cpu);
+ kdb_md_line(fmtstr, addr,
+ bytesperword == KDB_WORD_SIZE,
+ 1, bytesperword, 1, 1, 0);
+ }
+ if (cpus_weight(suppress) == 0)
+ return 0;
+ kdb_printf("Zero suppressed cpu(s):");
+ for_each_cpu_mask(cpu, suppress) { /* print runs of consecutive cpus as first-last */
+ kdb_printf(" %d", cpu);
+ if (cpu == NR_CPUS-1 || next_cpu(cpu, suppress) != cpu + 1)
+ continue;
+ while (cpu < NR_CPUS && next_cpu(cpu, suppress) == cpu + 1)
+ ++cpu;
+ kdb_printf("-%d", cpu);
+ }
+ kdb_printf("\n");
+
+#undef KDB_PCU
+
+ return 0;
+}
+
+/*
+ * Display help for the use of 'cmd | grep pattern'. Both parameters are unused; always returns 0.
+ */
+static int
+kdb_grep_help(int argc, const char **argv)
+{
+ kdb_printf ("Usage of cmd args | grep pattern:\n");
+ kdb_printf (" Any command's output may be filtered through an ");
+ kdb_printf ("emulated 'pipe'.\n");
+ kdb_printf (" 'grep' is just a key word.\n");
+ kdb_printf
+ (" The pattern may include a very limited set of metacharacters:\n");
+ kdb_printf (" pattern or ^pattern or pattern$ or ^pattern$\n");
+ kdb_printf
+ (" And if there are spaces in the pattern, you may quote it:\n");
+ kdb_printf
+ (" \"pat tern\" or \"^pat tern\" or \"pat tern$\" or \"^pat tern$\"\n");
+ return 0;
+}
+
+/*
+ * kdb_register_repeat
+ *
+ * This function is used to register a kernel debugger command.
+ *
+ * Inputs:
+ * cmd Command name
+ * func Function to execute the command
+ * usage A simple usage string showing arguments
+ * help A simple help string describing command
+ * minlen Stored in the table entry as cmd_minlen
+ * repeat Does the command auto repeat on enter?
+ * Outputs: None.
+ * Returns:
+ * zero for success, one if a duplicate command or if the table cannot be grown.
+ * Locking: none.
+ * Remarks:
+ * cmd, usage and help are stored by pointer, not copied.
+ * When no slot is free the table grows by kdb_command_extend entries.
+ */
+
+#define kdb_command_extend 50 /* arbitrary */
+int
+kdb_register_repeat(char *cmd,
+ kdb_func_t func,
+ char *usage,
+ char *help,
+ short minlen,
+ kdb_repeat_t repeat)
+{
+ int i;
+ kdbtab_t *kp;
+
+ /*
+ * Brute force method to determine duplicates
+ */
+ for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
+ if (kp->cmd_name && (strcmp(kp->cmd_name, cmd)==0)) {
+ kdb_printf("Duplicate kdb command registered: "
+ "%s, func %p help %s\n", cmd, func, help);
+ return 1;
+ }
+ }
+
+ /*
+ * Insert command into first available location in table
+ */
+ for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
+ if (kp->cmd_name == NULL) { /* a NULL name marks a free slot */
+ break;
+ }
+ }
+
+ if (i >= kdb_max_commands) { /* no free slot: allocate a larger table and copy the old one over */
+ kdbtab_t *new = kmalloc((kdb_max_commands + kdb_command_extend) * sizeof(*new), GFP_KDB);
+ if (!new) {
+ kdb_printf("Could not allocate new kdb_command table\n");
+ return 1;
+ }
+ if (kdb_commands) {
+ memcpy(new, kdb_commands, kdb_max_commands * sizeof(*new));
+ kfree(kdb_commands);
+ }
+ memset(new + kdb_max_commands, 0, kdb_command_extend * sizeof(*new)); /* zero the added slots so they read as free */
+ kdb_commands = new;
+ kp = kdb_commands + kdb_max_commands; /* first of the added slots */
+ kdb_max_commands += kdb_command_extend;
+ }
+
+ kp->cmd_name = cmd;
+ kp->cmd_func = func;
+ kp->cmd_usage = usage;
+ kp->cmd_help = help;
+ kp->cmd_flags = 0;
+ kp->cmd_minlen = minlen;
+ kp->cmd_repeat = repeat;
+
+ return 0;
+}
+
+/*
+ * kdb_register
+ *
+ * Compatibility register function for commands that do not need to
+ * specify a repeat state. Equivalent to kdb_register_repeat with
+ * KDB_REPEAT_NONE.
+ *
+ * Inputs:
+ * cmd Command name
+ * func Function to execute the command
+ * usage A simple usage string showing arguments
+ * help A simple help string describing command
+ * minlen Passed through unchanged to kdb_register_repeat
+ * Outputs:
+ * None.
+ * Returns:
+ * The value returned by kdb_register_repeat: zero for success, one if
+ * the command is a duplicate or the command table could not be extended.
+ * Locking:
+ * none.
+ * Remarks:
+ *
+ */
+
+int
+kdb_register(char *cmd,
+ kdb_func_t func,
+ char *usage,
+ char *help,
+ short minlen)
+{
+ return kdb_register_repeat(cmd, func, usage, help, minlen, KDB_REPEAT_NONE);
+}
+
+/*
+ * kdb_unregister
+ *
+ * This function is used to unregister a kernel debugger command.
+ * It is generally called when a module which implements kdb
+ * commands is unloaded.
+ *
+ * Inputs:
+ * cmd Command name
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, one command not registered.
+ * Locking:
+ * none.
+ * Remarks:
+ *
+ */
+
+int
+kdb_unregister(char *cmd)
+{
+	int idx;
+
+	for (idx = 0; idx < kdb_max_commands; idx++) {
+		kdbtab_t *ent = &kdb_commands[idx];
+
+		if (!ent->cmd_name || strcmp(ent->cmd_name, cmd) != 0)
+			continue;
+
+		/* A NULL name is what marks a table entry as unused. */
+		ent->cmd_name = NULL;
+		return 0;
+	}
+
+	/* No entry carries this name. */
+	return 1;
+}
+
+/*
+ * kdb_inittab
+ *
+ * This function is called by the kdb_init function to initialize
+ * the kdb command table. It must be called prior to any other
+ * call to kdb_register_repeat.
+ *
+ * Inputs:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ *
+ */
+
+static void __init
+kdb_inittab(void)
+{
+ int i;
+ kdbtab_t *kp;
+
+ /* Mark every entry of the current table unused (NULL name == free). */
+ for(i=0, kp=kdb_commands; i < kdb_max_commands; i++,kp++) {
+ kp->cmd_name = NULL;
+ }
+
+ /*
+ * Built-in commands. The arguments after the handler are: usage
+ * string, help string, minlen and repeat mode. Several names share
+ * one handler (kdb_md, kdb_bt, kdb_help, kdb_debuginfo_print).
+ * Return values are not checked here.
+ */
+ kdb_register_repeat("md", kdb_md, "<vaddr>", "Display Memory Contents, also mdWcN, e.g. md8c1", 1, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mdr", kdb_md, "<vaddr> <bytes>", "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mdp", kdb_md, "<paddr> <bytes>", "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mds", kdb_md, "<vaddr>", "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mm", kdb_mm, "<vaddr> <contents>", "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("id", kdb_id, "<vaddr>", "Display Instructions", 1, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("go", kdb_go, "[<vaddr>]", "Continue Execution", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("rd", kdb_rd, "", "Display Registers", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("rm", kdb_rm, "<reg> <contents>", "Modify Registers", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ef", kdb_ef, "<vaddr>", "Display exception frame", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bt", kdb_bt, "[<vaddr>]", "Stack traceback", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("btp", kdb_bt, "<pid>", "Display stack for process <pid>", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bta", kdb_bt, "[DRSTCZEUIMA]", "Display stack all processes", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("btc", kdb_bt, "", "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("btt", kdb_bt, "<vaddr>", "Backtrace process given its struct task address", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ll", kdb_ll, "<first-element> <linkoffset> <cmd>", "Execute cmd for each element in linked list", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("env", kdb_env, "", "Show environment variables", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("set", kdb_set, "", "Set environment variables", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("help", kdb_help, "", "Display Help Message", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("?", kdb_help, "", "Display Help Message", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("cpu", kdb_cpu, "<cpunum>","Switch to new cpu", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ps", kdb_ps, "[<flags>|A]", "Display active task list", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("pid", kdb_pid, "<pidnum>", "Switch to another task", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("reboot", kdb_reboot, "", "Reboot the machine immediately", 0, KDB_REPEAT_NONE);
+ /* Commands below are only registered when the matching option is configured. */
+#if defined(CONFIG_KDB_KDUMP)
+ kdb_register_repeat("kdump", kdb_kdump, "", "Calls kdump mode", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_MODULES)
+ kdb_register_repeat("lsmod", kdb_lsmod, "", "List loaded kernel modules", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_MAGIC_SYSRQ)
+ kdb_register_repeat("sr", kdb_sr, "<key>", "Magic SysRq key", 0, KDB_REPEAT_NONE);
+#endif
+ kdb_register_repeat("dmesg", kdb_dmesg, "[lines]", "Display syslog buffer", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"", "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>", "Send a signal to a process", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("summary", kdb_summary, "", "Summarize the system", 4, KDB_REPEAT_NONE);
+ kdb_register_repeat("per_cpu", kdb_per_cpu, "", "Display per_cpu variables", 3, KDB_REPEAT_NONE);
+ kdb_register_repeat("grephelp", kdb_grep_help, "",
+ "Display help on | grep", 0, KDB_REPEAT_NONE);
+ /* Type-aware print commands; all but the help entry go to kdb_debuginfo_print. */
+ kdb_register_repeat("print", kdb_debuginfo_print, "<expression>",
+ "Type casting, as in lcrash", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("px", kdb_debuginfo_print, "<expression>",
+ "Print in hex (type casting) (see 'pxhelp')", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("pxhelp", kdb_pxhelp, "",
+ "Display help for the px command", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("pd", kdb_debuginfo_print, "<expression>",
+ "Print in decimal (type casting)", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("whatis", kdb_debuginfo_print,"<type or symbol>",
+ "Display the type, or the address for a symbol", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("sizeof", kdb_debuginfo_print, "<type>",
+ "Display the size of a structure, typedef, etc.", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("walk", kdb_walk, "",
+ "Walk a linked list (see 'walkhelp')", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("walkhelp", kdb_walkhelp, "",
+ "Display help for the walk command", 0, KDB_REPEAT_NONE);
+}
+
+/*
+ * kdb_write_proc_filename
+ *
+ *	The user has written to our "file".  The bytes are copied into
+ *	kdb_debug_info_filename, one trailing newline is dropped and the
+ *	result is NUL terminated.
+ *
+ * Inputs:
+ *	file	the /proc file (not used)
+ *	buffer	user address of the data he is writing
+ *	count	number of bytes in the user's buffer
+ *	data	not used
+ * Returns:
+ *	count when the name was stored (zero for an empty write),
+ *	-EINVAL when the data plus terminating NUL does not fit,
+ *	-EFAULT when the user buffer cannot be read.
+ */
+static int
+kdb_write_proc_filename(struct file *file, const char __user *buffer,
+			unsigned long count, void *data)
+{
+	unsigned long len = count;
+
+	/* Nothing to store; do not look at kdb_debug_info_filename[-1]. */
+	if (count == 0)
+		return 0;
+
+	/*
+	 * our buffer is kdb_debug_info_filename[256]; the NUL written
+	 * below lands at index count in the worst case, so count must
+	 * stay below 256.
+	 */
+	if (count >= 256)
+		return -EINVAL;
+
+	if (copy_from_user(kdb_debug_info_filename, buffer, count))
+		return -EFAULT;
+
+	/* remove any newline from the end of the file name */
+	if (kdb_debug_info_filename[len - 1] == '\n')
+		len--;
+	kdb_debug_info_filename[len] = '\0';
+
+	return count;	/* all of the user's bytes were consumed */
+}
+
+/*
+ * kdb_read_proc_filename
+ *
+ *	The user is reading from our "file": format the current
+ *	kdb_debug_info_filename, followed by a newline, into page.
+ *
+ * Inputs:
+ *	page	buffer the text is formatted into
+ *	count	length of the read, used as the snprintf size limit
+ *	start, off, eof, data
+ *		not used by this handler
+ * Returns:
+ *	The value returned by snprintf.
+ *
+ * NOTE(review): off is ignored and *eof is never set -- confirm that the
+ * proc read path copes with a read that resumes at a non-zero offset.
+ */
+static int
+kdb_read_proc_filename(char *page, char **start, off_t off,
+		       int count, int *eof, void *data)
+{
+	int len;
+
+	/* give him kdb_debug_info_filename[]; */
+	len = snprintf(page, count, "%s\n", kdb_debug_info_filename);
+	return len;
+}
+
+/*
+ * kdb_proc_filename
+ *
+ *	create /proc/kdb/debug_info_name and attach the read and write
+ *	handlers for the debug info file name to it.
+ *
+ * Inputs:
+ *	None.
+ * Returns:
+ *	None.  A failure is reported with printk and the function gives up.
+ */
+static void
+kdb_proc_filename(void)
+{
+	struct proc_dir_entry *kdb_dir_entry, *kdb_file_entry;
+
+	/* create /proc/kdb */
+	kdb_dir_entry = proc_mkdir("kdb", NULL);
+	if (!kdb_dir_entry) {
+		printk ("kdb could not create /proc/kdb\n");
+		return;
+	}
+
+	/* read/write by owner (root) only */
+	kdb_file_entry = create_proc_entry("debug_info_name",
+					   S_IRUSR | S_IWUSR, kdb_dir_entry);
+	if (!kdb_file_entry) {
+		/* Name the entry that really failed, not the local variable. */
+		printk ("kdb could not create /proc/kdb/debug_info_name\n");
+		return;
+	}
+	kdb_file_entry->nlink = 1;
+	kdb_file_entry->data = (void *)NULL;
+	kdb_file_entry->read_proc = kdb_read_proc_filename;
+	kdb_file_entry->write_proc = kdb_write_proc_filename;
+}
+
+/*
+ * kdb_cmd_init
+ *
+ * This function is called by the kdb_init function to execute any
+ * commands defined in kdb_cmds.
+ *
+ * Inputs:
+ * Commands in *kdb_cmds[];
+ * Outputs:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ *
+ */
+
+static void __init
+kdb_cmd_init(void)
+{
+	int idx;
+
+	/* kdb_cmds[] ends with a NULL entry. */
+	for (idx = 0; kdb_cmds[idx] != NULL; idx++) {
+		int rc;
+
+		/*
+		 * Echo the command, but not while the lines of a defcmd
+		 * are being collected and not on a quiet console.
+		 */
+		if (!defcmd_in_progress && console_loglevel >= 6 /* KERN_INFO */)
+			kdb_printf("kdb_cmd[%d]: %s", idx, kdb_cmds[idx]);
+
+		rc = kdb_parse(kdb_cmds[idx]);
+		if (rc != 0)
+			kdb_printf("kdb command %s failed, kdb diag %d\n",
+				   kdb_cmds[idx], rc);
+	}
+
+	if (!defcmd_in_progress)
+		return;
+
+	/* The preset commands opened a defcmd and never closed it. */
+	kdb_printf("Incomplete 'defcmd' set, forcing endefcmd\n");
+	kdb_parse("endefcmd");
+}
+
+/*
+ * kdb_panic
+ *
+ * Invoked via the panic_notifier_list.
+ *
+ * Inputs:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * Zero.
+ * Locking:
+ * None.
+ * Remarks:
+ * When this function is called from panic(), the other cpus have already
+ * been stopped.
+ *
+ */
+
+static int
+kdb_panic(struct notifier_block *self, unsigned long command, void *ptr)
+{
+	/* kernel state is dubious now */
+	KDB_FLAG_SET(CATASTROPHIC);
+	KDB_ENTER();
+
+	return 0;
+}
+
+/* Hooks kdb_panic into a notifier chain; spelled out field by field. */
+static struct notifier_block kdb_block = {
+	.notifier_call	= kdb_panic,
+	.next		= NULL,
+	.priority	= 0,
+};
+
+#ifdef CONFIG_SYSCTL
+/*
+ * proc_do_kdb
+ *
+ * sysctl handler for the "kdb" entry. A write is refused with -EINVAL
+ * once KDB_FLAG(NO_CONSOLE) is set; every other access is handed to
+ * proc_dointvec unchanged.
+ *
+ * The "-" and "++" lines below are two variants of the same signature
+ * and of the proc_dointvec call (with and without the struct file
+ * argument) carried by this patch.
+ */
- static int proc_do_kdb(ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
++static int proc_do_kdb(ctl_table *table, int write, void __user *buffer,
++ size_t *lenp, loff_t *ppos)
+{
+ if (KDB_FLAG(NO_CONSOLE) && write) {
+ printk(KERN_ERR "kdb has no working console and has switched itself off\n");
+ return -EINVAL;
+ }
- return proc_dointvec(table, write, filp, buffer, lenp, ppos);
++ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
+/*
+ * The "kdb" sysctl entry: an int backed by kdb_on, mode 0644, accessed
+ * through proc_do_kdb. The empty initializer ends the table.
+ */
+static ctl_table kdb_kern_table[] = {
+ {
+ .ctl_name = KERN_KDB,
+ .procname = "kdb",
+ .data = &kdb_on,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_do_kdb,
+ .strategy = &sysctl_intvec,
+ },
+ {}
+};
+
+/*
+ * Root table handed to register_sysctl_table(): a "kernel" directory
+ * entry whose only child table is kdb_kern_table.
+ */
+static ctl_table kdb_root_table[] = {
+ {
+ .ctl_name = CTL_KERN,
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kdb_kern_table,
+ },
+ {}
+};
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * CPU notifier: when a cpu comes online, run the kdb per-cpu setup on
+ * that cpu.  All other actions are ignored.  Always returns NOTIFY_OK.
+ */
+static int
+kdb_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	cpumask_t saved_mask;
+	int cpu;
+
+	if (action != CPU_ONLINE)
+		return NOTIFY_OK;
+
+	cpu = (unsigned long)hcpu;
+
+	/* Restrict the current task to the new cpu for the setup call only. */
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	kdb(KDB_REASON_CPU_UP, 0, NULL);	/* do kdb setup on this cpu */
+	set_cpus_allowed_ptr(current, &saved_mask);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kdb_cpu_nfb = {
+	.notifier_call = kdb_cpu_callback
+};
+
+/*
+ * kdb_init
+ *
+ * Initialize the kernel debugger environment.
+ *
+ * Parameters:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ * None.
+ */
+
+void __init
+kdb_init(void)
+{
+ /* Record the boot cpu as the initial cpu while the tables are set up. */
+ kdb_initial_cpu = smp_processor_id();
+ /*
+ * This must be called before any calls to kdb_printf.
+ */
+ kdb_io_init();
+
+ kdb_inittab(); /* Initialize Command Table */
+ kdb_initbptab(); /* Initialize Breakpoint Table */
+ kdb_id_init(); /* Initialize Disassembler */
+ kdba_init(); /* Architecture Dependent Initialization */
+
+ /*
+ * Use printk() to get message in log_buf[];
+ */
+ printk("kdb version %d.%d%s by Keith Owens, Scott Lurndal. "\
+ "Copyright SGI, All Rights Reserved\n",
+ KDB_MAJOR_VERSION, KDB_MINOR_VERSION, KDB_TEST_VERSION);
+
+ kdb_cmd_init(); /* Preset commands from kdb_cmds */
+ /*
+ * kdb_initial_cpu is set to -1 around the kdb() call below and put
+ * back to the boot cpu afterwards.
+ */
+ kdb_initial_cpu = -1; /* Avoid recursion problems */
+ kdb(KDB_REASON_CPU_UP, 0, NULL); /* do kdb setup on boot cpu */
+ kdb_initial_cpu = smp_processor_id();
+ /* From here on kdb_panic runs on panic and kdb_cpu_callback on cpu events. */
+ atomic_notifier_chain_register(&panic_notifier_list, &kdb_block);
+ register_cpu_notifier(&kdb_cpu_nfb);
+
+#ifdef kdba_setjmp
+ /* One jump buffer per possible cpu; failure is reported but not fatal. */
+ kdbjmpbuf = vmalloc(NR_CPUS * sizeof(*kdbjmpbuf));
+ if (!kdbjmpbuf)
+ printk(KERN_ERR "Cannot allocate kdbjmpbuf, no kdb recovery will be possible\n");
+#endif /* kdba_setjmp */
+
+ kdb_initial_cpu = -1;
+ /* At least 10 seconds, otherwise two seconds per online cpu. */
+ kdb_wait_for_cpus_secs = max(10, 2*num_online_cpus());
+}
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Initcall that registers the kdb sysctl table and creates the
+ * /proc/kdb entries. Always returns 0; the result of
+ * register_sysctl_table() is not checked.
+ */
+static int __init
+kdb_late_init(void)
+{
+ register_sysctl_table(kdb_root_table);
+ /* seems that we cannot allocate with kmalloc until now */
+ kdb_proc_filename();
+ return 0;
+}
+
+__initcall(kdb_late_init);
+#endif
+
+EXPORT_SYMBOL(kdb_register);
+EXPORT_SYMBOL(kdb_register_repeat);
+EXPORT_SYMBOL(kdb_unregister);
+EXPORT_SYMBOL(kdb_getarea_size);
+EXPORT_SYMBOL(kdb_putarea_size);
+EXPORT_SYMBOL(kdb_getuserarea_size);
+EXPORT_SYMBOL(kdb_putuserarea_size);
+EXPORT_SYMBOL(kdbgetularg);
+EXPORT_SYMBOL(kdbgetenv);
+EXPORT_SYMBOL(kdbgetintenv);
+EXPORT_SYMBOL(kdbgetaddrarg);
+EXPORT_SYMBOL(kdb);
+EXPORT_SYMBOL(kdb_on);
+EXPORT_SYMBOL(kdb_seqno);
+EXPORT_SYMBOL(kdb_initial_cpu);
+EXPORT_SYMBOL(kdbnearsym);
+EXPORT_SYMBOL(kdb_printf);
+EXPORT_SYMBOL(kdb_symbol_print);
+EXPORT_SYMBOL(kdb_running_process);
--- /dev/null
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+#
+
+obj-$(CONFIG_KDB_MODULES) += kdbm_pg.o kdbm_task.o kdbm_vm.o kdbm_sched.o
+obj-m += kdbm_debugtypes.o
+ifdef CONFIG_X86
+obj-$(CONFIG_KDB_MODULES) += kdbm_x86.o
+endif
++CFLAGS_kdbm_vm.o += -I $(srctree)/drivers/scsi
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+
+MODULE_AUTHOR("SGI");
+MODULE_DESCRIPTION("Debug page information");
+MODULE_LICENSE("GPL");
+
+/* Standard Linux page stuff */
+
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+/* From include/linux/page-flags.h */
+/*
+ * Names for the bits of page->flags as consumed by map_flags(): entry i
+ * is printed when bit i is set, so the order here has to follow the bit
+ * numbering, including the config-dependent entries. NULL ends the list.
+ */
+static char *pg_flag_vals[] = {
+ "PG_locked", "PG_error", "PG_referenced", "PG_uptodate",
+ "PG_dirty", "PG_lru", "PG_active", "PG_slab",
+ "PG_owner_priv_1", "PG_arch_1", "PG_reserved", "PG_private",
+ "PG_writeback",
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ "PG_head", "PG_tail",
+#else
+ "PG_compound",
+#endif
+ "PG_swapcache", "PG_mappedtodisk", "PG_reclaim", "PG_buddy",
+#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
+ "PG_uncached",
+#endif
+ NULL };
+#endif
+
+/*
+ * From include/linux/buffer_head.h
+ *
+ * Names for the bits of buffer_head.b_state as consumed by map_flags():
+ * entry i is printed when bit i is set.  NULL ends the list.
+ */
+static char *bh_state_vals[] = {
+	"Uptodate",	"Dirty",	"Lock",		"Req",
+	"Uptodate_Lock", "Mapped",	"New",		"Async_read",
+	"Async_write",	"Delay",	"Boundary",	"Write_EIO",
+	"Ordered",	"Eopnotsupp",	"Unwritten",	"PrivateStart",
+	NULL };
+
+/* From include/linux/bio.h */
+/* Names for the bits of bio.bi_flags, indexed by bit number for map_flags(). */
+static char *bio_flag_vals[] = {
+ "Uptodate", "RW_block", "EOF", "Seg_valid",
+ "Cloned", "Bounced", "User_mapped", "Eopnotsupp",
+ NULL };
+
+/* From include/linux/fs.h */
+/* Names for the bits of inode.i_state, indexed by bit number for map_flags(). */
+static char *inode_flag_vals[] = {
+ "I_DIRTY_SYNC", "I_DIRTY_DATASYNC", "I_DIRTY_PAGES", "I_NEW",
+ "I_WILL_FREE", "I_FREEING", "I_CLEAR", "I_LOCK",
+ "I_SYNC", NULL };
+
+/*
+ * Append src to the NUL terminated string in dst without ever writing
+ * beyond dst[size - 1]; characters that do not fit are dropped.
+ */
+static void map_flags_append(char *dst, size_t size, const char *src)
+{
+	size_t used = strlen(dst);
+
+	while (*src && used + 1 < size)
+		dst[used++] = *src++;
+	dst[used] = '\0';
+}
+
+/*
+ * map_flags
+ *
+ *	Turn a flags word into a list of flag names.
+ *
+ * Inputs:
+ *	flags	Bit mask to decode.
+ *	mapping	NULL terminated name table; mapping[i] names bit i.
+ * Returns:
+ *	Pointer to a static buffer holding the names of the set bits,
+ *	separated by blanks and wrapped before column 80.  The buffer is
+ *	overwritten by the next call.  Bits beyond the end of the table
+ *	are ignored; text that does not fit the buffer is cut off instead
+ *	of overrunning it.
+ */
+static char *map_flags(unsigned long flags, char *mapping[])
+{
+	static char buffer[256];
+	int index;
+	int offset = 12;	/* output column the caller has reached */
+
+	buffer[0] = '\0';
+
+	for (index = 0; flags && mapping[index]; flags >>= 1, index++) {
+		size_t len;
+
+		if (!(flags & 1))
+			continue;
+
+		len = strlen(mapping[index]);
+		if ((offset + len + 1) >= 80) {
+			/* Name would cross column 80: start a new line. */
+			map_flags_append(buffer, sizeof(buffer), "\n ");
+			offset = 12;
+		} else if (offset > 12) {
+			map_flags_append(buffer, sizeof(buffer), " ");
+			offset++;
+		}
+		map_flags_append(buffer, sizeof(buffer), mapping[index]);
+		offset += len;
+	}
+
+	return (buffer);
+}
+
+/*
+ * "bh" command: display the buffer_head found at the address given as
+ * the only argument.  Returns 0 or a kdb diagnostic.
+ */
+static int
+kdbm_buffers(int argc, const char **argv)
+{
+	struct buffer_head bh;
+	unsigned long addr;
+	long offset = 0;
+	int nextarg = 1;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	/* Work on a local copy of the structure. */
+	diag = kdb_getarea(bh, addr);
+	if (diag)
+		return diag;
+
+	kdb_printf("buffer_head at 0x%lx\n", addr);
+	kdb_printf(" bno %llu size %llu dev 0x%x\n",
+		   (unsigned long long)bh.b_blocknr,
+		   (unsigned long long)bh.b_size,
+		   bh.b_bdev ? bh.b_bdev->bd_dev : 0);
+	kdb_printf(" count %d state 0x%lx [%s]\n",
+		   bh.b_count.counter, bh.b_state,
+		   map_flags(bh.b_state, bh_state_vals));
+	kdb_printf(" b_data 0x%p\n",
+		   bh.b_data);
+	kdb_printf(" b_page 0x%p b_this_page 0x%p b_private 0x%p\n",
+		   bh.b_page, bh.b_this_page, bh.b_private);
+
+	kdb_printf(" b_end_io ");
+	if (!bh.b_end_io)
+		kdb_printf("(NULL)");
+	else
+		kdb_symbol_print(kdba_funcptr_value(bh.b_end_io), NULL, KDB_SP_VALUE);
+	kdb_printf("\n");
+
+	return 0;
+}
+
+/*
+ * Print vcount bio_vec entries starting at address vec.  Counts outside
+ * 1..BIO_MAX_PAGES are reported and skipped.  Returns 0 or the
+ * diagnostic of the first entry that could not be read.
+ */
+static int
+print_biovec(struct bio_vec *vec, int vcount)
+{
+	struct bio_vec entry;
+	unsigned long addr = (unsigned long)vec;
+	int idx;
+
+	if (vcount < 1 || vcount > BIO_MAX_PAGES) {
+		kdb_printf(" [skipped iovecs, vcnt is %d]\n", vcount);
+		return 0;
+	}
+
+	for (idx = 0; idx < vcount; idx++, addr += sizeof(entry)) {
+		int diag = kdb_getarea(entry, addr);
+
+		if (diag)
+			return diag;
+		kdb_printf(" [%d] page 0x%p length=%u offset=%u\n",
+			   idx, entry.bv_page, entry.bv_len, entry.bv_offset);
+	}
+	return 0;
+}
+
+/*
+ * "bio" command: display the struct bio found at the address given as
+ * the only argument, including its io vectors.  Returns 0 or a kdb
+ * diagnostic.
+ */
+static int
+kdbm_bio(int argc, const char **argv)
+{
+	struct bio bio;
+	unsigned long addr;
+	long offset = 0;
+	int nextarg = 1;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	/* Work on a local copy of the structure. */
+	diag = kdb_getarea(bio, addr);
+	if (diag)
+		return diag;
+
+	kdb_printf("bio at 0x%lx\n", addr);
+	kdb_printf(" bno %llu next 0x%p dev 0x%x\n",
+		   (unsigned long long)bio.bi_sector,
+		   bio.bi_next, bio.bi_bdev ? bio.bi_bdev->bd_dev : 0);
+	kdb_printf(" vcnt %u vec 0x%p rw 0x%lx flags 0x%lx [%s]\n",
+		   bio.bi_vcnt, bio.bi_io_vec, bio.bi_rw, bio.bi_flags,
+		   map_flags(bio.bi_flags, bio_flag_vals));
+	/* The result of the vector dump is deliberately not propagated. */
+	print_biovec(bio.bi_io_vec, bio.bi_vcnt);
+	kdb_printf(" count %d private 0x%p\n",
+		   atomic_read(&bio.bi_cnt), bio.bi_private);
+
+	kdb_printf(" bi_end_io ");
+	if (!bio.bi_end_io)
+		kdb_printf("(NULL)");
+	else
+		kdb_symbol_print(kdba_funcptr_value(bio.bi_end_io), NULL, KDB_SP_VALUE);
+	kdb_printf("\n");
+
+	return 0;
+}
+
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+/*
+ * Decode a page flags word with the pg_flag_vals name table. The result
+ * is whatever map_flags() returns for that table.
+ */
+static char *page_flags(unsigned long flags)
+{
+ return(map_flags(flags, pg_flag_vals));
+}
+
+/*
+ * "page" command: display one struct page.  The argument is either the
+ * address of a struct page or, when it is too small to be a kernel
+ * address, an index into mem_map.  Returns 0 or a kdb diagnostic.
+ */
+static int
+kdbm_page(int argc, const char **argv)
+{
+	struct page page;
+	unsigned long addr;
+	long offset = 0;
+	int nextarg = 1;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+#ifdef __ia64__
+	/* assume region 0 is a page index, not an address */
+	if (rgn_index(addr) == 0)
+		addr = (unsigned long) &mem_map[addr];
+#else
+	/* anything below PAGE_OFFSET is taken as a page index */
+	if (addr < PAGE_OFFSET)
+		addr = (unsigned long) &mem_map[addr];
+#endif
+
+	diag = kdb_getarea(page, addr);
+	if (diag)
+		return diag;
+
+	kdb_printf("struct page at 0x%lx\n", addr);
+	kdb_printf(" addr space 0x%p index %lu (offset 0x%llx)\n",
+		   page.mapping, page.index,
+		   (unsigned long long)page.index << PAGE_CACHE_SHIFT);
+	kdb_printf(" count %d flags %s\n",
+		   page._count.counter, page_flags(page.flags));
+	/* The virtual address is derived from the real page, not the copy. */
+	kdb_printf(" virtual 0x%p\n", page_address((struct page *)addr));
+	if (!page_has_buffers(&page))
+		kdb_printf(" private 0x%lx\n", page_private(&page));
+	else
+		kdb_printf(" buffers 0x%p\n", page_buffers(&page));
+
+	return 0;
+}
+#endif /* !CONFIG_DISCONTIGMEM && !NUMA */
+
+/*
+ * Print one struct request read from addr.
+ *
+ * Returns the value of rq.queuelist.next as an unsigned long, or 0 when
+ * the request could not be read. Callers walking a list must treat 0 as
+ * "stop".
+ *
+ * The "-" and "++" lines below are two variants of the same printout
+ * carried by this patch.
+ * NOTE(review): the "++" variant drops the (unsigned long long) cast and
+ * prints blk_rq_sectors() with %lu -- confirm the specifiers match the
+ * return types of blk_rq_pos()/blk_rq_sectors() in the target tree.
+ */
+static unsigned long
+print_request(unsigned long addr)
+{
+ struct request rq;
+
+ if (kdb_getarea(rq, addr))
+ return(0);
+
+ kdb_printf("struct request at 0x%lx\n", addr);
- kdb_printf(" errors %d sector %llu nr_sectors %u\n",
- rq.errors, (unsigned long long)blk_rq_pos(&rq),
- blk_rq_sectors(&rq));
-
- kdb_printf(" nrseg %u u currnrsect %u\n",
- rq.nr_phys_segments, blk_rq_cur_sectors(&rq));
++ kdb_printf(" errors %d sector %llu nr_sectors %lu\n",
++ rq.errors, blk_rq_pos(&rq), blk_rq_sectors(&rq));
+
+ return (unsigned long) rq.queuelist.next;
+}
+
+/*
+ * "req" command: dump the struct request at the address given as the
+ * only argument.  Returns 0 or a kdb diagnostic from argument parsing.
+ */
+static int
+kdbm_request(int argc, const char **argv)
+{
+	unsigned long addr;
+	long offset = 0;
+	int nextarg = 1;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (!diag)
+		print_request(addr);	/* next pointer is of no interest here */
+
+	return diag;
+}
+
+
+/*
+ * "rqueue" command: dump every request linked on the queue_head list of
+ * the struct request_queue at the address given as the only argument.
+ *
+ * The walk ends when the list returns to queue_head, or as soon as
+ * print_request() reports 0, which it does for an entry it cannot read.
+ * Without that second check an unreadable entry made the loop call
+ * print_request(0) forever.
+ *
+ * Returns 0 or a kdb diagnostic.
+ */
+static int
+kdbm_rqueue(int argc, const char **argv)
+{
+	struct request_queue rq;
+	unsigned long addr, head_addr, next;
+	long offset = 0;
+	int nextarg;
+	int i, diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+	    (diag = kdb_getarea(rq, addr)))
+		return(diag);
+
+	kdb_printf("struct request_queue at 0x%lx\n", addr);
+	i = 0;
+	next = (unsigned long)rq.queue_head.next;
+	/* Address of the list head inside the real queue, not the copy. */
+	head_addr = addr + offsetof(struct request_queue, queue_head);
+	kdb_printf(" request queue: %s\n", next == head_addr ?
+		   "empty" : "");
+	while (next != head_addr) {
+		i++;
+		next = print_request(next);
+		if (!next) {
+			/* Unreadable entry or NULL link: the list is broken. */
+			kdb_printf(" request list ends unexpectedly\n");
+			break;
+		}
+	}
+
+	if (i)
+		kdb_printf("%d requests found\n", i);
+
+	return 0;
+}
+
+
+/*
+ * Print a one line summary (address, block number, state names) of the
+ * buffer_head at addr.  Prints nothing when it cannot be read.
+ */
+static void
+do_buffer(unsigned long addr)
+{
+	struct buffer_head bh;
+
+	if (kdb_getarea(bh, addr) == 0)
+		kdb_printf("\tbh 0x%lx bno %8llu [%s]\n", addr,
+			   (unsigned long long)bh.b_blocknr,
+			   map_flags(bh.b_state, bh_state_vals));
+}
+
+/*
+ * Print one line describing a page, followed by one line per attached
+ * buffer_head when the page has buffers.
+ *
+ * page is dereferenced directly (no kdb_getarea copy is made here).
+ * first, when non-zero, makes the column header precede the page line.
+ */
+static void
+kdbm_show_page(struct page *page, int first)
+{
+ if (first)
+ kdb_printf("page_struct index cnt zone nid flags\n");
+ /* The first column holds page_address(page); the blank pads 32-bit pointers. */
+ kdb_printf("%p%s %6lu %5d %3d %3d 0x%lx",
+ page_address(page), sizeof(void *) == 4 ? " " : "",
+ page->index, atomic_read(&(page->_count)),
+ page_zonenum(page), page_to_nid(page),
+ page->flags & (~0UL >> ZONES_SHIFT));
+ /* Print " <type>" when the Page<type>() test is true for the page. */
+#define kdb_page_flags(page, type) if (Page ## type(page)) kdb_printf(" " #type);
+ kdb_page_flags(page, Locked);
+ kdb_page_flags(page, Error);
+ kdb_page_flags(page, Referenced);
+ kdb_page_flags(page, Uptodate);
+ kdb_page_flags(page, Dirty);
+ kdb_page_flags(page, LRU);
+ kdb_page_flags(page, Active);
+ kdb_page_flags(page, Slab);
+ kdb_page_flags(page, Checked);
+ /* PG_arch_1 is tested by hand instead of through the macro. */
+ if (page->flags & (1UL << PG_arch_1))
+ kdb_printf(" arch_1");
+ kdb_page_flags(page, Reserved);
+ kdb_page_flags(page, Private);
+ kdb_page_flags(page, Writeback);
+ kdb_page_flags(page, Compound);
+ kdb_page_flags(page, SwapCache);
+ kdb_page_flags(page, MappedToDisk);
+ kdb_page_flags(page, Reclaim);
+ kdb_page_flags(page, Buddy);
+
+ /* PageHighMem is not a flag any more, but treat it as one */
+ kdb_page_flags(page, HighMem);
+
+ if (page_has_buffers(page)) {
+ struct buffer_head *head, *bh;
+ kdb_printf("\n");
+ head = bh = page_buffers(page);
+ /* Follow b_this_page until the walk is back at the first buffer. */
+ do {
+ do_buffer((unsigned long) bh);
+ } while ((bh = bh->b_this_page) != head);
+ } else if (page_private(page)) {
+ kdb_printf(" private= 0x%lx", page_private(page));
+ }
+ /* Cannot use page_mapping(page) here, it needs swapper_space which is
+ * not exported.
+ */
+ if (page->mapping)
+ kdb_printf(" mapping= %p", page->mapping);
+ kdb_printf("\n");
+#undef kdb_page_flags
+}
+
+/*
+ * "inode_pages" command: print the pages held in the page tree of an
+ * inode's mapping.
+ *
+ * Inputs:
+ *	argv[1]	address of the struct inode
+ *	argv[2]	optional page index; when present only the first page
+ *		found at or after that index is shown (index 0 included,
+ *		which used to dump every page instead)
+ * Returns:
+ *	0 or a kdb diagnostic.  Allocation failures and an inode without
+ *	mapping are reported with kdb_printf only.
+ */
+static int
+kdbm_inode_pages(int argc, const char **argv)
+{
+	struct inode *inode = NULL;
+	struct address_space *ap = NULL;
+	unsigned long addr, addr1 = 0;
+	long offset = 0;
+	int nextarg;
+	int diag;
+	pgoff_t next = 0;
+	struct page *page;
+	int first;
+	int single = 0;		/* set when a page index was given */
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		goto out;
+
+	if (argc == 2) {
+		nextarg = 2;
+		diag = kdbgetaddrarg(argc, argv, &nextarg, &addr1,
+				     &offset, NULL);
+		if (diag)
+			goto out;
+		kdb_printf("Looking for page index 0x%lx ... \n", addr1);
+		next = addr1;
+		single = 1;
+	}
+
+	/* Local copies of the inode and its mapping are kept on the heap. */
+	if (!(inode = kmalloc(sizeof(*inode), GFP_ATOMIC))) {
+		kdb_printf("kdbm_inode_pages: cannot kmalloc inode\n");
+		goto out;
+	}
+	if (!(ap = kmalloc(sizeof(*ap), GFP_ATOMIC))) {
+		kdb_printf("kdbm_inode_pages: cannot kmalloc ap\n");
+		goto out;
+	}
+	if ((diag = kdb_getarea(*inode, addr)))
+		goto out;
+	if (!inode->i_mapping) {
+		kdb_printf("inode has no mapping\n");
+		goto out;
+	}
+	if ((diag = kdb_getarea(*ap, (unsigned long) inode->i_mapping)))
+		goto out;
+
+	/* Run the pages in the radix tree, printing the state of each page */
+	first = 1;
+	while (radix_tree_gang_lookup(&ap->page_tree, (void **)&page, next, 1)) {
+		kdbm_show_page(page, first);
+		if (single)
+			break;
+		first = 0;
+		next = page->index + 1;
+	}
+
+out:
+	/* kfree() accepts NULL, so no guards are needed. */
+	kfree(inode);
+	kfree(ap);
+	return diag;
+}
+
+/*
+ * "inode" command: display the struct inode at the address given as the
+ * only argument.  Returns 0 or a kdb diagnostic; an allocation failure
+ * is reported with kdb_printf and still returns 0.
+ */
+static int
+kdbm_inode(int argc, const char **argv)
+{
+	struct inode *inode;
+	unsigned long addr;
+	unsigned char *iaddr;
+	long offset = 0;
+	int nextarg = 1;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	/* The copy lives on the heap rather than on the stack. */
+	inode = kmalloc(sizeof(*inode), GFP_ATOMIC);
+	if (!inode) {
+		kdb_printf("kdbm_inode: cannot kmalloc inode\n");
+		return diag;
+	}
+
+	diag = kdb_getarea(*inode, addr);
+	if (diag)
+		goto out;
+
+	kdb_printf("struct inode at 0x%lx\n", addr);
+
+	kdb_printf(" i_ino = %lu i_count = %u i_size %Ld\n",
+		   inode->i_ino, atomic_read(&inode->i_count),
+		   inode->i_size);
+
+	kdb_printf(" i_mode = 0%o i_nlink = %d i_rdev = 0x%x\n",
+		   inode->i_mode, inode->i_nlink,
+		   inode->i_rdev);
+
+	kdb_printf(" i_hash.nxt = 0x%p i_hash.pprev = 0x%p\n",
+		   inode->i_hash.next,
+		   inode->i_hash.pprev);
+
+	/* List links are shown as the address of the containing object. */
+	kdb_printf(" i_list.nxt = 0x%p i_list.prv = 0x%p\n",
+		   list_entry(inode->i_list.next, struct inode, i_list),
+		   list_entry(inode->i_list.prev, struct inode, i_list));
+
+	kdb_printf(" i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
+		   list_entry(inode->i_dentry.next, struct dentry, d_alias),
+		   list_entry(inode->i_dentry.prev, struct dentry, d_alias));
+
+	/* i_data is reported by its address inside the real inode. */
+	kdb_printf(" i_sb = 0x%p i_op = 0x%p i_data = 0x%lx nrpages = %lu\n",
+		   inode->i_sb, inode->i_op,
+		   addr + offsetof(struct inode, i_data),
+		   inode->i_data.nrpages);
+	kdb_printf(" i_fop= 0x%p i_flock = 0x%p i_mapping = 0x%p\n",
+		   inode->i_fop, inode->i_flock, inode->i_mapping);
+
+	kdb_printf(" i_flags 0x%x i_state 0x%lx [%s]",
+		   inode->i_flags, inode->i_state,
+		   map_flags(inode->i_state, inode_flag_vals));
+
+	/* Address of the i_private member inside the real inode. */
+	iaddr = (unsigned char *)addr + offsetof(struct inode, i_private);
+
+	kdb_printf(" fs specific info @ 0x%p\n", iaddr);
+out:
+	kfree(inode);
+	return diag;
+}
+
+/*
+ * "sb" command: display the struct super_block at the address given as
+ * the only argument.
+ *
+ * A heap copy of the structure is read with kdb_getarea and freed before
+ * returning. Returns 0 or a kdb diagnostic; a failed kmalloc is reported
+ * with kdb_printf and still returns 0.
+ *
+ * The "-" and "++" lines below are two variants of the same printout
+ * carried by this patch.
+ */
+static int
+kdbm_sb(int argc, const char **argv)
+{
+ struct super_block *sb = NULL;
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ goto out;
+ if (!(sb = kmalloc(sizeof(*sb), GFP_ATOMIC))) {
+ kdb_printf("kdbm_sb: cannot kmalloc sb\n");
+ goto out;
+ }
+ if ((diag = kdb_getarea(*sb, addr)))
+ goto out;
+
+ kdb_printf("struct super_block at 0x%lx\n", addr);
+ kdb_printf(" s_dev 0x%x blocksize 0x%lx\n", sb->s_dev, sb->s_blocksize);
+ kdb_printf(" s_flags 0x%lx s_root 0x%p\n", sb->s_flags, sb->s_root);
- kdb_printf(" s_dirt %d s_dirty.next 0x%p s_dirty.prev 0x%p\n",
- sb->s_dirt, sb->s_dirty.next, sb->s_dirty.prev);
- kdb_printf(" s_frozen %d s_id [%s]\n", sb->s_frozen, sb->s_id);
++ kdb_printf(" s_dirt %d s_frozen %d s_id [%s]\n",
++ sb->s_dirt, sb->s_frozen, sb->s_id);
+out:
+ if (sb)
+ kfree(sb);
+ return diag;
+}
+
+
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+/* According to Steve Lord, this code is ix86 specific.  Patches to extend it to
+ * other architectures will be gratefully accepted.
+ *
+ * "memmap" command: walk the page array and print how many pages are
+ * slab, dirty, locked or have buffers, plus a histogram of the page
+ * reference counts.
+ *
+ * Histogram buckets in page_counts[]:
+ *	[0]..[7]	pages whose count is exactly 0..7
+ *	[8]		every other count: 8 and above, and negative values
+ *			other than -1 (those used to index the array out
+ *			of bounds)
+ *	[9]		pages whose count is -1
+ *
+ * Returns 0 or the kdb diagnostic of the first page that cannot be read.
+ */
+static int
+kdbm_memmap(int argc, const char **argv)
+{
+	struct page page;
+	int i, page_count;
+	int slab_count = 0;
+	int dirty_count = 0;
+	int locked_count = 0;
+	int page_counts[10];	/* [8] = large counts, [9] = -1 counts */
+	int buffered_count = 0;
+#ifdef buffer_delay
+	int delay_count = 0;
+#endif
+	int diag;
+	unsigned long addr;
+#ifdef CONFIG_DISCONTIGMEM
+	int node_id = -1, found_node = 0;
+	int tot_page_count = 0;
+	unsigned long unode_id;
+	pg_data_t *pgdat;
+
+	if (argc == 1) {		/* node_id was specified */
+		diag = kdbgetularg(argv[argc], &unode_id);
+		if (diag)
+			return diag;
+		node_id = (int)unode_id;
+	}
+	else if (argc)
+		return KDB_ARGCOUNT;
+
+	tot_page_count = 0;
+	memset(page_counts, 0, sizeof(page_counts));
+
+	for_each_online_pgdat(pgdat) {
+		if ((node_id != -1) && (pgdat->node_id != node_id))
+			continue;
+		found_node = 1;
+		addr = (unsigned long)pgdat->node_mem_map;
+		page_count = pgdat->node_spanned_pages;
+		tot_page_count += page_count;
+#else
+	addr = (unsigned long)mem_map;
+	page_count = max_mapnr;
+	memset(page_counts, 0, sizeof(page_counts));
+#endif
+	for (i = 0; i < page_count; i++) {
+		int refs;
+
+		if ((diag = kdb_getarea(page, addr)))
+			return(diag);
+		addr += sizeof(page);
+
+		if (PageSlab(&page))
+			slab_count++;
+		if (PageDirty(&page))
+			dirty_count++;
+		if (PageLocked(&page))
+			locked_count++;
+
+		/* Only 0..7 may be used as an index into page_counts[]. */
+		refs = page._count.counter;
+		if (refs == -1)
+			page_counts[9]++;
+		else if (refs >= 0 && refs < 8)
+			page_counts[refs]++;
+		else
+			page_counts[8]++;
+
+		if (page_has_buffers(&page)) {
+			buffered_count++;
+#ifdef buffer_delay
+			if (buffer_delay(page.buffers))
+				delay_count++;
+#endif
+		}
+	}
+#ifdef CONFIG_DISCONTIGMEM
+	}
+	page_count = tot_page_count;
+	if (node_id != -1) {
+		if (!found_node) {
+			kdb_printf("Node %d does not exist.\n", node_id);
+			return 0;
+		}
+		kdb_printf("Node %d pages:\n", node_id);
+	}
+#endif
+	kdb_printf(" Total pages: %6d\n", page_count);
+	kdb_printf(" Slab pages: %6d\n", slab_count);
+	kdb_printf(" Dirty pages: %6d\n", dirty_count);
+	kdb_printf(" Locked pages: %6d\n", locked_count);
+	kdb_printf(" Buffer pages: %6d\n", buffered_count);
+#ifdef buffer_delay
+	kdb_printf(" Delalloc pages: %6d\n", delay_count);
+#endif
+	kdb_printf(" -1 page count: %6d\n", page_counts[9]);
+	for (i = 0; i < 8; i++) {
+		kdb_printf(" %d page count: %6d\n",
+			   i, page_counts[i]);
+	}
+	kdb_printf(" high page count: %6d\n", page_counts[8]);
+	return 0;
+}
+#endif /* !CONFIG_DISCONTIGMEM && !NUMA */
+
+/*
+ * Module init: register the page, inode, super_block, buffer, bio and
+ * request display commands with kdb. "page" and "memmap" are only
+ * available when neither CONFIG_DISCONTIGMEM nor CONFIG_NUMA is set.
+ * Always returns 0; the results of kdb_register() are not checked.
+ */
+static int __init kdbm_pg_init(void)
+{
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_register("page", kdbm_page, "<vaddr>", "Display page", 0);
+#endif
+ kdb_register("inode", kdbm_inode, "<vaddr>", "Display inode", 0);
+ kdb_register("sb", kdbm_sb, "<vaddr>", "Display super_block", 0);
+ kdb_register("bh", kdbm_buffers, "<buffer head address>", "Display buffer", 0);
+ kdb_register("bio", kdbm_bio, "<bio address>", "Display bio", 0);
+ kdb_register("inode_pages", kdbm_inode_pages, "<inode *>", "Display pages in an inode", 0);
+ kdb_register("req", kdbm_request, "<vaddr>", "dump request struct", 0);
+ kdb_register("rqueue", kdbm_rqueue, "<vaddr>", "dump request queue", 0);
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_register("memmap", kdbm_memmap, "", "page table summary", 0);
+#endif
+
+ return 0;
+}
+
+
+/*
+ * Module exit: unregister every command that kdbm_pg_init() registered,
+ * under the same configuration conditions.
+ */
+static void __exit kdbm_pg_exit(void)
+{
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_unregister("page");
+#endif
+ kdb_unregister("inode");
+ kdb_unregister("sb");
+ kdb_unregister("bh");
+ kdb_unregister("bio");
+ kdb_unregister("inode_pages");
+ kdb_unregister("req");
+ kdb_unregister("rqueue");
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_unregister("memmap");
+#endif
+}
+
+module_init(kdbm_pg_init)
+module_exit(kdbm_pg_exit)
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/signal.h>
+
+MODULE_AUTHOR("SGI");
+MODULE_DESCRIPTION("Debug struct task and sigset information");
+MODULE_LICENSE("GPL");
+
+/*
+ * Build a printable description of tp->cpus_allowed:
+ *   "ALL"      the mask equals cpu_online_map
+ *   "NONE"     the mask is empty
+ *   "ONLY(n)"  exactly one cpu is set
+ *   otherwise  the cpu list produced by cpulist_scnprintf()
+ *
+ * The string is returned in a function-static buffer, so it is overwritten
+ * by the next call.
+ */
+static char *
+kdb_cpus_allowed_string(struct task_struct *tp)
+{
+ static char maskbuf[NR_CPUS * 8];
+ if (cpus_equal(tp->cpus_allowed, cpu_online_map))
+ strcpy(maskbuf, "ALL");
- else if (cpus_full(tp->cpus_allowed))
- strcpy(maskbuf, "ALL(NR_CPUS)");
+ else if (cpus_empty(tp->cpus_allowed))
+ strcpy(maskbuf, "NONE");
+ else if (cpus_weight(tp->cpus_allowed) == 1)
+ snprintf(maskbuf, sizeof(maskbuf), "ONLY(%d)", first_cpu(tp->cpus_allowed));
+ else
+ cpulist_scnprintf(maskbuf, sizeof(maskbuf), &tp->cpus_allowed);
+ return maskbuf;
+}
+
+/*
+ * kdbm_task
+ *
+ *	Implements the 'task' command: task <vaddr>
+ *
+ *	The task_struct at <vaddr> is copied into a temporary buffer with
+ *	kdb_getarea() and selected fields of the copy are printed.  Addresses
+ *	of embedded members (&thread, &blocked, &pending) are reported
+ *	relative to the original <vaddr>, not to the copy.
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0
+ *	(also 0 when the temporary buffer cannot be allocated).
+ */
+static int
+kdbm_task(int argc, const char **argv)
+{
+	unsigned long addr;
+	long offset = 0;
+	int nextarg = 1;
+	int e;
+	char *mask;
+	struct task_struct *task, *orig;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (e != 0)
+		return(e);
+
+	task = kmalloc(sizeof(*task), GFP_ATOMIC);
+	if (task == NULL) {
+		kdb_printf("%s: cannot kmalloc tp\n", __FUNCTION__);
+		return e;	/* e is 0 here */
+	}
+
+	e = kdb_getarea(*task, addr);
+	if (e) {
+		kdb_printf("%s: invalid task address\n", __FUNCTION__);
+		goto out;
+	}
+
+	/* Only used to compute addresses of members inside the original. */
+	orig = (struct task_struct *)addr;
+
+	kdb_printf(
+		"struct task at 0x%lx, pid=%d flags=0x%x state=%ld comm=\"%s\"\n",
+		addr, task->pid, task->flags, task->state, task->comm);
+
+	kdb_printf(" cpu=%d policy=%u ", kdb_process_cpu(task), task->policy);
+	kdb_printf("prio=%d static_prio=%d cpus_allowed=",
+		   task->prio, task->static_prio);
+
+	/*
+	 * The cpus allowed string may be longer than kdb_printf() can
+	 * handle, so emit it in pieces of 100 characters by temporarily
+	 * terminating the string after each piece.
+	 */
+	for (mask = kdb_cpus_allowed_string(task); strlen(mask) >= 100; mask += 100) {
+		char saved = mask[100];
+
+		mask[100] = '\0';
+		kdb_printf("%s", mask);
+		mask[100] = saved;
+	}
+	kdb_printf("%s", mask);
+
+	kdb_printf(" &thread=0x%p\n", &orig->thread);
+
+	kdb_printf(" need_resched=%d ",
+		   test_tsk_thread_flag(task, TIF_NEED_RESCHED));
+	kdb_printf("time_slice=%u", task->rt.time_slice);
+	kdb_printf(" lock_depth=%d\n", task->lock_depth);
+
+	kdb_printf(" fs=0x%p files=0x%p mm=0x%p\n",
+		   task->fs, task->files, task->mm);
+
+	if (task->sysvsem.undo_list) {
+		kdb_printf(" sysvsem.sem_undo refcnt %d list_proc=0x%p\n",
+			   atomic_read(&task->sysvsem.undo_list->refcnt),
+			   &task->sysvsem.undo_list->list_proc);
+	}
+
+	kdb_printf(" signal=0x%p &blocked=0x%p &pending=0x%p\n",
+		   task->signal, &orig->blocked, &orig->pending);
+
+	kdb_printf(" utime=%ld stime=%ld cutime=%ld cstime=%ld\n",
+		   task->utime, task->stime,
+		   task->signal ? task->signal->cutime : 0L,
+		   task->signal ? task->signal->cstime : 0L);
+
+	kdb_printf(" thread_info=0x%p\n", task_thread_info(task));
+	kdb_printf(" ti flags=0x%lx\n",
+		   (unsigned long)task_thread_info(task)->flags);
+
+#ifdef CONFIG_NUMA
+	kdb_printf(" mempolicy=0x%p il_next=%d\n",
+		   task->mempolicy, task->il_next);
+#endif
+
+out:
+	kfree(task);
+	return e;
+}
+
+/*
+ * kdbm_sigset
+ *
+ *	Implements the 'sigset' command: sigset <vaddr>
+ *
+ *	Copies the sigset_t at <vaddr> into a temporary buffer and prints
+ *	word 0 plus every non-zero higher word, highest index first.  On
+ *	platforms that do not define _NSIG_WORDS only a notice is printed.
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0
+ *	(also 0 when the temporary buffer cannot be allocated).
+ */
+static int
+kdbm_sigset(int argc, const char **argv)
+{
+	sigset_t *sp = NULL;
+	unsigned long addr;
+	long offset=0;
+	int nextarg;
+	int e = 0;
+	int i;
+	char fmt[32];
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+#ifndef _NSIG_WORDS
+	kdb_printf("unavailable on this platform, _NSIG_WORDS not defined.\n");
+#else
+	nextarg = 1;
+	if ((e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) != 0)
+		return(e);
+
+	if (!(sp = kmalloc(sizeof(*sp), GFP_ATOMIC))) {
+		kdb_printf("%s: cannot kmalloc sp\n", __FUNCTION__);
+		goto out;
+	}
+	if ((e = kdb_getarea(*sp, addr))) {
+		kdb_printf("%s: invalid sigset address\n", __FUNCTION__);
+		goto out;
+	}
+
+	/* Zero-padded hex field, two digits per byte of one sigset word. */
+	sprintf(fmt, "[%%d]=0x%%0%dlx ", (int)sizeof(sp->sig[0])*2);
+	/* Report the address the user asked about, not the temporary copy. */
+	kdb_printf("sigset at 0x%lx : ", addr);
+	for (i=_NSIG_WORDS-1; i >= 0; i--) {
+		if (i == 0 || sp->sig[i]) {
+			kdb_printf(fmt, i, sp->sig[i]);
+		}
+	}
+	kdb_printf("\n");
+#endif /* _NSIG_WORDS */
+
+out:
+	if (sp)
+		kfree(sp);
+	return e;
+}
+
+/*
+ * Register the "task" and "sigset" kdb commands.  Always returns 0.
+ */
+static int __init kdbm_task_init(void)
+{
+	static struct {
+		char *cmd;
+		int (*func)(int, const char **);
+		char *usage;
+		char *help;
+	} cmds[] = {
+		{ "task", kdbm_task, "<vaddr>", "Display task_struct" },
+		{ "sigset", kdbm_sigset, "<vaddr>", "Display sigset_t" },
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++)
+		kdb_register(cmds[i].cmd, cmds[i].func, cmds[i].usage,
+			     cmds[i].help, 0);
+
+	return 0;
+}
+
+/*
+ * Unregister the "task" and "sigset" kdb commands.
+ */
+static void __exit kdbm_task_exit(void)
+{
+	static char *cmds[] = { "task", "sigset" };
+	unsigned int i;
+
+	for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++)
+		kdb_unregister(cmds[i]);
+}
+
+module_init(kdbm_task_init)
+module_exit(kdbm_task_exit)
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
- #include <scsi/scsi_device.h>
- #include <scsi/scsi_cmnd.h>
++#include <scsi.h>
+#include <scsi/scsi_host.h>
+#include <asm/pgtable.h>
+
+MODULE_AUTHOR("SGI");
+MODULE_DESCRIPTION("Debug VM information");
+MODULE_LICENSE("GPL");
+
+/* One entry of the vm_flags decoding table: a flag bit and its printable name. */
+struct __vmflags {
+ unsigned long mask;
+ char *name;
+};
+
+/*
+ * Table used by kdbm_print_vm() and kdbm_print_vmp() to decode vm_flags.
+ * Those loops stop at the first entry whose mask is 0, so the final
+ * { 0, "" } entry terminates the table.
+ */
+static struct __vmflags vmflags[] = {
+ { VM_READ, "VM_READ " },
+ { VM_WRITE, "VM_WRITE " },
+ { VM_EXEC, "VM_EXEC " },
+ { VM_SHARED, "VM_SHARED " },
+ { VM_MAYREAD, "VM_MAYREAD " },
+ { VM_MAYWRITE, "VM_MAYWRITE " },
+ { VM_MAYEXEC, "VM_MAYEXEC " },
+ { VM_MAYSHARE, "VM_MAYSHARE " },
+ { VM_GROWSDOWN, "VM_GROWSDOWN " },
+ { VM_GROWSUP, "VM_GROWSUP " },
+ { VM_PFNMAP, "VM_PFNMAP " },
+ { VM_DENYWRITE, "VM_DENYWRITE " },
+ { VM_EXECUTABLE, "VM_EXECUTABLE " },
+ { VM_LOCKED, "VM_LOCKED " },
+ { VM_IO, "VM_IO " },
+ { VM_SEQ_READ, "VM_SEQ_READ " },
+ { VM_RAND_READ, "VM_RAND_READ " },
+ { VM_DONTCOPY, "VM_DONTCOPY " },
+ { VM_DONTEXPAND, "VM_DONTEXPAND " },
+ { VM_RESERVED, "VM_RESERVED " },
+ { VM_ACCOUNT, "VM_ACCOUNT " },
+ { VM_HUGETLB, "VM_HUGETLB " },
+ { VM_NONLINEAR, "VM_NONLINEAR " },
+ { VM_MAPPED_COPY, "VM_MAPPED_COPY " },
+ { VM_INSERTPAGE, "VM_INSERTPAGE " },
+ { 0, "" }
+};
+
+/*
+ * Print one vm_area_struct.  vp points at the structure to print and addr
+ * is the address it is reported under.  The address range, page protection
+ * and decoded vm_flags are always shown; the remaining fields are shown
+ * only when verbose_flg is non-zero.  Always returns 0.
+ */
+static int
+kdbm_print_vm(struct vm_area_struct *vp, unsigned long addr, int verbose_flg)
+{
+	struct __vmflags *flag = vmflags;
+
+	kdb_printf("struct vm_area_struct at 0x%lx for %d bytes\n",
+		   addr, (int) sizeof (struct vm_area_struct));
+
+	kdb_printf("vm_start = 0x%p vm_end = 0x%p\n",
+		   (void *) vp->vm_start, (void *) vp->vm_end);
+	kdb_printf("vm_page_prot = 0x%llx\n",
+		   (unsigned long long)pgprot_val(vp->vm_page_prot));
+
+	/* Decode vm_flags by name; the table ends at the first zero mask. */
+	kdb_printf("vm_flags: ");
+	while (flag->mask) {
+		if (vp->vm_flags & flag->mask)
+			kdb_printf(" %s", flag->name);
+		flag++;
+	}
+	kdb_printf("\n");
+
+	if (verbose_flg) {
+		kdb_printf("vm_mm = 0x%p\n", (void *) vp->vm_mm);
+		kdb_printf("vm_next = 0x%p\n", (void *) vp->vm_next);
+		kdb_printf("shared.vm_set.list.next = 0x%p\n", (void *) vp->shared.vm_set.list.next);
+		kdb_printf("shared.vm_set.list.prev = 0x%p\n", (void *) vp->shared.vm_set.list.prev);
+		kdb_printf("shared.vm_set.parent = 0x%p\n", (void *) vp->shared.vm_set.parent);
+		kdb_printf("shared.vm_set.head = 0x%p\n", (void *) vp->shared.vm_set.head);
+		kdb_printf("anon_vma_node.next = 0x%p\n", (void *) vp->anon_vma_node.next);
+		kdb_printf("anon_vma_node.prev = 0x%p\n", (void *) vp->anon_vma_node.prev);
+		kdb_printf("vm_ops = 0x%p\n", (void *) vp->vm_ops);
+		if (vp->vm_ops) {
+			kdb_printf("vm_ops->open = 0x%p\n", vp->vm_ops->open);
+			kdb_printf("vm_ops->close = 0x%p\n", vp->vm_ops->close);
+			kdb_printf("vm_ops->fault = 0x%p\n", vp->vm_ops->fault);
+#ifdef HAVE_VMOP_MPROTECT
+			kdb_printf("vm_ops->mprotect = 0x%p\n", vp->vm_ops->mprotect);
+#endif
+#ifdef CONFIG_NUMA
+			kdb_printf("vm_ops->set_policy = 0x%p\n", vp->vm_ops->set_policy);
+			kdb_printf("vm_ops->get_policy = 0x%p\n", vp->vm_ops->get_policy);
+#endif
+		}
+		kdb_printf("vm_pgoff = 0x%lx\n", vp->vm_pgoff);
+		kdb_printf("vm_file = 0x%p\n", (void *) vp->vm_file);
+		kdb_printf("vm_private_data = 0x%p\n", vp->vm_private_data);
+#ifdef CONFIG_NUMA
+		kdb_printf("vm_policy = 0x%p\n", vp->vm_policy);
+#endif
+	}
+
+	return 0;
+}
+
+/*
+ * Print a one-line summary of the vm_area_struct at vp: its start and end
+ * addresses followed by the names of the set vm_flags.  When verbose_flg is
+ * non-zero the line is prefixed with the address of the structure itself.
+ * Always returns 0.
+ */
+static int
+kdbm_print_vmp(struct vm_area_struct *vp, int verbose_flg)
+{
+	struct __vmflags *flag = vmflags;
+
+	if (verbose_flg)
+		kdb_printf("0x%lx: ", (unsigned long) vp);
+
+	kdb_printf("0x%p 0x%p ", (void *) vp->vm_start, (void *) vp->vm_end);
+
+	/* The flag table ends at the first entry with a zero mask. */
+	while (flag->mask) {
+		if (vp->vm_flags & flag->mask)
+			kdb_printf(" %s", flag->name);
+		flag++;
+	}
+	kdb_printf("\n");
+
+	return 0;
+}
+
+
+#ifdef CONFIG_NUMA
+#include <linux/mempolicy.h>
+
+/*
+ * kdbm_mpol
+ *
+ *	Implements the 'mempolicy' command: mempolicy <address>
+ *
+ *	Copies the struct mempolicy at <address> into a temporary buffer and
+ *	prints its reference count and mode, plus the preferred node for
+ *	MPOL_PREFERRED or the raw node mask words for MPOL_BIND and
+ *	MPOL_INTERLEAVE.
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0
+ *	(also 0 when the temporary buffer cannot be allocated).
+ */
+static int
+kdbm_mpol(int argc, const char **argv)
+{
+	unsigned long addr;
+	long offset = 0;
+	int nextarg = 1;
+	int err;
+	struct mempolicy *mp;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	err = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (err != 0)
+		return(err);
+
+	mp = kmalloc(sizeof(*mp), GFP_ATOMIC);
+	if (mp == NULL) {
+		kdb_printf("%s: cannot kmalloc mp\n", __FUNCTION__);
+		return err;	/* err is 0 here */
+	}
+
+	err = kdb_getarea(*mp, addr);
+	if (err) {
+		kdb_printf("%s: invalid mempolicy address\n", __FUNCTION__);
+		goto out;
+	}
+
+	kdb_printf("struct mempolicy at 0x%p\n", (struct mempolicy *)addr);
+	kdb_printf(" refcnt %d\n", atomic_read(&mp->refcnt));
+
+	if (mp->mode == MPOL_DEFAULT) {
+		kdb_printf(" mode %d (MPOL_DEFAULT)\n", mp->mode);
+	} else if (mp->mode == MPOL_PREFERRED) {
+		kdb_printf(" mode %d (MPOL_PREFERRED)\n", mp->mode);
+		kdb_printf(" preferred_node %d\n", mp->v.preferred_node);
+	} else if (mp->mode == MPOL_BIND || mp->mode == MPOL_INTERLEAVE) {
+		int word;
+		int nwords = (int)BITS_TO_LONGS(MAX_NUMNODES);
+
+		kdb_printf(" mode %d (%s)\n", mp->mode,
+			   mp->mode == MPOL_INTERLEAVE
+			   ? "MPOL_INTERLEAVE"
+			   : "MPOL_BIND");
+		/* Dump the node mask one unsigned long at a time. */
+		kdb_printf(" nodes:");
+		for (word = 0; word < nwords; word++)
+			kdb_printf(" 0x%lx ", mp->v.nodes.bits[word]);
+		kdb_printf("\n");
+	} else {
+		kdb_printf(" mode %d (unknown)\n", mp->mode);
+	}
+
+out:
+	kfree(mp);
+	return err;
+}
+
+#endif /* CONFIG_NUMA */
+
+/*
+ * kdbm_pgdat
+ *
+ *	Implements the 'pgdat' command: pgdat <node_id>
+ *
+ *	Prints the struct pglist_data (pg_data_t) of the online node whose
+ *	node_id matches.  With CONFIG_NUMA an optional <node_id> argument
+ *	selects the node; without an argument, and always when !CONFIG_NUMA,
+ *	node 0 is used.  For every zonelist each referenced zone is listed
+ *	together with the node and zone index it belongs to.
+ *
+ *	Returns KDB_ARGCOUNT or the kdbgetaddrarg() diagnostic for a bad
+ *	command line (CONFIG_NUMA only), otherwise 0 - also when the node
+ *	is not found.
+ */
+static int
+kdbm_pgdat(int argc, const char **argv)
+{
+	int err = 0, wanted = 0, list;
+	pg_data_t *node = NULL;
+
+#ifdef CONFIG_NUMA
+	if (argc > 1)
+		return KDB_ARGCOUNT;
+	if (argc == 1) {
+		int nextarg = 1;
+		long offset = 0;
+		unsigned long wanted_ul;
+
+		err = kdbgetaddrarg(argc, argv, &nextarg, &wanted_ul,
+				    &offset, NULL);
+		if (err != 0)
+			return(err);
+		wanted = (int)wanted_ul;
+	}
+#endif
+	/* node is left NULL when no online node matches. */
+	for_each_online_pgdat(node) {
+		if (node->node_id == wanted)
+			break;
+	}
+	if (!node) {
+		kdb_printf("%s: specified node not found\n", __FUNCTION__);
+		return 0;
+	}
+	kdb_printf("struct pglist_data at 0x%p node_id = %d\n",
+		   node, node->node_id);
+
+	for (list = 0; list < MAX_ZONELISTS; list++) {
+		struct zoneref *zref = node->node_zonelists[list]._zonerefs;
+		int zr;
+
+		kdb_printf(" _zonerefs[%d] at 0x%p\n", list, zref);
+
+		for (zr = 0; zr <= MAX_ZONES_PER_ZONELIST; zr++, zref++) {
+			struct zone *zone = zonelist_zone(zref);
+			pg_data_t *owner;
+			int found = 0;
+
+			if (!zone)
+				break;
+
+			kdb_printf(" 0x%p", zone);
+
+			/* Identify which node/zone slot this zone pointer is. */
+			for_each_online_pgdat(owner) {
+				int z;
+
+				for (z = 0; z < MAX_NR_ZONES && !found; z++) {
+					if (zone == &owner->node_zones[z]) {
+						kdb_printf (" (node %d node_zones[%d])",
+							    owner->node_id, z);
+						found = 1;
+					}
+				}
+				if (found)
+					break;
+			}
+			kdb_printf("\n");
+		}
+	}
+
+	kdb_printf(" nr_zones = %d", node->nr_zones);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+	kdb_printf(" node_mem_map = 0x%p\n", node->node_mem_map);
+#endif
+	kdb_printf(" bdata = 0x%p", node->bdata);
+	kdb_printf(" node_start_pfn = 0x%lx\n", node->node_start_pfn);
+	kdb_printf(" node_present_pages = %ld (0x%lx)\n",
+		   node->node_present_pages, node->node_present_pages);
+	kdb_printf(" node_spanned_pages = %ld (0x%lx)\n",
+		   node->node_spanned_pages, node->node_spanned_pages);
+	kdb_printf(" kswapd = 0x%p\n", node->kswapd);
+
+	return err;
+}
+
+/*
+ * kdbm_vm
+ *
+ *	This function implements the 'vm' and 'vmp' commands; which one is
+ *	decided by argv[0].
+ *
+ *	vm [-v] <address>	Print vm_area_struct at <address>
+ *	vmp [-v] <pid>		Print all vm_area_structs for <pid>
+ *
+ *	Returns KDB_ARGCOUNT for a malformed command line, the diagnostic
+ *	from argument parsing or kdb_getarea() on failure, otherwise 0.
+ */
+
+static int
+kdbm_vm(int argc, const char **argv)
+{
+	unsigned long addr;
+	long offset = 0;
+	int nextarg;
+	int diag;
+	int verbose_flg = 0;
+
+	if (argc == 2) {
+		if (strcmp(argv[1], "-v") != 0) {
+			return KDB_ARGCOUNT;
+		}
+		verbose_flg = 1;
+	} else if (argc != 1) {
+		return KDB_ARGCOUNT;
+	}
+
+	if (strcmp(argv[0], "vmp") == 0) {
+		struct task_struct *g, *tp;
+		struct vm_area_struct *vp;
+		unsigned long pid_ul;
+		pid_t pid;
+
+		/*
+		 * kdbgetularg() stores a full unsigned long, so parse into
+		 * one and narrow afterwards instead of writing through a
+		 * cast pointer to the smaller pid_t.
+		 */
+		if ((diag = kdbgetularg(argv[argc], &pid_ul)))
+			return diag;
+		pid = (pid_t)pid_ul;
+
+		kdb_do_each_thread(g, tp) {
+			if (tp->pid == pid) {
+				if (tp->mm != NULL) {
+					if (verbose_flg)
+						kdb_printf
+						    ("vm_area_struct ");
+					kdb_printf
+					    ("vm_start vm_end vm_flags\n");
+					vp = tp->mm->mmap;
+					while (vp != NULL) {
+						kdbm_print_vmp(vp, verbose_flg);
+						vp = vp->vm_next;
+					}
+				}
+				return 0;
+			}
+		} kdb_while_each_thread(g, tp);
+
+		kdb_printf("No process with pid == %d found\n", pid);
+
+	} else {
+		struct vm_area_struct v;
+
+		/* The address is always the last argument. */
+		nextarg = argc;
+		if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
+					  NULL))
+		    || (diag = kdb_getarea(v, addr)))
+			return (diag);
+
+		kdbm_print_vm(&v, addr, verbose_flg);
+	}
+
+	return 0;
+}
+
+/*
+ * Print the raw value of *pte followed by a decoded summary in parentheses.
+ * For a present pte the summary is a string of attribute letters
+ * (X = exec, W = write, R = read, A = young, D = dirty; X and R only where
+ * the architecture defines pte_exec/pte_read as macros).  For a non-present
+ * pte the swap offset and swap type are printed instead.
+ *
+ * No trailing newline is printed.  Always returns 0.
+ */
+static int
+kdbm_print_pte(pte_t * pte)
+{
+	kdb_printf("0x%lx (", (unsigned long) pte_val(*pte));
+
+	if (pte_present(*pte)) {
+#ifdef pte_exec
+		if (pte_exec(*pte))
+			kdb_printf("X");
+#endif
+		if (pte_write(*pte))
+			kdb_printf("W");
+#ifdef pte_read
+		if (pte_read(*pte))
+			kdb_printf("R");
+#endif
+		if (pte_young(*pte))
+			kdb_printf("A");
+		if (pte_dirty(*pte))
+			kdb_printf("D");
+
+	} else {
+		kdb_printf("OFFSET=0x%lx ", swp_offset(pte_to_swp_entry(*pte)));
+		/* Hex to match the 0x prefix. */
+		kdb_printf("TYPE=0x%x", swp_type(pte_to_swp_entry(*pte)));
+	}
+
+	kdb_printf(")");
+
+	/* final newline is output by caller of kdbm_print_pte() */
+
+	return 0;
+}
+
+/*
+ * kdbm_pte
+ *
+ *	This function implements the 'pte' command.  Print all pte_t structures
+ *	that map to the given virtual address range (<address> through <address>
+ *	plus <nbytes>) for the given process.  The default value for nbytes is
+ *	one.
+ *
+ *	pte -m <mm> <address> [<nbytes>]	Print all pte_t structures for
+ *						virtual <address> in address space
+ *						of <mm> which is a pointer to a
+ *						mm_struct
+ *	pte -p <pid> <address> [<nbytes>]	Print all pte_t structures for
+ *						virtual <address> in address space
+ *						of <pid>
+ *
+ *	One line is printed per page in the range; the pte itself is shown
+ *	only when every level of the page table, including the pte, is
+ *	present.
+ *
+ *	Returns KDB_ARGCOUNT for a malformed command line, the diagnostic from
+ *	argument parsing or kdb_getarea() on failure, otherwise 0.
+ */
+
+static int
+kdbm_pte(int argc, const char **argv)
+{
+	unsigned long addr;
+	long offset = 0;
+	int nextarg;
+	unsigned long nbytes = 1;
+	long npgs;
+	int diag;
+	int found;
+	unsigned long pid_ul;
+	pid_t pid;
+	struct task_struct *tp;
+	struct mm_struct *mm, copy_of_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if (argc < 3 || argc > 4) {
+		return KDB_ARGCOUNT;
+	}
+
+	if (strcmp(argv[1], "-p") == 0) {
+		/*
+		 * kdbgetularg() stores a full unsigned long, so parse into
+		 * one and narrow afterwards instead of writing through a
+		 * cast pointer to the smaller pid_t.
+		 */
+		if ((diag = kdbgetularg(argv[2], &pid_ul))) {
+			return diag;
+		}
+		pid = (pid_t)pid_ul;
+
+		found = 0;
+		for_each_process(tp) {
+			if (tp->pid == pid) {
+				if (tp->mm != NULL) {
+					found = 1;
+					break;
+				}
+				kdb_printf("task structure's mm field is NULL\n");
+				return 0;
+			}
+		}
+
+		if (!found) {
+			kdb_printf("No process with pid == %d found\n", pid);
+			return 0;
+		}
+		mm = tp->mm;
+	} else if (strcmp(argv[1], "-m") == 0) {
+		/* Work on a local copy of the mm_struct at the given address. */
+		nextarg = 2;
+		if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
+					  NULL))
+		    || (diag = kdb_getarea(copy_of_mm, addr)))
+			return (diag);
+		mm = &copy_of_mm;
+	} else {
+		return KDB_ARGCOUNT;
+	}
+
+	if ((diag = kdbgetularg(argv[3], &addr))) {
+		return diag;
+	}
+
+	if (argc == 4) {
+		if ((diag = kdbgetularg(argv[4], &nbytes))) {
+			return diag;
+		}
+	}
+
+	kdb_printf("vaddr pte\n");
+
+	/* Number of pages touched by [addr, addr + nbytes). */
+	npgs = ((((addr & ~PAGE_MASK) + nbytes) + ~PAGE_MASK) >> PAGE_SHIFT);
+	while (npgs-- > 0) {
+
+		kdb_printf("0x%p ", (void *) (addr & PAGE_MASK));
+
+		pgd = pgd_offset(mm, addr);
+		if (pgd_present(*pgd)) {
+			pud = pud_offset(pgd, addr);
+			if (pud_present(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (pmd_present(*pmd)) {
+					/*
+					 * NOTE(review): there is no matching
+					 * pte_unmap() for this mapping -
+					 * confirm that is acceptable in kdb
+					 * context.
+					 */
+					pte = pte_offset_map(pmd, addr);
+					if (pte_present(*pte)) {
+						kdbm_print_pte(pte);
+					}
+				}
+			}
+		}
+
+		kdb_printf("\n");
+		addr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/*
+ * kdbm_rpte
+ *
+ *	This function implements the 'rpte' command.  Print all pte_t structures
+ *	that contain the given physical page range (<pfn> through <pfn>
+ *	plus <npages>) for the given process.  The default value for npages is
+ *	one.
+ *
+ *	rpte -m <mm> <pfn> [<npages>]	Print all pte_t structures for
+ *					physical page <pfn> in address space
+ *					of <mm> which is a pointer to a
+ *					mm_struct
+ *	rpte -p <pid> <pfn> [<npages>]	Print all pte_t structures for
+ *					physical page <pfn> in address space
+ *					of <pid>
+ *
+ *	The whole page table of the mm is walked; for every matching pte the
+ *	pfn, the virtual address rebuilt from the table indices and the
+ *	decoded pte are printed.
+ *
+ *	Returns KDB_ARGCOUNT for a malformed command line, the diagnostic from
+ *	argument parsing or kdb_getarea() on failure, otherwise 0.
+ */
+
+static int
+kdbm_rpte(int argc, const char **argv)
+{
+	unsigned long addr;
+	unsigned long pfn;
+	long offset = 0;
+	int nextarg;
+	unsigned long npages = 1;
+	int diag;
+	int found;
+	unsigned long pid_ul;
+	pid_t pid;
+	struct task_struct *tp;
+	struct mm_struct *mm, copy_of_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long g, u, m, t;
+
+	if (argc < 3 || argc > 4) {
+		return KDB_ARGCOUNT;
+	}
+
+	if (strcmp(argv[1], "-p") == 0) {
+		/*
+		 * kdbgetularg() stores a full unsigned long, so parse into
+		 * one and narrow afterwards instead of writing through a
+		 * cast pointer to the smaller pid_t.
+		 */
+		if ((diag = kdbgetularg(argv[2], &pid_ul))) {
+			return diag;
+		}
+		pid = (pid_t)pid_ul;
+
+		found = 0;
+		for_each_process(tp) {
+			if (tp->pid == pid) {
+				if (tp->mm != NULL) {
+					found = 1;
+					break;
+				}
+				kdb_printf("task structure's mm field is NULL\n");
+				return 0;
+			}
+		}
+
+		if (!found) {
+			kdb_printf("No process with pid == %d found\n", pid);
+			return 0;
+		}
+		mm = tp->mm;
+	} else if (strcmp(argv[1], "-m") == 0) {
+		/* Work on a local copy of the mm_struct at the given address. */
+		nextarg = 2;
+		if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
+					  NULL))
+		    || (diag = kdb_getarea(copy_of_mm, addr)))
+			return (diag);
+		mm = &copy_of_mm;
+	} else {
+		return KDB_ARGCOUNT;
+	}
+
+	if ((diag = kdbgetularg(argv[3], &pfn))) {
+		return diag;
+	}
+
+	if (argc == 4) {
+		if ((diag = kdbgetularg(argv[4], &npages))) {
+			return diag;
+		}
+	}
+
+	/* spaces after vaddr depends on sizeof(unsigned long) */
+	kdb_printf("pfn vaddr%*s pte\n",
+		   (int)(2*sizeof(unsigned long) + 2 - 5), " ");
+
+	for (g = 0, pgd = pgd_offset(mm, 0UL); g < PTRS_PER_PGD; ++g, ++pgd) {
+		if (pgd_none(*pgd) || pgd_bad(*pgd))
+			continue;
+		for (u = 0, pud = pud_offset(pgd, 0UL); u < PTRS_PER_PUD; ++u, ++pud) {
+			if (pud_none(*pud) || pud_bad(*pud))
+				continue;
+			for (m = 0, pmd = pmd_offset(pud, 0UL); m < PTRS_PER_PMD; ++m, ++pmd) {
+				if (pmd_none(*pmd) || pmd_bad(*pmd))
+					continue;
+				/*
+				 * NOTE(review): there is no matching
+				 * pte_unmap() for this mapping - confirm that
+				 * is acceptable in kdb context.
+				 */
+				for (t = 0, pte = pte_offset_map(pmd, 0UL); t < PTRS_PER_PTE; ++t, ++pte) {
+					if (pte_none(*pte))
+						continue;
+					if (pte_pfn(*pte) < pfn || pte_pfn(*pte) >= (pfn + npages))
+						continue;
+					/* Rebuild the virtual address from the table indices. */
+					addr = g << PGDIR_SHIFT;
+#ifdef __ia64__
+					/* IA64 plays tricks with the pgd mapping to save space.
+					 * This reverses pgd_index().
+					 */
+					{
+						unsigned long region = g >> (PAGE_SHIFT - 6);
+						unsigned long l1index = g - (region << (PAGE_SHIFT - 6));
+						addr = (region << 61) + (l1index << PGDIR_SHIFT);
+					}
+#endif
+#ifdef PUD_SHIFT
+					/*
+					 * Include the pud index; it is always 0
+					 * when the pud level is folded.
+					 */
+					addr += u << PUD_SHIFT;
+#endif
+					addr += (m << PMD_SHIFT) + (t << PAGE_SHIFT);
+					kdb_printf("0x%-14lx " kdb_bfd_vma_fmt0 " ",
+						   pte_pfn(*pte), addr);
+					kdbm_print_pte(pte);
+					kdb_printf("\n");
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Print the dentry at kernel address daddr and, when it has one, a summary
+ * of its inode.  Both structures are copied with kdb_getarea() before being
+ * examined.  The dentry name is shown only when it fits the local buffer
+ * and could be copied; otherwise just its length and address are printed.
+ *
+ * Returns the kdb_getarea() diagnostic when the dentry or its inode cannot
+ * be read, otherwise 0.
+ */
+static int
+kdbm_print_dentry(unsigned long daddr)
+{
+	struct dentry d;
+	int diag;
+	char buf[256];
+
+	kdb_printf("Dentry at 0x%lx\n", daddr);
+	if ((diag = kdb_getarea(d, (unsigned long)daddr)))
+		return diag;
+
+	if ((d.d_name.len > sizeof(buf)) || (diag = kdb_getarea_size(buf, (unsigned long)(d.d_name.name), d.d_name.len)))
+		kdb_printf(" d_name.len = %d d_name.name = 0x%p\n",
+			   d.d_name.len, d.d_name.name);
+	else
+		/*
+		 * Print from the copy in buf rather than through the raw
+		 * kernel pointer.  buf is not NUL terminated; the precision
+		 * bounds the output.
+		 */
+		kdb_printf(" d_name.len = %d d_name.name = 0x%p <%.*s>\n",
+			   d.d_name.len, d.d_name.name,
+			   (int)(d.d_name.len), buf);
+
+	kdb_printf(" d_count = %d d_flags = 0x%x d_inode = 0x%p\n",
+		   atomic_read(&d.d_count), d.d_flags, d.d_inode);
+
+	kdb_printf(" d_parent = 0x%p\n", d.d_parent);
+
+	kdb_printf(" d_hash.nxt = 0x%p d_hash.prv = 0x%p\n",
+		   d.d_hash.next, d.d_hash.pprev);
+
+	kdb_printf(" d_lru.nxt = 0x%p d_lru.prv = 0x%p\n",
+		   d.d_lru.next, d.d_lru.prev);
+
+	kdb_printf(" d_child.nxt = 0x%p d_child.prv = 0x%p\n",
+		   d.d_u.d_child.next, d.d_u.d_child.prev);
+
+	kdb_printf(" d_subdirs.nxt = 0x%p d_subdirs.prv = 0x%p\n",
+		   d.d_subdirs.next, d.d_subdirs.prev);
+
+	kdb_printf(" d_alias.nxt = 0x%p d_alias.prv = 0x%p\n",
+		   d.d_alias.next, d.d_alias.prev);
+
+	kdb_printf(" d_op = 0x%p d_sb = 0x%p d_fsdata = 0x%p\n",
+		   d.d_op, d.d_sb, d.d_fsdata);
+
+	kdb_printf(" d_iname = %s\n",
+		   d.d_iname);
+
+	if (d.d_inode) {
+		struct inode i;
+		kdb_printf("\nInode Entry at 0x%p\n", d.d_inode);
+		if ((diag = kdb_getarea(i, (unsigned long)d.d_inode)))
+			return diag;
+		kdb_printf(" i_mode = 0%o i_nlink = %d i_rdev = 0x%x\n",
+			   i.i_mode, i.i_nlink, i.i_rdev);
+
+		kdb_printf(" i_ino = %ld i_count = %d\n",
+			   i.i_ino, atomic_read(&i.i_count));
+
+		kdb_printf(" i_hash.nxt = 0x%p i_hash.prv = 0x%p\n",
+			   i.i_hash.next, i.i_hash.pprev);
+
+		kdb_printf(" i_list.nxt = 0x%p i_list.prv = 0x%p\n",
+			   i.i_list.next, i.i_list.prev);
+
+		kdb_printf(" i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
+			   i.i_dentry.next, i.i_dentry.prev);
+
+	}
+	kdb_printf("\n");
+	return 0;
+}
+
+/*
+ * kdbm_filp
+ *
+ *	Implements the 'filp' command: filp <filp>
+ *
+ *	Copies the struct file at the given address, prints selected fields
+ *	and then prints the dentry it points to via kdbm_print_dentry().
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise
+ *	the result of kdbm_print_dentry().
+ */
+static int
+kdbm_filp(int argc, const char **argv)
+{
+	struct file filp;
+	int nextarg = 1;
+	unsigned long addr;
+	long offset;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+	diag = kdb_getarea(filp, addr);
+	if (diag)
+		return diag;
+
+	kdb_printf("File Pointer at 0x%lx\n", addr);
+
+	kdb_printf(" fu_list.nxt = 0x%p fu_list.prv = 0x%p\n",
+		   filp.f_u.fu_list.next, filp.f_u.fu_list.prev);
+
+	kdb_printf(" f_dentry = 0x%p f_vfsmnt = 0x%p f_op = 0x%p\n",
+		   filp.f_dentry, filp.f_vfsmnt, filp.f_op);
+
+	kdb_printf(" f_count = " kdb_f_count_fmt
+		   " f_flags = 0x%x f_mode = 0x%x\n",
+		   atomic_read(&filp.f_count), filp.f_flags, filp.f_mode);
+
+	kdb_printf(" f_pos = %Ld\n", filp.f_pos);
+#ifdef CONFIG_SECURITY
+	kdb_printf(" security = 0x%p\n", filp.f_security);
+#endif
+
+	kdb_printf(" private_data = 0x%p f_mapping = 0x%p\n\n",
+		   filp.private_data, filp.f_mapping);
+
+	return kdbm_print_dentry((unsigned long)filp.f_dentry);
+}
+
+/*
+ * kdbm_fl
+ *
+ *	Implements the 'fl' command: fl <fl>
+ *
+ *	Copies the struct file_lock at the given address and prints its
+ *	fields.  The fl_ops and fl_lmops tables are read through the
+ *	pointers stored in the copy.
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0.
+ */
+static int
+kdbm_fl(int argc, const char **argv)
+{
+	struct file_lock lock;
+	int nextarg = 1;
+	unsigned long addr;
+	long offset;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+	diag = kdb_getarea(lock, addr);
+	if (diag)
+		return diag;
+
+	kdb_printf("File_lock at 0x%lx\n", addr);
+
+	kdb_printf(" fl_next = 0x%p fl_link.nxt = 0x%p fl_link.prv = 0x%p\n",
+		   lock.fl_next, lock.fl_link.next, lock.fl_link.prev);
+	kdb_printf(" fl_block.nxt = 0x%p fl_block.prv = 0x%p\n",
+		   lock.fl_block.next, lock.fl_block.prev);
+	kdb_printf(" fl_owner = 0x%p fl_pid = %d fl_wait = 0x%p\n",
+		   lock.fl_owner, lock.fl_pid, &lock.fl_wait);
+	kdb_printf(" fl_file = 0x%p fl_flags = 0x%x\n",
+		   lock.fl_file, lock.fl_flags);
+	kdb_printf(" fl_type = %d fl_start = 0x%llx fl_end = 0x%llx\n",
+		   lock.fl_type, lock.fl_start, lock.fl_end);
+
+	kdb_printf(" file_lock_operations");
+	if (!lock.fl_ops)
+		kdb_printf(" empty\n");
+	else
+		kdb_printf("\n fl_copy_lock = 0x%p fl_release_private = 0x%p\n",
+			   lock.fl_ops->fl_copy_lock, lock.fl_ops->fl_release_private);
+
+	kdb_printf(" lock_manager_operations");
+	if (!lock.fl_lmops)
+		kdb_printf(" empty\n");
+	else
+		kdb_printf("\n fl_compare_owner = 0x%p fl_notify = 0x%p\n",
+			   lock.fl_lmops->fl_compare_owner, lock.fl_lmops->fl_notify);
+
+	kdb_printf(" fl_fasync = 0x%p fl_break 0x%lx\n",
+		   lock.fl_fasync, lock.fl_break_time);
+
+	return 0;
+}
+
+
+/*
+ * kdbm_dentry
+ *
+ *	Implements the 'dentry' command: dentry <dentry>
+ *
+ *	Parses the address argument and hands it to kdbm_print_dentry().
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	kdbgetaddrarg() diagnostic on failure, otherwise the result of
+ *	kdbm_print_dentry().
+ */
+static int
+kdbm_dentry(int argc, const char **argv)
+{
+	int nextarg = 1;
+	unsigned long addr;
+	long offset;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	return kdbm_print_dentry(addr);
+}
+
+/*
+ * kdbm_kobject
+ *
+ *	Implements the 'kobject' command: kobject <kobject>
+ *
+ *	Copies the struct kobject at the given address and prints its name
+ *	(when the first byte of the name is readable), reference count, list
+ *	links and parent/kset/ktype/sd pointers.
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0.
+ */
+static int
+kdbm_kobject(int argc, const char **argv)
+{
+	struct kobject k;
+	int nextarg;
+	unsigned long addr;
+	long offset;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+	    (diag = kdb_getarea(k, addr)))
+		return diag;
+
+
+	kdb_printf("kobject at 0x%lx\n", addr);
+
+	if (k.name) {
+		char c;
+		kdb_printf(" name 0x%p", k.name);
+		/* Only print the string if its first byte can be read. */
+		if (kdb_getarea(c, (unsigned long)k.name) == 0)
+			kdb_printf(" '%s'", k.name);
+		kdb_printf("\n");
+	}
+
+	if (k.name != kobject_name((struct kobject *)addr))
+		kdb_printf(" name '%.20s'\n", k.name);
+
+	kdb_printf(" kref.refcount %d\n", atomic_read(&k.kref.refcount));
+
+	kdb_printf(" entry.next = 0x%p entry.prev = 0x%p\n",
+		   k.entry.next, k.entry.prev);
+
+	kdb_printf(" parent = 0x%p kset = 0x%p ktype = 0x%p sd = 0x%p\n",
+		   k.parent, k.kset, k.ktype, k.sd);
+
+	return 0;
+}
+
+/*
+ * kdbm_sh
+ *
+ *	Implements the 'sh' command: sh <vaddr>
+ *
+ *	Copies the struct Scsi_Host at the given address and prints selected
+ *	fields.
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0.
+ */
+static int
+kdbm_sh(int argc, const char **argv)
+{
+	int diag;
+	int nextarg = 1;
+	unsigned long addr;
+	long offset = 0L;
+	struct Scsi_Host host;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+	diag = kdb_getarea(host, addr);
+	if (diag)
+		return diag;
+
+	kdb_printf("Scsi_Host at 0x%lx\n", addr);
+	kdb_printf("host_queue = 0x%p\n", host.__devices.next);
+	kdb_printf("ehandler = 0x%p eh_action = 0x%p\n",
+		   host.ehandler, host.eh_action);
+	kdb_printf("host_wait = 0x%p hostt = 0x%p\n",
+		   &host.host_wait, host.hostt);
+	kdb_printf("host_failed = %d host_no = %d resetting = %d\n",
+		   host.host_failed, host.host_no, host.resetting);
+	kdb_printf("max id/lun/channel = [%d/%d/%d] this_id = %d\n",
+		   host.max_id, host.max_lun, host.max_channel, host.this_id);
+	kdb_printf("can_queue = %d cmd_per_lun = %d sg_tablesize = %d u_isa_dma = %d\n",
+		   host.can_queue, host.cmd_per_lun, host.sg_tablesize,
+		   host.unchecked_isa_dma);
+	kdb_printf("host_blocked = %d reverse_ordering = %d \n",
+		   host.host_blocked, host.reverse_ordering);
+
+	return 0;
+}
+
+/*
+ * kdbm_sd
+ *
+ *	Implements the 'sd' command: sd <vaddr>
+ *
+ *	Copies the struct scsi_device at the given address into a temporary
+ *	buffer and prints selected fields.  single_lun is read through the
+ *	sdev_target pointer stored in the copy.
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0
+ *	(also 0 when the temporary buffer cannot be allocated).
+ */
+static int
+kdbm_sd(int argc, const char **argv)
+{
+	int diag;
+	int nextarg = 1;
+	unsigned long addr;
+	long offset = 0L;
+	struct scsi_device *sdev;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	sdev = kmalloc(sizeof(*sdev), GFP_ATOMIC);
+	if (sdev == NULL) {
+		kdb_printf("kdbm_sd: cannot kmalloc sd\n");
+		return diag;	/* diag is 0 here */
+	}
+
+	diag = kdb_getarea(*sdev, addr);
+	if (!diag) {
+		kdb_printf("scsi_device at 0x%lx\n", addr);
+		kdb_printf("next = 0x%p prev = 0x%p host = 0x%p\n",
+			   sdev->siblings.next, sdev->siblings.prev, sdev->host);
+		kdb_printf("device_busy = %d current_cmnd 0x%p\n",
+			   sdev->device_busy, sdev->current_cmnd);
+		kdb_printf("id/lun/chan = [%d/%d/%d] single_lun = %d device_blocked = %d\n",
+			   sdev->id, sdev->lun, sdev->channel,
+			   sdev->sdev_target->single_lun, sdev->device_blocked);
+		kdb_printf("queue_depth = %d current_tag = %d scsi_level = %d\n",
+			   sdev->queue_depth, sdev->current_tag, sdev->scsi_level);
+		kdb_printf("%8.8s %16.16s %4.4s\n",
+			   sdev->vendor, sdev->model, sdev->rev);
+	}
+
+	kfree(sdev);
+	return diag;
+}
+
+/*
+ * kdbm_sc
+ *
+ *	Implements the 'sc' command: sc <vaddr>
+ *
+ *	Copies the struct scsi_cmnd at the given address into a temporary
+ *	buffer and prints selected fields, including the first 12 bytes of
+ *	cmnd[].
+ *
+ *	Returns KDB_ARGCOUNT unless exactly one argument is given, the
+ *	diagnostic from kdbgetaddrarg()/kdb_getarea() on failure, otherwise 0
+ *	(also 0 when the temporary buffer cannot be allocated).
+ */
+static int
+kdbm_sc(int argc, const char **argv)
+{
+	int diag;
+	int nextarg = 1;
+	unsigned long addr;
+	long offset = 0L;
+	struct scsi_cmnd *cmd;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+	if (cmd == NULL) {
+		kdb_printf("kdbm_sc: cannot kmalloc sc\n");
+		return diag;	/* diag is 0 here */
+	}
+
+	diag = kdb_getarea(*cmd, addr);
+	if (!diag) {
+		kdb_printf("scsi_cmnd at 0x%lx\n", addr);
+		kdb_printf("device = 0x%p next = 0x%p\n",
+			   cmd->device, cmd->list.next);
+		kdb_printf("serial_number = %ld retries = %d\n",
+			   cmd->serial_number, cmd->retries);
+		kdb_printf("cmd_len = %d\n", cmd->cmd_len);
+		kdb_printf("cmnd = [%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x]\n",
+			   cmd->cmnd[0], cmd->cmnd[1], cmd->cmnd[2], cmd->cmnd[3],
+			   cmd->cmnd[4], cmd->cmnd[5], cmd->cmnd[6], cmd->cmnd[7],
+			   cmd->cmnd[8], cmd->cmnd[9], cmd->cmnd[10], cmd->cmnd[11]);
+		kdb_printf("request_buffer = 0x%p request_bufflen = %d\n",
+			   scsi_sglist(cmd), scsi_bufflen(cmd));
+		kdb_printf("use_sg = %d\n", scsi_sg_count(cmd));
+		kdb_printf("underflow = %d transfersize = %d\n",
+			   cmd->underflow, cmd->transfersize);
+		kdb_printf("tag = %d\n", cmd->tag);
+	}
+
+	kfree(cmd);
+	return diag;
+}
+
+/*
+ * Register the kdb commands provided by this module.  "mempolicy" exists
+ * only with CONFIG_NUMA, and the usage text of "pgdat" depends on it.  The
+ * results of kdb_register() are not checked; always returns 0.
+ */
+static int __init kdbm_vm_init(void)
+{
+	static struct {
+		char *cmd;
+		int (*func)(int, const char **);
+		char *usage;
+		char *help;
+	} cmds[] = {
+		{ "vm", kdbm_vm, "[-v] <vaddr>", "Display vm_area_struct" },
+		{ "vmp", kdbm_vm, "[-v] <pid>", "Display all vm_area_struct for <pid>" },
+#ifdef CONFIG_NUMA
+		{ "mempolicy", kdbm_mpol, "<vaddr>", "Display mempolicy structure" },
+		{ "pgdat", kdbm_pgdat, "<node_id>", "Display pglist_data node structure" },
+#else
+		{ "pgdat", kdbm_pgdat, "", "Display pglist_data node structure" },
+#endif
+		{ "pte", kdbm_pte, "( -m <mm> | -p <pid> ) <vaddr> [<nbytes>]", "Display pte_t for mm_struct or pid" },
+		{ "rpte", kdbm_rpte, "( -m <mm> | -p <pid> ) <pfn> [<npages>]", "Find pte_t containing pfn for mm_struct or pid" },
+		{ "dentry", kdbm_dentry, "<dentry>", "Display interesting dentry stuff" },
+		{ "kobject", kdbm_kobject, "<kobject>", "Display interesting kobject stuff" },
+		{ "filp", kdbm_filp, "<filp>", "Display interesting filp stuff" },
+		{ "fl", kdbm_fl, "<fl>", "Display interesting file_lock stuff" },
+		{ "sh", kdbm_sh, "<vaddr>", "Show scsi_host" },
+		{ "sd", kdbm_sd, "<vaddr>", "Show scsi_device" },
+		{ "sc", kdbm_sc, "<vaddr>", "Show scsi_cmnd" },
+	};
+	unsigned int i;
+
+	/* Commands are registered in table order. */
+	for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++)
+		kdb_register(cmds[i].cmd, cmds[i].func, cmds[i].usage,
+			     cmds[i].help, 0);
+
+	return 0;
+}
+
+/*
+ * Unregister the kdb commands of this module, in the same order in which
+ * kdbm_vm_init() registered them.
+ */
+static void __exit kdbm_vm_exit(void)
+{
+	static char *cmds[] = {
+		"vm",
+		"vmp",
+#ifdef CONFIG_NUMA
+		"mempolicy",
+#endif
+		"pgdat",
+		"pte",
+		"rpte",
+		"dentry",
+		"kobject",
+		"filp",
+		"fl",
+		"sh",
+		"sd",
+		"sc",
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++)
+		kdb_unregister(cmds[i]);
+}
+
+module_init(kdbm_vm_init)
+module_exit(kdbm_vm_exit)
return 0;
}
device_initcall(kallsyms_init);
+
++
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+
+/*
+ * Step through the kallsyms symbols for kdb.  Call with *pos == 0 to start
+ * a walk; every call advances *pos past the entries it examined and returns
+ * the name of the next symbol that has a non-empty name, or NULL once
+ * update_iter() reports that there is nothing at *pos.
+ *
+ * The iterator state is kept in a function-static variable, so the returned
+ * name is overwritten by the next call.
+ */
+const char *kdb_walk_kallsyms(loff_t *pos)
+{
+	static struct kallsym_iter kdb_walk_kallsyms_iter;
+	struct kallsym_iter *iter = &kdb_walk_kallsyms_iter;
+
+	if (*pos == 0) {
+		memset(iter, 0, sizeof(*iter));
+		reset_iter(iter, 0);
+	}
+
+	/* Some debugging symbols have no name.  Ignore them. */
+	do {
+		if (!update_iter(iter, *pos))
+			return NULL;
+		++*pos;
+	} while (!iter->name[0]);
+
+	return iter->name;
+}
+#endif /* CONFIG_KDB */
#include <asm/system.h>
#include <asm/sections.h>
+#ifdef CONFIG_KDB_KDUMP
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kdb.h>
+#endif
+
- #ifndef CONFIG_XEN
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t* crash_notes;
- #endif
+int dump_after_notifier;
/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
&prstatus, sizeof(prstatus));
final_note(buf);
}
- #endif
+#ifdef CONFIG_SYSCTL
+static ctl_table dump_after_notifier_table[] = { /* child entries of the "kernel" directory in kexec_sys_table */
+ {
+ .ctl_name = KERN_DUMP_AFTER_NOTIFIER,
+ .procname = "dump_after_notifier",
+ .data = &dump_after_notifier, /* the int flag defined earlier in this file */
+ .maxlen = sizeof(int),
+ .mode = 0644, /* owner may write, everyone may read */
+ .proc_handler = &proc_dointvec, /* plain integer handler; no min/max bounds are supplied */
+ },
+ { .ctl_name = 0 } /* terminating entry */
+};
+
+static ctl_table kexec_sys_table[] = { /* root table handed to register_sysctl_table() */
+ {
+ .ctl_name = CTL_KERN,
+ .procname = "kernel",
+ .mode = 0555, /* read/search only; this entry carries a child table rather than data */
+ .child = dump_after_notifier_table, /* entries listed beneath "kernel" */
+ },
+ { .ctl_name = 0 } /* terminating entry */
+};
+#endif
+
static int __init crash_notes_memory_init(void)
{
- #ifndef CONFIG_XEN
/* Allocate memory for saving cpu registers. */
crash_notes = alloc_percpu(note_buf_t);
if (!crash_notes) {
" states failed\n");
return -ENOMEM;
}
- #endif
+#ifdef CONFIG_SYSCTL
+ register_sysctl_table(kexec_sys_table);
+#endif
return 0;
}
module_init(crash_notes_memory_init)
/* Don't keep modinfo and version sections. */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
- #ifdef CONFIG_KALLSYMS
- /* Keep symbol and string tables for decoding later. */
- sechdrs[symindex].sh_flags |= SHF_ALLOC;
- sechdrs[strindex].sh_flags |= SHF_ALLOC;
- #endif
+ if (unwindex)
+ sechdrs[unwindex].sh_flags |= SHF_ALLOC;
/* Check module struct version now, before we try to use module. */
if (!check_modstruct_version(sechdrs, versindex, mod)) {
/* Drop initial reference. */
module_put(mod);
trim_init_extable(mod);
+ unwind_remove_table(mod->unwind_info, 1);
+ #ifdef CONFIG_KALLSYMS
+ mod->num_symtab = mod->core_num_syms;
+ mod->symtab = mod->core_symtab;
+ mod->strtab = mod->core_strtab;
+ #endif
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
* 'A' - ACPI table overridden.
* 'W' - Taint on warning.
* 'C' - modules from drivers/staging are loaded.
+ * 'N' - Unsupported modules loaded.
+ * 'X' - Modules with external support loaded.
*
- * The string is overwritten by the next call to print_taint().
+ * The string is overwritten by the next call to print_tainted().
*/
const char *print_tainted(void)
{
return do_syslog(type, buf, len);
}
+#ifdef CONFIG_DEBUG_KERNEL
+/* It's very handy to be able to view the syslog buffer during debug.
+ * But do_syslog() uses locks so it cannot be used during debugging.
+ * Instead, provide the start and end of the physical and logical logs.
+ * This is equivalent to do_syslog(3).
+ *
+ * No lock is taken here.  On return the caller's four-slot array holds:
+ *   syslog_data[0] - log_buf, the physical start of the buffer
+ *   syslog_data[1] - log_buf + log_buf_len, one past the physical end
+ *   syslog_data[2] - log_buf + log_end minus the smaller of logged_chars
+ *                    and log_buf_len, the logical start
+ *   syslog_data[3] - log_buf + log_end, the logical end
+ * Slots 2 and 3 are built from the raw log_end value and are not wrapped
+ * into the buffer here.  NOTE(review): callers presumably reduce them
+ * modulo the buffer size before dereferencing - confirm.
+ */
+void debugger_syslog_data(char *syslog_data[4])
+{
+ syslog_data[0] = log_buf;
+ syslog_data[1] = log_buf + log_buf_len;
+ syslog_data[2] = log_buf + log_end - (logged_chars < log_buf_len ? logged_chars : log_buf_len);
+ syslog_data[3] = log_buf + log_end;
+}
+#endif /* CONFIG_DEBUG_KERNEL */
+
++#ifdef CONFIG_KDB
++/* kdb dmesg command needs access to the syslog buffer. do_syslog() uses locks
++ * so it cannot be used during debugging. Just tell kdb where the start and
++ * end of the physical and logical logs are. This is equivalent to do_syslog(3).
++ *
++ * No lock is taken here.  On return the caller's four-slot array holds:
++ *   syslog_data[0] - log_buf, the physical start of the buffer
++ *   syslog_data[1] - log_buf + log_buf_len, one past the physical end
++ *   syslog_data[2] - log_buf + log_end minus the smaller of logged_chars
++ *                    and log_buf_len, the logical start
++ *   syslog_data[3] - log_buf + log_end, the logical end
++ * Slots 2 and 3 are built from the raw log_end value and are not wrapped
++ * into the buffer here.  NOTE(review): the kdb caller presumably reduces
++ * them modulo the buffer size before dereferencing - confirm.
++ */
++void kdb_syslog_data(char *syslog_data[4])
++{
++ syslog_data[0] = log_buf;
++ syslog_data[1] = log_buf + log_buf_len;
++ syslog_data[2] = log_buf + log_end - (logged_chars < log_buf_len ? logged_chars : log_buf_len);
++ syslog_data[3] = log_buf + log_end;
++}
++#endif /* CONFIG_KDB */
++
/*
* Call the console drivers on a range of log_buf
*/
};
#endif /* CONFIG_CGROUP_CPUACCT */
+#ifdef CONFIG_KDB
+
+#include <linux/kdb.h>
+
+/*
+ * kdb_prio - dump a real-time priority array plus the runnable SCHED_NORMAL
+ * tasks of one CPU.
+ * @name:       label printed in front of the bitmap line
+ * @array:      rt priority array to decode
+ * @xxx_printf: output routine supplied by the caller (kdb functions must
+ *              not be called directly from this file)
+ * @cpu:        CPU whose SCHED_NORMAL tasks are listed at the end
+ *
+ * Output, in order: the raw bitmap words, the priorities whose bit is set
+ * below MAX_RT_PRIO, the tasks queued at each priority, and finally every
+ * process that is on a runqueue, belongs to @cpu and has policy
+ * SCHED_NORMAL.  No locks are taken.
+ */
+static void
+kdb_prio(char *name, struct rt_prio_array *array, kdb_printf_t xxx_printf,
+	 unsigned int cpu)
+{
+	int pri, printed_header = 0;
+	unsigned int word;
+	struct task_struct *p;
+
+	/*
+	 * Print exactly as many words as the bitmap has.  The bitmap covers
+	 * MAX_RT_PRIO + 1 bits, so a fixed count of three longs reads past
+	 * the array on 64-bit builds and drops the last word on 32-bit ones.
+	 */
+	xxx_printf(" %s rt bitmap:", name);
+	for (word = 0; word < ARRAY_SIZE(array->bitmap); word++)
+		xxx_printf(" 0x%lx", array->bitmap[word]);
+	xxx_printf("\n");
+
+	pri = sched_find_first_bit(array->bitmap);
+	if (pri < MAX_RT_PRIO) {
+		xxx_printf(" rt bitmap priorities:");
+		while (pri < MAX_RT_PRIO) {
+			xxx_printf(" %d", pri);
+			pri++;
+			pri = find_next_bit(array->bitmap, MAX_RT_PRIO, pri);
+		}
+		xxx_printf("\n");
+	}
+
+	/* Walk every per-priority list, whether or not its bit is set. */
+	for (pri = 0; pri < MAX_RT_PRIO; pri++) {
+		int printed_hdr = 0;
+		struct list_head *head, *curr;
+
+		head = array->queue + pri;
+		for (curr = head->next; curr != head; curr = curr->next) {
+			struct task_struct *task;
+
+			if (!printed_hdr) {
+				xxx_printf(" queue at priority=%d\n", pri);
+				printed_hdr = 1;
+			}
+			/*
+			 * list_entry() is pointer arithmetic on a live list
+			 * node, so the result needs no NULL check.
+			 */
+			task = list_entry(curr, struct task_struct, rt.run_list);
+			/* rt.time_slice is unsigned, hence %u. */
+			xxx_printf(" 0x%p %d %s time_slice:%u\n",
+				   task, task->pid, task->comm,
+				   task->rt.time_slice);
+		}
+	}
+
+	for_each_process(p) {
+		if (p->se.on_rq && (task_cpu(p) == cpu) &&
+		    (p->policy == SCHED_NORMAL)) {
+			if (!printed_header) {
+				xxx_printf(" sched_normal queue:\n");
+				printed_header = 1;
+			}
+			xxx_printf(" 0x%p %d %s pri:%d spri:%d npri:%d\n",
+				   p, p->pid, p->comm, p->prio,
+				   p->static_prio, p->normal_prio);
+		}
+	}
+}
+
+/* This code must be in sched.c because struct rq is only defined in this
+ * source. To allow most of kdb to be modular, this code cannot call any kdb
+ * functions directly, any external functions that it needs must be passed in
+ * as parameters.
+ *
+ * kdb_runqueue - print a summary of one CPU's runqueue.
+ * @cpu:        CPU number; it is used as given, no range or online check
+ *              is made here, so the caller must pass a valid CPU.
+ * @xxx_printf: output routine supplied by the caller.
+ *
+ * Prints the lock state, the current task, the load figures (SMP only),
+ * nr_running and nr_switches, then hands the rt priority array to
+ * kdb_prio().  The runqueue lock is only inspected, never taken.
+ */
+
+void
+kdb_runqueue(unsigned long cpu, kdb_printf_t xxx_printf)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	/* cpu is unsigned long, so the conversion is %lu rather than %ld. */
+	xxx_printf("CPU%lu lock:%s curr:0x%p(%d)(%s)",
+		   cpu, spin_is_locked(&rq->lock) ? "LOCKED" : "free",
+		   rq->curr, rq->curr->pid, rq->curr->comm);
+	if (rq->curr == rq->idle)
+		xxx_printf(" is idle");
+	xxx_printf("\n ");
+#ifdef CONFIG_SMP
+	xxx_printf(" cpu_load:%lu %lu %lu",
+		   rq->cpu_load[0], rq->cpu_load[1], rq->cpu_load[2]);
+#endif
+	/* %llu takes an unsigned long long argument; cast to match. */
+	xxx_printf(" nr_running:%lu nr_switches:%llu\n",
+		   rq->nr_running, (unsigned long long)rq->nr_switches);
+	kdb_prio("active", &rq->rt.active, xxx_printf, (unsigned int)cpu);
+}
+EXPORT_SYMBOL(kdb_runqueue);
+
+#endif /* CONFIG_KDB */
++
+ #ifndef CONFIG_SMP
+
+ int rcu_expedited_torture_stats(char *page) /* variant built when CONFIG_SMP is not set */
+ {
+ return 0; /* nothing is written to page; report zero characters */
+ }
+ EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
+
+ void synchronize_sched_expedited(void) /* variant built when CONFIG_SMP is not set */
+ { /* intentionally empty: this build does no work here */
+ }
+ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+ #else /* #ifndef CONFIG_SMP */
+
+ static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
+ static DEFINE_MUTEX(rcu_sched_expedited_mutex);
+
+ #define RCU_EXPEDITED_STATE_POST -2
+ #define RCU_EXPEDITED_STATE_IDLE -1
+
+ static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+
+ /*
+  * Format the expedited-grace-period state for the torture test.
+  *
+  * Writes "state: <rcu_expedited_state> /", then " <cpu>:<dest_cpu>" for
+  * every online CPU's rcu_migration_req, then a newline, into @page.
+  * No bound is applied to the output, so @page must be large enough.
+  *
+  * Returns the number of characters written (excluding the final NUL).
+  */
+ int rcu_expedited_torture_stats(char *page)
+ {
+ 	char *out = page;
+ 	int cpu;
+
+ 	out += sprintf(out, "state: %d /", rcu_expedited_state);
+ 	for_each_online_cpu(cpu)
+ 		out += sprintf(out, " %d:%d", cpu,
+ 			       per_cpu(rcu_migration_req, cpu).dest_cpu);
+ 	out += sprintf(out, "\n");
+
+ 	return out - page;
+ }
+ EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
+
+ static long synchronize_sched_expedited_count;
+
+ /*
+  * Wait for an rcu-sched grace period to elapse, but use "big hammer"
+  * approach to force grace period to end quickly. This consumes
+  * significant time on all CPUs, and is thus not recommended for
+  * any sort of common-case code.
+  *
+  * Note that it is illegal to call this function while holding any
+  * lock that is acquired by a CPU-hotplug notifier. Failing to
+  * observe this restriction will result in deadlock.
+  *
+  * Outline: snapshot synchronize_sched_expedited_count, then try to take
+  * rcu_sched_expedited_mutex.  While the mutex is busy, back off for a
+  * growing delay; give up and fall back to synchronize_sched() after ten
+  * attempts, or return early once the count has moved past the snapshot.
+  * With the mutex held, queue one request on every online CPU's migration
+  * queue, wait for each to complete, and bump the count so that waiters
+  * can observe the finished pass.
+  */
+ void synchronize_sched_expedited(void)
+ {
+ 	int cpu;
+ 	unsigned long flags;
+ 	bool need_full_sync = false;
+ 	struct rq *rq;
+ 	struct migration_req *req;
+ 	long snap;
+ 	int trycount = 0;
+
+ 	smp_mb(); /* ensure prior mod happens before capturing snap. */
+ 	snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
+ 	get_online_cpus();
+ 	while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
+ 		put_online_cpus();
+ 		if (trycount++ < 10)
+ 			udelay(trycount * num_online_cpus());
+ 		else {
+ 			synchronize_sched();
+ 			return;
+ 		}
+ 		if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
+ 			smp_mb(); /* ensure test happens before caller kfree */
+ 			return;
+ 		}
+ 		get_online_cpus();
+ 	}
+ 	rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
+ 	for_each_online_cpu(cpu) {
+ 		rq = cpu_rq(cpu);
+ 		req = &per_cpu(rcu_migration_req, cpu);
+ 		init_completion(&req->done);
+ 		req->task = NULL;
+ 		req->dest_cpu = RCU_MIGRATION_NEED_QS;
+ 		spin_lock_irqsave(&rq->lock, flags);
+ 		list_add(&req->list, &rq->migration_queue);
+ 		spin_unlock_irqrestore(&rq->lock, flags);
+ 		wake_up_process(rq->migration_thread);
+ 	}
+ 	for_each_online_cpu(cpu) {
+ 		rcu_expedited_state = cpu;
+ 		req = &per_cpu(rcu_migration_req, cpu);
+ 		rq = cpu_rq(cpu);
+ 		wait_for_completion(&req->done);
+ 		spin_lock_irqsave(&rq->lock, flags);
+ 		if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
+ 			need_full_sync = true;
+ 		req->dest_cpu = RCU_MIGRATION_IDLE;
+ 		spin_unlock_irqrestore(&rq->lock, flags);
+ 	}
+ 	rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+ 	/*
+ 	 * Record the completed pass while still holding the mutex.  Without
+ 	 * this the count never changes and the early-exit test in the
+ 	 * trylock loop above can never succeed.
+ 	 */
+ 	synchronize_sched_expedited_count++;
+ 	mutex_unlock(&rcu_sched_expedited_mutex);
+ 	put_online_cpus();
+ 	if (need_full_sync)
+ 		synchronize_sched();
+ }
+ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+ #endif /* #else #ifndef CONFIG_SMP */
.mode = 0644,
.proc_handler = &scan_unevictable_handler,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "heap-stack-gap",
+ .data = &heap_stack_gap,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
- #ifdef CONFIG_PRESWAP
+ #ifdef CONFIG_MEMORY_FAILURE
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "preswap",
- .data = NULL,
- .maxlen = sizeof(unsigned long),
+ .procname = "memory_failure_early_kill",
+ .data = &sysctl_memory_failure_early_kill,
+ .maxlen = sizeof(sysctl_memory_failure_early_kill),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "memory_failure_recovery",
+ .data = &sysctl_memory_failure_recovery,
+ .maxlen = sizeof(sysctl_memory_failure_recovery),
.mode = 0644,
- .proc_handler = &preswap_sysctl_handler,
- .extra1 = (void *)&preswap_zero,
- .extra2 = (void *)&preswap_infinity,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
},
#endif
+
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
} else
goto err;
+#ifdef CONFIG_IA64
+ memcpy(stats, &statn, sizeof(statn));
+#endif
- return send_reply(rep_skb, info->snd_pid);
+ return send_reply(rep_skb, info);
err:
nlmsg_free(rep_skb);
return rc;
err = dev_alloc_name(dev, newname);
if (err < 0)
return err;
- }
- else if (__dev_get_by_name(net, newname))
+ } else if (__dev_get_by_name(net, newname))
return -EEXIST;
- else
+ else {
+ if (strncmp(newname, dev->name, IFNAMSIZ))
+ printk(KERN_INFO "%s renamed to %s by %s [%u]\n",
+ dev->name, newname, current->comm,
+ current->pid);
strlcpy(dev->name, newname, IFNAMSIZ);
+ }
rollback:
/* For now only devices in the initial network namespace
select CRYPTO
select CRYPTO_ECB
select CRYPTO_ARC4
- select CRYPTO_AES
select CRC32
- select WIRELESS_EXT
---help---
This option enables the hardware independent IEEE 802.11
networking stack.
$(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \
$(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
$(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \
- $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
- $(if $(CONFIG_MARKERS),-M $(markersfile)) \
$(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \
- $(if $(cross_build),-c)
+ $(if $(cross_build),-c) \
+ $(if $(CONFIG_ENTERPRISE_SUPPORT), \
+ -N $(firstword $(wildcard $(dir $(MODVERDIR))/Module.supported \
+ $(objtree)/Module.supported /dev/null)))
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
cmd_modpost = $(modpost) -s