#include <linux/cpumask.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
+#include <linux/gfp.h>
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h> /* for switch state wrappers */
+#endif /* CONFIG_KDB */
#include <asm/delay.h>
#include <asm/machvec.h>
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2006, 2007-2009 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * Common code for doing accurate backtraces on i386 and x86_64, including
+ * printing the values of arguments.
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/nmi.h>
+#include <asm/asm-offsets.h>
+#include <asm/system.h>
+
+#define KDB_DEBUG_BB(fmt, ...) \
+ {if (KDB_DEBUG(BB)) kdb_printf(fmt, ## __VA_ARGS__);}
+#define KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix) \
+ kdb_printf(prefix "%c0x%x" suffix, \
+ offset >= 0 ? '+' : '-', \
+ offset >= 0 ? offset : -offset)
+#define KDB_DEBUG_BB_OFFSET(offset, prefix, suffix) \
+ {if (KDB_DEBUG(BB)) KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix);}
+
+#define BB_CHECK(expr, val, ret) \
+({ \
+ if (unlikely(expr)) { \
+ kdb_printf("%s, line %d: BB_CHECK(" #expr ") failed " \
+ #val "=%lx\n", \
+ __FUNCTION__, __LINE__, (long)val); \
+ bb_giveup = 1; \
+ return ret; \
+ } \
+})
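+
+/* Illustrative use of BB_CHECK, not a call from the original flow: guard an
+ * index before using it as a register code,
+ *
+ *	BB_CHECK(reg >= ARRAY_SIZE(bbrg_name), reg, );
+ *
+ * When the expression is true this prints the failed check plus the offending
+ * value, sets bb_giveup and returns 'ret' (empty here, for a void function)
+ * from the enclosing function.
+ */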
+
+static int bb_giveup;
+
+/* Use BBRG_Rxx for both i386 and x86_64. RAX through R15 must be at the end,
+ * starting with RAX. Some of these codes do not reflect actual registers;
+ * such codes are special cases when parsing the record of register changes.
+ * When updating BBRG_ entries, update bbrg_name as well.
+ */
+
+enum bb_reg_code
+{
+ BBRG_UNDEFINED = 0, /* Register contents are undefined */
+ BBRG_OSP, /* original stack pointer on entry to function */
+ BBRG_RAX,
+ BBRG_RBX,
+ BBRG_RCX,
+ BBRG_RDX,
+ BBRG_RDI,
+ BBRG_RSI,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_R8,
+ BBRG_R9,
+ BBRG_R10,
+ BBRG_R11,
+ BBRG_R12,
+ BBRG_R13,
+ BBRG_R14,
+ BBRG_R15,
+};
+
+static const char *bbrg_name[] = {
+ [BBRG_UNDEFINED] = "undefined",
+ [BBRG_OSP] = "osp",
+ [BBRG_RAX] = "rax",
+ [BBRG_RBX] = "rbx",
+ [BBRG_RCX] = "rcx",
+ [BBRG_RDX] = "rdx",
+ [BBRG_RDI] = "rdi",
+ [BBRG_RSI] = "rsi",
+ [BBRG_RBP] = "rbp",
+ [BBRG_RSP] = "rsp",
+ [BBRG_R8] = "r8",
+ [BBRG_R9] = "r9",
+ [BBRG_R10] = "r10",
+ [BBRG_R11] = "r11",
+ [BBRG_R12] = "r12",
+ [BBRG_R13] = "r13",
+ [BBRG_R14] = "r14",
+ [BBRG_R15] = "r15",
+};
+
+/* Map a register name to its register code. This includes the sub-register
+ * addressable fields, e.g. parts of rax can be addressed as ax, al, ah, eax.
+ * The list is sorted so it can be binary chopped; the sort command is:
+ * LANG=C sort -t '"' -k2
+ */
+
+struct bb_reg_code_map {
+ enum bb_reg_code reg;
+ const char *name;
+};
+
+static const struct bb_reg_code_map
+bb_reg_code_map[] = {
+ { BBRG_RAX, "ah" },
+ { BBRG_RAX, "al" },
+ { BBRG_RAX, "ax" },
+ { BBRG_RBX, "bh" },
+ { BBRG_RBX, "bl" },
+ { BBRG_RBP, "bp" },
+ { BBRG_RBP, "bpl" },
+ { BBRG_RBX, "bx" },
+ { BBRG_RCX, "ch" },
+ { BBRG_RCX, "cl" },
+ { BBRG_RCX, "cx" },
+ { BBRG_RDX, "dh" },
+ { BBRG_RDI, "di" },
+ { BBRG_RDI, "dil" },
+ { BBRG_RDX, "dl" },
+ { BBRG_RDX, "dx" },
+ { BBRG_RAX, "eax" },
+ { BBRG_RBP, "ebp" },
+ { BBRG_RBX, "ebx" },
+ { BBRG_RCX, "ecx" },
+ { BBRG_RDI, "edi" },
+ { BBRG_RDX, "edx" },
+ { BBRG_RSI, "esi" },
+ { BBRG_RSP, "esp" },
+ { BBRG_R10, "r10" },
+ { BBRG_R10, "r10d" },
+ { BBRG_R10, "r10l" },
+ { BBRG_R10, "r10w" },
+ { BBRG_R11, "r11" },
+ { BBRG_R11, "r11d" },
+ { BBRG_R11, "r11l" },
+ { BBRG_R11, "r11w" },
+ { BBRG_R12, "r12" },
+ { BBRG_R12, "r12d" },
+ { BBRG_R12, "r12l" },
+ { BBRG_R12, "r12w" },
+ { BBRG_R13, "r13" },
+ { BBRG_R13, "r13d" },
+ { BBRG_R13, "r13l" },
+ { BBRG_R13, "r13w" },
+ { BBRG_R14, "r14" },
+ { BBRG_R14, "r14d" },
+ { BBRG_R14, "r14l" },
+ { BBRG_R14, "r14w" },
+ { BBRG_R15, "r15" },
+ { BBRG_R15, "r15d" },
+ { BBRG_R15, "r15l" },
+ { BBRG_R15, "r15w" },
+ { BBRG_R8, "r8" },
+ { BBRG_R8, "r8d" },
+ { BBRG_R8, "r8l" },
+ { BBRG_R8, "r8w" },
+ { BBRG_R9, "r9" },
+ { BBRG_R9, "r9d" },
+ { BBRG_R9, "r9l" },
+ { BBRG_R9, "r9w" },
+ { BBRG_RAX, "rax" },
+ { BBRG_RBP, "rbp" },
+ { BBRG_RBX, "rbx" },
+ { BBRG_RCX, "rcx" },
+ { BBRG_RDI, "rdi" },
+ { BBRG_RDX, "rdx" },
+ { BBRG_RSI, "rsi" },
+ { BBRG_RSP, "rsp" },
+ { BBRG_RSI, "si" },
+ { BBRG_RSI, "sil" },
+ { BBRG_RSP, "sp" },
+ { BBRG_RSP, "spl" },
+};
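+
+/* A minimal sketch of the binary chop that the sort order above permits
+ * (illustrative only; 'name' is the register name being looked up):
+ *
+ *	int low = 0, high = ARRAY_SIZE(bb_reg_code_map) - 1;
+ *	while (low <= high) {
+ *		int mid = (low + high) / 2;
+ *		int cmp = strcmp(name, bb_reg_code_map[mid].name);
+ *		if (cmp == 0)
+ *			return bb_reg_code_map[mid].reg;
+ *		if (cmp < 0)
+ *			high = mid - 1;
+ *		else
+ *			low = mid + 1;
+ *	}
+ *	return BBRG_UNDEFINED;
+ */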
+
+/* Record register contents in terms of the values that were passed to this
+ * function, IOW track which registers contain an input value. A register's
+ * contents can be undefined, can contain an input register value or can
+ * contain an offset from the original stack pointer.
+ *
+ * This structure is used to represent the current contents of the integer
+ * registers, it is held in an array that is indexed by BBRG_xxx. The element
+ * for BBRG_xxx indicates what input value is currently in BBRG_xxx. When
+ * 'value' is BBRG_OSP then register BBRG_xxx contains a stack pointer,
+ * pointing at 'offset' from the original stack pointer on entry to the
+ * function. When 'value' is not BBRG_OSP then element BBRG_xxx contains the
+ * original contents of an input register and offset is ignored.
+ *
+ * An input register 'value' can be stored in more than one register and/or in
+ * more than one memory location.
+ */
+
+struct bb_reg_contains
+{
+ enum bb_reg_code value: 8;
+ short offset;
+};
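+
+/* For example (an illustrative prologue, not tied to any function here):
+ * after 'push %rbp; mov %rsp,%rbp' the element for BBRG_RBP would be
+ * { BBRG_OSP, -KDB_WORD_SIZE }, i.e. rbp holds a stack pointer one word
+ * below the original stack pointer, because the push lowered rsp by one
+ * word before the copy.
+ */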
+
+/* Note: the offsets in struct bb_mem_contains in this code are _NOT_ offsets
+ * from OSP, they are offsets from the current RSP. This fits better with the
+ * way that struct pt_regs is built; some code pushes extra data before pt_regs, so
+ * working with OSP relative offsets gets messy. struct bb_mem_contains
+ * entries must be in descending order of RSP offset.
+ */
+
+typedef struct { DECLARE_BITMAP(bits, BBRG_R15+1); } bbrgmask_t;
+#define BB_SKIP(reg) (1 << (BBRG_ ## reg))
+struct bb_mem_contains {
+ short offset_address;
+ enum bb_reg_code value: 8;
+};
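+
+/* For example, the entry { 0x28, BBRG_RBX } in the x86_64 full_pt_regs table
+ * below says that the word at rsp+0x28 is expected to hold the input value of
+ * rbx.
+ */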
+
+/* Transfer of control to a label outside the current function. If the
+ * transfer is to a known common restore path that expects known registers
+ * and/or a known memory state (e.g. struct pt_regs) then do a sanity check on
+ * the state at this point.
+ */
+
+struct bb_name_state {
+ const char *name; /* target function */
+ bfd_vma address; /* Address of target function */
+ const char *fname; /* optional from function name */
+ const struct bb_mem_contains *mem; /* expected memory state */
+ const struct bb_reg_contains *regs; /* expected register state */
+ const unsigned short mem_size; /* ARRAY_SIZE(mem) */
+ const unsigned short regs_size; /* ARRAY_SIZE(regs) */
+ const short osp_offset; /* RSP in regs == OSP+osp_offset */
+ const bbrgmask_t skip_mem; /* Some slots in mem may be undefined */
+ const bbrgmask_t skip_regs; /* Some slots in regs may be undefined */
+};
+
+/* NS (NAME_STATE) macros define the register and memory state when we transfer
+ * control to or start decoding a special case name. Use NS when the target
+ * label always has the same state. Use NS_FROM and specify the source label
+ * if the target state is slightly different depending on where it is branched
+ * from. This gives better state checking, by isolating the special cases.
+ *
+ * Note: for the same target label, NS_FROM entries must be followed by a
+ * single NS entry.
+ */
+
+#define NS_FROM(iname, ifname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
+ { \
+ .name = iname, \
+ .fname = ifname, \
+ .mem = imem, \
+ .regs = iregs, \
+ .mem_size = ARRAY_SIZE(imem), \
+ .regs_size = ARRAY_SIZE(iregs), \
+ .skip_mem.bits[0] = iskip_mem, \
+ .skip_regs.bits[0] = iskip_regs, \
+ .osp_offset = iosp_offset, \
+ .address = 0 \
+ }
+
+/* Shorter forms for the common cases */
+#define NS(iname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
+ NS_FROM(iname, NULL, imem, iregs, iskip_mem, iskip_regs, iosp_offset)
+#define NS_MEM(iname, imem, iskip_mem) \
+ NS_FROM(iname, NULL, imem, no_regs, iskip_mem, 0, 0)
+#define NS_MEM_FROM(iname, ifname, imem, iskip_mem) \
+ NS_FROM(iname, ifname, imem, no_regs, iskip_mem, 0, 0)
+#define NS_REG(iname, iregs, iskip_regs) \
+ NS_FROM(iname, NULL, no_memory, iregs, 0, iskip_regs, 0)
+#define NS_REG_FROM(iname, ifname, iregs, iskip_regs) \
+ NS_FROM(iname, ifname, no_memory, iregs, 0, iskip_regs, 0)
+
+static void
+bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src);
+
+static const char *bb_mod_name, *bb_func_name;
+
+static int
+bb_noret(const char *name)
+{
+ if (strcmp(name, "panic") == 0 ||
+ strcmp(name, "do_exit") == 0 ||
+ strcmp(name, "do_group_exit") == 0 ||
+ strcmp(name, "complete_and_exit") == 0)
+ return 1;
+ return 0;
+}
+
+/*============================================================================*/
+/* */
+/* Most of the basic block code and data is common to x86_64 and i386. This */
+/* large ifdef contains almost all of the differences between the two */
+/* architectures. */
+/* */
+/* Make sure you update the correct section of this ifdef. */
+/* */
+/*============================================================================*/
+
+#ifdef CONFIG_X86_64
+
+/* Registers that can be used to pass parameters, in the order that parameters
+ * are passed.
+ */
+
+static const enum bb_reg_code
+bb_param_reg[] = {
+ BBRG_RDI,
+ BBRG_RSI,
+ BBRG_RDX,
+ BBRG_RCX,
+ BBRG_R8,
+ BBRG_R9,
+};
+
+static const enum bb_reg_code
+bb_preserved_reg[] = {
+ BBRG_RBX,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_R12,
+ BBRG_R13,
+ BBRG_R14,
+ BBRG_R15,
+};
+
+static const struct bb_mem_contains full_pt_regs[] = {
+ { 0x70, BBRG_RDI },
+ { 0x68, BBRG_RSI },
+ { 0x60, BBRG_RDX },
+ { 0x58, BBRG_RCX },
+ { 0x50, BBRG_RAX },
+ { 0x48, BBRG_R8 },
+ { 0x40, BBRG_R9 },
+ { 0x38, BBRG_R10 },
+ { 0x30, BBRG_R11 },
+ { 0x28, BBRG_RBX },
+ { 0x20, BBRG_RBP },
+ { 0x18, BBRG_R12 },
+ { 0x10, BBRG_R13 },
+ { 0x08, BBRG_R14 },
+ { 0x00, BBRG_R15 },
+};
+static const struct bb_mem_contains full_pt_regs_plus_1[] = {
+ { 0x78, BBRG_RDI },
+ { 0x70, BBRG_RSI },
+ { 0x68, BBRG_RDX },
+ { 0x60, BBRG_RCX },
+ { 0x58, BBRG_RAX },
+ { 0x50, BBRG_R8 },
+ { 0x48, BBRG_R9 },
+ { 0x40, BBRG_R10 },
+ { 0x38, BBRG_R11 },
+ { 0x30, BBRG_RBX },
+ { 0x28, BBRG_RBP },
+ { 0x20, BBRG_R12 },
+ { 0x18, BBRG_R13 },
+ { 0x10, BBRG_R14 },
+ { 0x08, BBRG_R15 },
+};
+/*
+ * Going into error_exit we have the hardware-pushed error_code on the stack
+ * plus a full pt_regs
+ */
+static const struct bb_mem_contains error_code_full_pt_regs[] = {
+ { 0x78, BBRG_UNDEFINED },
+ { 0x70, BBRG_RDI },
+ { 0x68, BBRG_RSI },
+ { 0x60, BBRG_RDX },
+ { 0x58, BBRG_RCX },
+ { 0x50, BBRG_RAX },
+ { 0x48, BBRG_R8 },
+ { 0x40, BBRG_R9 },
+ { 0x38, BBRG_R10 },
+ { 0x30, BBRG_R11 },
+ { 0x28, BBRG_RBX },
+ { 0x20, BBRG_RBP },
+ { 0x18, BBRG_R12 },
+ { 0x10, BBRG_R13 },
+ { 0x08, BBRG_R14 },
+ { 0x00, BBRG_R15 },
+};
+static const struct bb_mem_contains partial_pt_regs[] = {
+ { 0x40, BBRG_RDI },
+ { 0x38, BBRG_RSI },
+ { 0x30, BBRG_RDX },
+ { 0x28, BBRG_RCX },
+ { 0x20, BBRG_RAX },
+ { 0x18, BBRG_R8 },
+ { 0x10, BBRG_R9 },
+ { 0x08, BBRG_R10 },
+ { 0x00, BBRG_R11 },
+};
+static const struct bb_mem_contains partial_pt_regs_plus_1[] = {
+ { 0x48, BBRG_RDI },
+ { 0x40, BBRG_RSI },
+ { 0x38, BBRG_RDX },
+ { 0x30, BBRG_RCX },
+ { 0x28, BBRG_RAX },
+ { 0x20, BBRG_R8 },
+ { 0x18, BBRG_R9 },
+ { 0x10, BBRG_R10 },
+ { 0x08, BBRG_R11 },
+};
+static const struct bb_mem_contains partial_pt_regs_plus_2[] = {
+ { 0x50, BBRG_RDI },
+ { 0x48, BBRG_RSI },
+ { 0x40, BBRG_RDX },
+ { 0x38, BBRG_RCX },
+ { 0x30, BBRG_RAX },
+ { 0x28, BBRG_R8 },
+ { 0x20, BBRG_R9 },
+ { 0x18, BBRG_R10 },
+ { 0x10, BBRG_R11 },
+};
+static const struct bb_mem_contains no_memory[] = {
+};
+/* Hardware has already pushed an error_code on the stack. Use undefined just
+ * to set the initial stack offset.
+ */
+static const struct bb_mem_contains error_code[] = {
+ { 0x0, BBRG_UNDEFINED },
+};
+/* error_code plus original rax */
+static const struct bb_mem_contains error_code_rax[] = {
+ { 0x8, BBRG_UNDEFINED },
+ { 0x0, BBRG_RAX },
+};
+
+static const struct bb_reg_contains all_regs[] = {
+ [BBRG_RAX] = { BBRG_RAX, 0 },
+ [BBRG_RBX] = { BBRG_RBX, 0 },
+ [BBRG_RCX] = { BBRG_RCX, 0 },
+ [BBRG_RDX] = { BBRG_RDX, 0 },
+ [BBRG_RDI] = { BBRG_RDI, 0 },
+ [BBRG_RSI] = { BBRG_RSI, 0 },
+ [BBRG_RBP] = { BBRG_RBP, 0 },
+ [BBRG_RSP] = { BBRG_OSP, 0 },
+ [BBRG_R8 ] = { BBRG_R8, 0 },
+ [BBRG_R9 ] = { BBRG_R9, 0 },
+ [BBRG_R10] = { BBRG_R10, 0 },
+ [BBRG_R11] = { BBRG_R11, 0 },
+ [BBRG_R12] = { BBRG_R12, 0 },
+ [BBRG_R13] = { BBRG_R13, 0 },
+ [BBRG_R14] = { BBRG_R14, 0 },
+ [BBRG_R15] = { BBRG_R15, 0 },
+};
+static const struct bb_reg_contains no_regs[] = {
+};
+
+static struct bb_name_state bb_special_cases[] = {
+
+ /* First the cases that pass data only in memory. We do not check any
+ * register state for these cases.
+ */
+
+ /* Simple cases, no exceptions */
+ NS_MEM("ia32_ptregs_common", partial_pt_regs_plus_1, 0),
+ NS_MEM("ia32_sysret", partial_pt_regs, 0),
+ NS_MEM("int_careful", partial_pt_regs, 0),
+ NS_MEM("ia32_badarg", partial_pt_regs, 0),
+ NS_MEM("int_restore_rest", full_pt_regs, 0),
+ NS_MEM("int_signal", full_pt_regs, 0),
+ NS_MEM("int_very_careful", partial_pt_regs, 0),
+ NS_MEM("ptregscall_common", full_pt_regs_plus_1, 0),
+ NS_MEM("ret_from_intr", partial_pt_regs_plus_2, 0),
+ NS_MEM("stub32_clone", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_execve", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_fork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_iopl", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_rt_sigreturn", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_sigaltstack", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_sigreturn", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_vfork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_clone", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_execve", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_fork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_iopl", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_rt_sigreturn", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_sigaltstack", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_vfork", partial_pt_regs_plus_1, 0),
+ NS_MEM("sysenter_auditsys", partial_pt_regs,
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11)),
+
+ NS_MEM("paranoid_exit", error_code_full_pt_regs, 0),
+
+ NS_MEM_FROM("ia32_badsys", "ia32_sysenter_target",
+ partial_pt_regs,
+ /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
+ * some paths. It also stomps on RAX.
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("ia32_badsys", "ia32_cstar_target",
+ partial_pt_regs,
+ /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
+ * paths. It also stomps on RAX. Even more confusing, instead
+ * of storing RCX it stores RBP. WTF?
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("ia32_badsys", "ia32_syscall",
+ partial_pt_regs,
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11)),
+ NS_MEM("ia32_badsys", partial_pt_regs, 0),
+
+#ifdef CONFIG_AUDITSYSCALL
+ NS_MEM_FROM("int_with_check", "sysexit_audit", partial_pt_regs,
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_with_check", "ia32_cstar_target", partial_pt_regs,
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+#endif
+ NS_MEM("int_with_check", no_memory, 0),
+
+ /* Various bits of code branch to int_ret_from_sys_call, with slightly
+ * different missing values in pt_regs.
+ */
+ NS_MEM_FROM("int_ret_from_sys_call", "ret_from_fork",
+ partial_pt_regs,
+ BB_SKIP(R11)),
+ NS_MEM_FROM("int_ret_from_sys_call", "stub_execve",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "stub_rt_sigreturn",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "kernel_execve",
+ partial_pt_regs,
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_syscall",
+ partial_pt_regs,
+ /* ia32_syscall only saves RDI through RCX. */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_sysenter_target",
+ partial_pt_regs,
+ /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
+ * some paths. It also stomps on RAX.
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_cstar_target",
+ partial_pt_regs,
+ /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
+ * paths. It also stomps on RAX. Even more confusing, instead
+ * of storing RCX it stores RBP. WTF?
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_badsys",
+ partial_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("int_ret_from_sys_call", partial_pt_regs, 0),
+
+#ifdef CONFIG_PREEMPT
+ NS_MEM("retint_kernel", partial_pt_regs, BB_SKIP(RAX)),
+#endif /* CONFIG_PREEMPT */
+
+ NS_MEM("retint_careful", partial_pt_regs, BB_SKIP(RAX)),
+
+ /* Horrible hack: For a brand new x86_64 task, switch_to() branches to
+ * ret_from_fork with a totally different stack state from all the
+ * other tasks that come out of switch_to(). This non-standard state
+ * cannot be represented so just ignore the branch from switch_to() to
+ * ret_from_fork. Due to inlining and linker labels, switch_to() can
+ * appear as several different function labels, including schedule,
+ * context_switch and __sched_text_start.
+ */
+ NS_MEM_FROM("ret_from_fork", "schedule", no_memory, 0),
+ NS_MEM_FROM("ret_from_fork", "__schedule", no_memory, 0),
+ NS_MEM_FROM("ret_from_fork", "__sched_text_start", no_memory, 0),
+ NS_MEM_FROM("ret_from_fork", "context_switch", no_memory, 0),
+ NS_MEM("ret_from_fork", full_pt_regs, 0),
+
+ NS_MEM_FROM("ret_from_sys_call", "ret_from_fork",
+ partial_pt_regs,
+ BB_SKIP(R11)),
+ NS_MEM("ret_from_sys_call", partial_pt_regs, 0),
+
+ NS_MEM("retint_restore_args",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_MEM("retint_swapgs",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ /* Now the cases that pass data in registers. We do not check any
+ * memory state for these cases.
+ */
+
+ NS_REG("bad_put_user",
+ all_regs, BB_SKIP(RBX)),
+
+ NS_REG("bad_get_user",
+ all_regs, BB_SKIP(RAX) | BB_SKIP(RDX)),
+
+ NS_REG("bad_to_user",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("ia32_ptregs_common",
+ all_regs,
+ 0),
+
+ NS_REG("copy_user_generic_unrolled",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("copy_user_generic_string",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("irq_return",
+ all_regs,
+ 0),
+
+ /* Finally the cases that pass data in both registers and memory.
+ */
+
+ NS("invalid_TSS", error_code, all_regs, 0, 0, 0),
+ NS("segment_not_present", error_code, all_regs, 0, 0, 0),
+ NS("alignment_check", error_code, all_regs, 0, 0, 0),
+ NS("page_fault", error_code, all_regs, 0, 0, 0),
+ NS("general_protection", error_code, all_regs, 0, 0, 0),
+ NS("error_entry", error_code_rax, all_regs, 0, BB_SKIP(RAX), -0x10),
+ NS("error_exit", error_code_full_pt_regs, no_regs, 0, 0, 0x30),
+ NS("common_interrupt", error_code, all_regs, 0, 0, -0x8),
+ NS("save_args", error_code, all_regs, 0, 0, -0x50),
+ NS("int3", no_memory, all_regs, 0, 0, -0x80),
+};
+
+static const char *bb_spurious[] = {
+ /* schedule */
+ "thread_return",
+ /* system_call */
+ "system_call_after_swapgs",
+ "system_call_fastpath",
+ "ret_from_sys_call",
+ "sysret_check",
+ "sysret_careful",
+ "sysret_signal",
+ "badsys",
+#ifdef CONFIG_AUDITSYSCALL
+ "auditsys",
+ "sysret_audit",
+#endif
+ "tracesys",
+ "int_ret_from_sys_call",
+ "int_with_check",
+ "int_careful",
+ "int_very_careful",
+ "int_signal",
+ "int_restore_rest",
+ /* common_interrupt */
+ "ret_from_intr",
+ "exit_intr",
+ "retint_with_reschedule",
+ "retint_check",
+ "retint_swapgs",
+ "retint_restore_args",
+ "restore_args",
+ "irq_return",
+ "bad_iret",
+ "retint_careful",
+ "retint_signal",
+#ifdef CONFIG_PREEMPT
+ "retint_kernel",
+#endif /* CONFIG_PREEMPT */
+ /* paranoid_exit */
+ "paranoid_swapgs",
+ "paranoid_restore",
+ "paranoid_userspace",
+ "paranoid_schedule",
+ /* error_entry */
+ "error_swapgs",
+ "error_sti",
+ "error_kernelspace",
+ /* nmi */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ "nmi_swapgs",
+ "nmi_restore",
+ "nmi_userspace",
+ "nmi_schedule",
+#endif
+ /* load_gs_index */
+ "gs_change",
+ "bad_gs",
+ /* ia32_sysenter_target */
+ "sysenter_do_call",
+ "sysenter_dispatch",
+ "sysexit_from_sys_call",
+#ifdef CONFIG_AUDITSYSCALL
+ "sysenter_auditsys",
+ "sysexit_audit",
+#endif
+ "sysenter_tracesys",
+ /* ia32_cstar_target */
+ "cstar_do_call",
+ "cstar_dispatch",
+ "sysretl_from_sys_call",
+#ifdef CONFIG_AUDITSYSCALL
+ "cstar_auditsys",
+ "sysretl_audit",
+#endif
+ "cstar_tracesys",
+ /* ia32_syscall */
+ "ia32_do_call",
+ "ia32_sysret",
+ "ia32_tracesys",
+#ifdef CONFIG_HIBERNATION
+ /* restore_image */
+ "loop",
+ "done",
+#endif /* CONFIG_HIBERNATION */
+#ifdef CONFIG_KPROBES
+ /* jprobe_return */
+ "jprobe_return_end",
+ /* kretprobe_trampoline_holder */
+ "kretprobe_trampoline",
+#endif /* CONFIG_KPROBES */
+#ifdef CONFIG_KEXEC
+ /* relocate_kernel */
+ "relocate_new_kernel",
+#endif /* CONFIG_KEXEC */
+#ifdef CONFIG_PARAVIRT_XEN
+ /* arch/i386/xen/xen-asm.S */
+ "xen_irq_enable_direct_end",
+ "xen_irq_disable_direct_end",
+ "xen_save_fl_direct_end",
+ "xen_restore_fl_direct_end",
+ "xen_iret_start_crit",
+ "iret_restore_end",
+ "xen_iret_end_crit",
+ "hyper_iret",
+#endif /* CONFIG_PARAVIRT_XEN */
+};
+
+static const char *bb_hardware_handlers[] = {
+ "system_call",
+ "common_interrupt",
+ "error_entry",
+ "debug",
+ "nmi",
+ "int3",
+ "double_fault",
+ "stack_segment",
+ "machine_check",
+ "kdb_call",
+};
+
+static int
+bb_hardware_pushed_arch(kdb_machreg_t rsp,
+ const struct kdb_activation_record *ar)
+{
+ /* x86_64 interrupt stacks are 16 byte aligned and you must get the
+ * next rsp from the stack; it cannot be statically calculated. Do not
+ * include the word at rsp; it is pushed by hardware but is treated as
+ * a normal software return value.
+ *
+ * When an IST switch occurs (e.g. NMI) then the saved rsp points to
+ * another stack entirely. Assume that the IST stack is 16 byte
+ * aligned and just return the size of the hardware data on this stack.
+ * The stack unwind code will take care of the stack switch.
+ */
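+ /* Illustrative frame for a no-IST interrupt with no error code, the
+ * standard x86_64 hardware layout:
+ *
+ *	rsp+0x20: ss
+ *	rsp+0x18: rsp (the saved_rsp read below)
+ *	rsp+0x10: rflags
+ *	rsp+0x08: cs
+ *	rsp+0x00: rip (excluded, treated as a software return value)
+ */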
+ kdb_machreg_t saved_rsp = *((kdb_machreg_t *)rsp + 3);
+ int hardware_pushed = saved_rsp - rsp - KDB_WORD_SIZE;
+ if (hardware_pushed < 4 * KDB_WORD_SIZE ||
+ saved_rsp < ar->stack.logical_start ||
+ saved_rsp >= ar->stack.logical_end)
+ return 4 * KDB_WORD_SIZE;
+ else
+ return hardware_pushed;
+}
+
+static void
+bb_start_block0(void)
+{
+ bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
+ bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
+ bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
+ bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
+ bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
+ bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
+ bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
+ bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
+ bb_reg_code_set_value(BBRG_R8, BBRG_R8);
+ bb_reg_code_set_value(BBRG_R9, BBRG_R9);
+ bb_reg_code_set_value(BBRG_R10, BBRG_R10);
+ bb_reg_code_set_value(BBRG_R11, BBRG_R11);
+ bb_reg_code_set_value(BBRG_R12, BBRG_R12);
+ bb_reg_code_set_value(BBRG_R13, BBRG_R13);
+ bb_reg_code_set_value(BBRG_R14, BBRG_R14);
+ bb_reg_code_set_value(BBRG_R15, BBRG_R15);
+}
+
+/* x86_64 does not have a special case for __switch_to */
+
+static void
+bb_fixup_switch_to(char *p)
+{
+}
+
+static int
+bb_asmlinkage_arch(void)
+{
+ return strncmp(bb_func_name, "__down", 6) == 0 ||
+ strncmp(bb_func_name, "__up", 4) == 0 ||
+ strncmp(bb_func_name, "stub_", 5) == 0 ||
+ strcmp(bb_func_name, "ret_from_fork") == 0 ||
+ strcmp(bb_func_name, "ptregscall_common") == 0;
+}
+
+#else /* !CONFIG_X86_64 */
+
+/* Registers that can be used to pass parameters, in the order that parameters
+ * are passed.
+ */
+
+static const enum bb_reg_code
+bb_param_reg[] = {
+ BBRG_RAX,
+ BBRG_RDX,
+ BBRG_RCX,
+};
+
+static const enum bb_reg_code
+bb_preserved_reg[] = {
+ BBRG_RBX,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_RSI,
+ BBRG_RDI,
+};
+
+static const struct bb_mem_contains full_pt_regs[] = {
+ { 0x18, BBRG_RAX },
+ { 0x14, BBRG_RBP },
+ { 0x10, BBRG_RDI },
+ { 0x0c, BBRG_RSI },
+ { 0x08, BBRG_RDX },
+ { 0x04, BBRG_RCX },
+ { 0x00, BBRG_RBX },
+};
+static const struct bb_mem_contains no_memory[] = {
+};
+/* Hardware has already pushed an error_code on the stack. Use undefined just
+ * to set the initial stack offset.
+ */
+static const struct bb_mem_contains error_code[] = {
+ { 0x0, BBRG_UNDEFINED },
+};
+/* rbx already pushed */
+static const struct bb_mem_contains rbx_pushed[] = {
+ { 0x0, BBRG_RBX },
+};
+#ifdef CONFIG_MATH_EMULATION
+static const struct bb_mem_contains mem_fpu_reg_round[] = {
+ { 0xc, BBRG_RBP },
+ { 0x8, BBRG_RSI },
+ { 0x4, BBRG_RDI },
+ { 0x0, BBRG_RBX },
+};
+#endif /* CONFIG_MATH_EMULATION */
+
+static const struct bb_reg_contains all_regs[] = {
+ [BBRG_RAX] = { BBRG_RAX, 0 },
+ [BBRG_RBX] = { BBRG_RBX, 0 },
+ [BBRG_RCX] = { BBRG_RCX, 0 },
+ [BBRG_RDX] = { BBRG_RDX, 0 },
+ [BBRG_RDI] = { BBRG_RDI, 0 },
+ [BBRG_RSI] = { BBRG_RSI, 0 },
+ [BBRG_RBP] = { BBRG_RBP, 0 },
+ [BBRG_RSP] = { BBRG_OSP, 0 },
+};
+static const struct bb_reg_contains no_regs[] = {
+};
+#ifdef CONFIG_MATH_EMULATION
+static const struct bb_reg_contains reg_fpu_reg_round[] = {
+ [BBRG_RBP] = { BBRG_OSP, -0x4 },
+ [BBRG_RSP] = { BBRG_OSP, -0x10 },
+};
+#endif /* CONFIG_MATH_EMULATION */
+
+static struct bb_name_state bb_special_cases[] = {
+
+ /* First the cases that pass data only in memory. We do not check any
+ * register state for these cases.
+ */
+
+ /* Simple cases, no exceptions */
+ NS_MEM("check_userspace", full_pt_regs, 0),
+ NS_MEM("device_not_available_emulate", full_pt_regs, 0),
+ NS_MEM("ldt_ss", full_pt_regs, 0),
+ NS_MEM("no_singlestep", full_pt_regs, 0),
+ NS_MEM("restore_all", full_pt_regs, 0),
+ NS_MEM("restore_nocheck", full_pt_regs, 0),
+ NS_MEM("restore_nocheck_notrace", full_pt_regs, 0),
+ NS_MEM("ret_from_exception", full_pt_regs, 0),
+ NS_MEM("ret_from_fork", full_pt_regs, 0),
+ NS_MEM("ret_from_intr", full_pt_regs, 0),
+ NS_MEM("work_notifysig", full_pt_regs, 0),
+ NS_MEM("work_pending", full_pt_regs, 0),
+
+#ifdef CONFIG_PREEMPT
+ NS_MEM("resume_kernel", full_pt_regs, 0),
+#endif /* CONFIG_PREEMPT */
+
+ NS_MEM("common_interrupt", error_code, 0),
+ NS_MEM("error_code", error_code, 0),
+
+ NS_MEM("bad_put_user", rbx_pushed, 0),
+
+ NS_MEM_FROM("resume_userspace", "syscall_badsys",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM_FROM("resume_userspace", "syscall_fault",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM_FROM("resume_userspace", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ /* Too difficult to trace through the various vm86 functions for now.
+ * They are C functions that start off with some memory state, fiddle
+ * the registers then jmp directly to resume_userspace. For the
+ * moment, just assume that they are valid and do no checks.
+ */
+ NS_FROM("resume_userspace", "do_int",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "do_sys_vm86",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "handle_vm86_fault",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "handle_vm86_trap",
+ no_memory, no_regs, 0, 0, 0),
+ NS_MEM("resume_userspace", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_badsys", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RBP)),
+ NS_MEM("syscall_badsys", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_call", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_call", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_exit", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_exit", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_exit_work", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RAX) | BB_SKIP(RBP)),
+ NS_MEM_FROM("syscall_exit_work", "system_call",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_exit_work", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_trace_entry", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RBP)),
+ NS_MEM_FROM("syscall_trace_entry", "system_call",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_trace_entry", full_pt_regs, 0),
+
+ /* Now the cases that pass data in registers. We do not check any
+ * memory state for these cases.
+ */
+
+ NS_REG("syscall_fault", all_regs, 0),
+
+ NS_REG("bad_get_user", all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RDX)),
+
+ /* Finally the cases that pass data in both registers and memory.
+ */
+
+ /* This entry is redundant now because bb_fixup_switch_to() hides the
+ * jmp __switch_to case, however the entry is left here as
+ * documentation.
+ *
+ * NS("__switch_to", no_memory, no_regs, 0, 0, 0),
+ */
+
+ NS("iret_exc", no_memory, all_regs, 0, 0, 0x20),
+
+#ifdef CONFIG_MATH_EMULATION
+ NS("fpu_reg_round", mem_fpu_reg_round, reg_fpu_reg_round, 0, 0, 0),
+#endif /* CONFIG_MATH_EMULATION */
+};
+
+static const char *bb_spurious[] = {
+ /* ret_from_exception */
+ "ret_from_intr",
+ "check_userspace",
+ "resume_userspace",
+ /* resume_kernel */
+#ifdef CONFIG_PREEMPT
+ "need_resched",
+#endif /* CONFIG_PREEMPT */
+ /* ia32_sysenter_target */
+ "sysenter_past_esp",
+ /* system_call */
+ "no_singlestep",
+ "syscall_call",
+ "syscall_exit",
+ "restore_all",
+ "restore_nocheck",
+ "restore_nocheck_notrace",
+ "ldt_ss",
+ /* do not include iret_exc, it is in a .fixup section */
+ /* work_pending */
+ "work_resched",
+ "work_notifysig",
+#ifdef CONFIG_VM86
+ "work_notifysig_v86",
+#endif /* CONFIG_VM86 */
+ /* page_fault */
+ "error_code",
+ /* device_not_available */
+ "device_not_available_emulate",
+ /* debug */
+ "debug_esp_fix_insn",
+ "debug_stack_correct",
+ /* nmi */
+ "nmi_stack_correct",
+ "nmi_stack_fixup",
+ "nmi_debug_stack_check",
+ "nmi_espfix_stack",
+#ifdef CONFIG_HIBERNATION
+ /* restore_image */
+ "copy_loop",
+ "done",
+#endif /* CONFIG_HIBERNATION */
+#ifdef CONFIG_KPROBES
+ /* jprobe_return */
+ "jprobe_return_end",
+#endif /* CONFIG_KPROBES */
+#ifdef CONFIG_KEXEC
+ /* relocate_kernel */
+ "relocate_new_kernel",
+#endif /* CONFIG_KEXEC */
+#ifdef CONFIG_MATH_EMULATION
+ /* assorted *.S files in arch/i386/math_emu */
+ "Denorm_done",
+ "Denorm_shift_more_than_32",
+ "Denorm_shift_more_than_63",
+ "Denorm_shift_more_than_64",
+ "Do_unmasked_underflow",
+ "Exp_not_underflow",
+ "fpu_Arith_exit",
+ "fpu_reg_round",
+ "fpu_reg_round_signed_special_exit",
+ "fpu_reg_round_special_exit",
+ "L_accum_done",
+ "L_accum_loaded",
+ "L_accum_loop",
+ "L_arg1_larger",
+ "L_bugged",
+ "L_bugged_1",
+ "L_bugged_2",
+ "L_bugged_3",
+ "L_bugged_4",
+ "L_bugged_denorm_486",
+ "L_bugged_round24",
+ "L_bugged_round53",
+ "L_bugged_round64",
+ "LCheck_24_round_up",
+ "LCheck_53_round_up",
+ "LCheck_Round_Overflow",
+ "LCheck_truncate_24",
+ "LCheck_truncate_53",
+ "LCheck_truncate_64",
+ "LDenormal_adj_exponent",
+ "L_deNormalised",
+ "LDo_24_round_up",
+ "LDo_2nd_32_bits",
+ "LDo_2nd_div",
+ "LDo_3rd_32_bits",
+ "LDo_3rd_div",
+ "LDo_53_round_up",
+ "LDo_64_round_up",
+ "L_done",
+ "LDo_truncate_24",
+ "LDown_24",
+ "LDown_53",
+ "LDown_64",
+ "L_entry_bugged",
+ "L_error_exit",
+ "L_exactly_32",
+ "L_exception_exit",
+ "L_exit",
+ "L_exit_nuo_valid",
+ "L_exit_nuo_zero",
+ "L_exit_valid",
+ "L_extent_zero",
+ "LFirst_div_done",
+ "LFirst_div_not_1",
+ "L_Full_Division",
+ "LGreater_Half_24",
+ "LGreater_Half_53",
+ "LGreater_than_1",
+ "LLess_than_1",
+ "L_Make_denorm",
+ "L_more_31_no_low",
+ "L_more_63_no_low",
+ "L_more_than_31",
+ "L_more_than_63",
+ "L_more_than_64",
+ "L_more_than_65",
+ "L_more_than_95",
+ "L_must_be_zero",
+ "L_n_exit",
+ "L_no_adjust",
+ "L_no_bit_lost",
+ "L_no_overflow",
+ "L_no_precision_loss",
+ "L_Normalised",
+ "L_norm_bugged",
+ "L_n_shift_1",
+ "L_nuo_shift_1",
+ "L_overflow",
+ "L_precision_lost_down",
+ "L_precision_lost_up",
+ "LPrevent_2nd_overflow",
+ "LPrevent_3rd_overflow",
+ "LPseudoDenormal",
+ "L_Re_normalise",
+ "LResult_Normalised",
+ "L_round",
+ "LRound_large",
+ "LRound_nearest_24",
+ "LRound_nearest_53",
+ "LRound_nearest_64",
+ "LRound_not_small",
+ "LRound_ovfl",
+ "LRound_precision",
+ "LRound_prep",
+ "L_round_the_result",
+ "LRound_To_24",
+ "LRound_To_53",
+ "LRound_To_64",
+ "LSecond_div_done",
+ "LSecond_div_not_1",
+ "L_shift_1",
+ "L_shift_32",
+ "L_shift_65_nc",
+ "L_shift_done",
+ "Ls_less_than_32",
+ "Ls_more_than_63",
+ "Ls_more_than_95",
+ "L_Store_significand",
+ "L_subtr",
+ "LTest_over",
+ "LTruncate_53",
+ "LTruncate_64",
+ "L_underflow",
+ "L_underflow_to_zero",
+ "LUp_24",
+ "LUp_53",
+ "LUp_64",
+ "L_zero",
+ "Normalise_result",
+ "Signal_underflow",
+ "sqrt_arg_ge_2",
+ "sqrt_get_more_precision",
+ "sqrt_more_prec_large",
+ "sqrt_more_prec_ok",
+ "sqrt_more_prec_small",
+ "sqrt_near_exact",
+ "sqrt_near_exact_large",
+ "sqrt_near_exact_ok",
+ "sqrt_near_exact_small",
+ "sqrt_near_exact_x",
+ "sqrt_prelim_no_adjust",
+ "sqrt_round_result",
+ "sqrt_stage_2_done",
+ "sqrt_stage_2_error",
+ "sqrt_stage_2_finish",
+ "sqrt_stage_2_positive",
+ "sqrt_stage_3_error",
+ "sqrt_stage_3_finished",
+ "sqrt_stage_3_no_error",
+ "sqrt_stage_3_positive",
+ "Unmasked_underflow",
+ "xExp_not_underflow",
+#endif /* CONFIG_MATH_EMULATION */
+};
+
+static const char *bb_hardware_handlers[] = {
+ "ret_from_exception",
+ "system_call",
+ "work_pending",
+ "syscall_fault",
+ "page_fault",
+ "coprocessor_error",
+ "simd_coprocessor_error",
+ "device_not_available",
+ "debug",
+ "nmi",
+ "int3",
+ "overflow",
+ "bounds",
+ "invalid_op",
+ "coprocessor_segment_overrun",
+ "invalid_TSS",
+ "segment_not_present",
+ "stack_segment",
+ "general_protection",
+ "alignment_check",
+ "kdb_call",
+ "divide_error",
+ "machine_check",
+ "spurious_interrupt_bug",
+};
+
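+/* i386 exceptions that do not switch stacks push eflags, cs and eip. As on
+ * x86_64, the word at rsp (eip) is treated as a normal software return value,
+ * which presumably leaves the two hardware-pushed words counted here, eflags
+ * and cs.
+ */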
+static int
+bb_hardware_pushed_arch(kdb_machreg_t rsp,
+ const struct kdb_activation_record *ar)
+{
+ return (2 * KDB_WORD_SIZE);
+}
+
+static void
+bb_start_block0(void)
+{
+ bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
+ bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
+ bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
+ bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
+ bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
+ bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
+ bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
+ bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
+}
+
+/* The i386 code that switches stack in a context switch is an extremely
+ * special case. It saves the rip pointing to a label that is not otherwise
+ * referenced, saves the current rsp then pushes a word. The magic code that
+ * resumes the new task picks up the saved rip and rsp, effectively referencing
+ * a label that otherwise is not used and ignoring the pushed word.
+ *
+ * The simplest way to handle this very strange case is to recognise jmp
+ * address <__switch_to> and treat it as a popfl instruction. This avoids
+ * terminating the block on this jmp and removes one word from the stack state,
+ * which is the end effect of all the magic code.
+ *
+ * Called with the instruction line, starting after the first ':'.
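+ *
+ * For example (illustrative address), an input line such as
+ *	" jmp 0xc0318d04 <__switch_to>"
+ * is rewritten in place to "popfl".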
+ */
+
+static void
+bb_fixup_switch_to(char *p)
+{
+ char *p1 = p;
+ p += strspn(p, " \t"); /* start of instruction */
+ if (strncmp(p, "jmp", 3))
+ return;
+ p += strcspn(p, " \t"); /* end of instruction */
+ p += strspn(p, " \t"); /* start of address */
+ p += strcspn(p, " \t"); /* end of address */
+ p += strspn(p, " \t"); /* start of comment */
+ if (strcmp(p, "<__switch_to>") == 0)
+ strcpy(p1, "popfl");
+}
+
+static int
+bb_asmlinkage_arch(void)
+{
+ return strcmp(bb_func_name, "ret_from_exception") == 0 ||
+ strcmp(bb_func_name, "syscall_trace_entry") == 0;
+}
+
+#endif /* CONFIG_X86_64 */
+
+
+/*============================================================================*/
+/* */
+/* Common code and data. */
+/* */
+/*============================================================================*/
+
+
+/* Tracking registers by decoding the instructions is quite a bit harder than
+ * doing the same tracking using compiler generated information. Register
+ * contents can remain in the same register, they can be copied to other
+ * registers, they can be stored on stack or they can be modified/overwritten.
+ * At any one time, there are 0 or more copies of the original value that was
+ * supplied in each register on input to the current function. If a register
+ * exists in multiple places, one copy of that register is the master version;
+ * the others are temporary copies which may or may not be destroyed before the
+ * end of the function.
+ *
+ * The compiler knows which copy of a register is the master and which are
+ * temporary copies, which makes it relatively easy to track register contents
+ * as they are saved and restored. Without that compiler based knowledge, this
+ * code has to track _every_ possible copy of each register, simply because we
+ * do not know which is the master copy and which are temporary copies which
+ * may be destroyed later.
+ *
+ * It gets worse: registers that contain parameters can be copied to other
+ * registers which are then saved on stack in a lower level function. Also the
+ * stack pointer may be held in multiple registers (typically RSP and RBP)
+ * which contain different offsets from the base of the stack on entry to this
+ * function. All of which means that we have to track _all_ register
+ * movements, or at least as much as possible.
+ *
+ * Start with the basic block that contains the start of the function, by
+ * definition all registers contain their initial value. Track each
+ * instruction's effect on register contents, this includes reading from a
+ * parameter register before any write to that register, IOW the register
+ * really does contain a parameter. The register state is represented by a
+ * dynamically sized array with each entry containing :-
+ *
+ * Register name
+ * Location it is copied to (another register or stack + offset)
+ *
+ * Besides the register tracking array, we track which parameter registers are
+ * read before being written, to determine how many parameters are passed in
+ * registers. We also track which registers contain stack pointers, including
+ * their offset from the original stack pointer on entry to the function.
+ *
+ * At each exit from the current basic block (via JMP instruction or drop
+ * through), the register state is cloned to form the state on input to the
+ * target basic block and the target is marked for processing using this state.
+ * When there are multiple ways to enter a basic block (e.g. several JMP
+ * instructions referencing the same target) then there will be multiple sets
+ * of register state to form the "input" for that basic block; there is no
+ * guarantee that all paths to that block will have the same register state.
+ *
+ * As each target block is processed, all the known sets of register state are
+ * merged to form a suitable subset of the state which agrees with all the
+ * inputs. The most common case is where one path to this block copies a
+ * register to another register but another path does not, therefore the copy
+ * is only a temporary and should not be propagated into this block.
+ *
+ * If the target block already has an input state from the current transfer
+ * point and the new input state is identical to the previous input state then
+ * we have reached a steady state for the arc from the current location to the
+ * target block. Therefore there is no need to process the target block again.
+ *
+ * The steps of "process a block, create state for target block(s), pick a new
+ * target block, merge state for target block, process target block" will
+ * continue until all the state changes have propagated all the way down the
+ * basic block tree, including round any cycles in the tree. The merge step
+ * only deletes tracking entries from the input state(s), it never adds a
+ * tracking entry. Therefore the overall algorithm is guaranteed to converge
+ * to a steady state, the worst possible case is that every tracking entry into
+ * a block is deleted, which will result in an empty output state.
+ *
+ * As each instruction is decoded, it is checked to see if this is the point at
+ * which execution left this function. This can be a call to another function
+ * (actually the return address to this function) or is the instruction which
+ * was about to be executed when an interrupt occurred (including an oops).
+ * Save the register state at this point.
+ *
+ * We always know what the registers contain when execution left this function.
+ * For an interrupt, the registers are in struct pt_regs. For a call to
+ * another function, we have already deduced the register state on entry to the
+ * other function by unwinding to the start of that function. Given the
+ * register state on exit from this function plus the known register contents
+ * on entry to the next function, we can determine the stack pointer value on
+ * input to this function. That in turn lets us calculate the address of input
+ * registers that have been stored on stack, giving us the input parameters.
+ * Finally the stack pointer gives us the return address which is the exit
+ * point from the calling function, repeat the unwind process on that function.
+ *
+ * The data that tracks which registers contain input parameters is function
+ * global, not local to any basic block. To determine which input registers
+ * contain parameters, we have to decode the entire function. Otherwise an
+ * exit early in the function might not have read any parameters yet.
+ */
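+
+/* A minimal sketch of the convergence loop described above, in C-flavoured
+ * pseudo-code (illustrative only; the real passes drive this through bb_list,
+ * the bb_jmp arcs and cloned copies of struct bb_reg_state):
+ *
+ *	mark the first block as changed;
+ *	while (some block is marked changed && !bb_giveup) {
+ *		pick a changed block and clear its mark;
+ *		merge all recorded input states for the block;
+ *		decode each instruction, updating the register state;
+ *		for (each jmp or drop through that leaves the block) {
+ *			if (the state differs from the arc's recorded state) {
+ *				record the new state on the arc;
+ *				mark the target block as changed;
+ *			}
+ *		}
+ *	}
+ */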
+
+/* Record memory contents in terms of the values that were passed to this
+ * function, IOW track which memory locations contain an input value. A memory
+ * location's contents can be undefined, can contain an input register value
+ * or can contain an offset from the original stack pointer.
+ *
+ * This structure is used to record register contents that have been stored in
+ * memory. Location (BBRG_OSP + 'offset_address') contains the input value
+ * from register 'value'. When 'value' is BBRG_OSP then offset_value contains
+ * the offset from the original stack pointer that was stored in this memory
+ * location. When 'value' is not BBRG_OSP then the memory location contains
+ * the original contents of an input register and offset_value is ignored.
+ *
+ * An input register 'value' can be stored in more than one register and/or in
+ * more than one memory location.
+ */
+
+struct bb_memory_contains
+{
+ short offset_address;
+ enum bb_reg_code value: 8;
+ short offset_value;
+};
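+
+/* For example (illustrative offsets), { -0x18, BBRG_RBX, 0 } records that the
+ * input value of rbx was saved at OSP-0x18, while { -0x20, BBRG_OSP, -0x10 }
+ * records that a stack pointer value, OSP-0x10, was stored at OSP-0x20.
+ */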
+
+/* Track the register state in each basic block. */
+
+struct bb_reg_state
+{
+ /* Indexed by register value 'reg - BBRG_RAX' */
+ struct bb_reg_contains contains[KDB_INT_REGISTERS];
+ int ref_count;
+ int mem_count;
+ /* dynamic size for memory locations, see mem_count */
+ struct bb_memory_contains memory[0];
+};
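+
+/* The trailing memory[] array gives struct bb_reg_state a variable sized
+ * layout; a state tracking n memory locations would be allocated along the
+ * lines of (a sketch, not a quote from the allocation paths below):
+ *
+ *	state = debug_kmalloc(sizeof(*state) +
+ *			      n * sizeof(struct bb_memory_contains),
+ *			      GFP_ATOMIC);
+ */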
+
+static struct bb_reg_state *bb_reg_state, *bb_exit_state;
+static int bb_reg_state_max, bb_reg_params, bb_memory_params;
+
+struct bb_actual
+{
+ bfd_vma value;
+ int valid;
+};
+
+/* Contains the actual hex value of a register, plus a valid bit. Indexed by
+ * register value 'reg - BBRG_RAX'
+ */
+static struct bb_actual bb_actual[KDB_INT_REGISTERS];
+
+static bfd_vma bb_func_start, bb_func_end;
+static bfd_vma bb_common_interrupt, bb_error_entry, bb_ret_from_intr,
+ bb_thread_return, bb_sync_regs, bb_save_v86_state,
+ bb__sched_text_start, bb__sched_text_end,
+ bb_save_args, bb_save_rest, bb_save_paranoid;
+
+/* Record jmp instructions, both conditional and unconditional. These form the
+ * arcs between the basic blocks. This is also used to record the state when
+ * one block drops through into the next.
+ *
+ * A bb can have multiple associated bb_jmp entries, one for each jcc
+ * instruction plus at most one bb_jmp for the drop through case. If a bb
+ * drops through to the next bb then the drop through bb_jmp entry will be the
+ * last entry in the set of bb_jmp's that are associated with the bb. This is
+ * enforced by the fact that jcc entries are added during the disassembly phase
+ * of pass 1, the drop through entries are added near the end of pass 1.
+ *
+ * At address 'from' in this block, we have a jump to address 'to'. The
+ * register state at 'from' is copied to the target block.
+ */
+
+struct bb_jmp
+{
+ bfd_vma from;
+ bfd_vma to;
+ struct bb_reg_state *state;
+ unsigned int drop_through: 1;
+};
+
+struct bb
+{
+ bfd_vma start;
+ /* The end address of a basic block is sloppy. It can be the first
+ * byte of the last instruction in the block or it can be the last byte
+ * of the block.
+ */
+ bfd_vma end;
+ unsigned int changed: 1;
+ unsigned int drop_through: 1;
+};
+
+static struct bb **bb_list, *bb_curr;
+static int bb_max, bb_count;
+
+static struct bb_jmp *bb_jmp_list;
+static int bb_jmp_max, bb_jmp_count;
+
+/* Add a new bb entry to the list. This does an insert sort. */
+
+static struct bb *
+bb_new(bfd_vma order)
+{
+ int i, j;
+ struct bb *bb, *p;
+ if (bb_giveup)
+ return NULL;
+ if (bb_count == bb_max) {
+ struct bb **bb_list_new;
+ bb_max += 10;
+ bb_list_new = debug_kmalloc(bb_max*sizeof(*bb_list_new),
+ GFP_ATOMIC);
+ if (!bb_list_new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memcpy(bb_list_new, bb_list, bb_count*sizeof(*bb_list));
+ debug_kfree(bb_list);
+ bb_list = bb_list_new;
+ }
+ bb = debug_kmalloc(sizeof(*bb), GFP_ATOMIC);
+ if (!bb) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memset(bb, 0, sizeof(*bb));
+ for (i = 0; i < bb_count; ++i) {
+ p = bb_list[i];
+ if ((p->start && p->start > order) ||
+ (p->end && p->end > order))
+ break;
+ }
+ for (j = bb_count-1; j >= i; --j)
+ bb_list[j+1] = bb_list[j];
+ bb_list[i] = bb;
+ ++bb_count;
+ return bb;
+}
+
+/* Add a new bb_jmp entry to the list. This list is not sorted. */
+
+static struct bb_jmp *
+bb_jmp_new(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ struct bb_jmp *bb_jmp;
+ if (bb_giveup)
+ return NULL;
+ if (bb_jmp_count == bb_jmp_max) {
+ struct bb_jmp *bb_jmp_list_new;
+ bb_jmp_max += 10;
+ bb_jmp_list_new =
+ debug_kmalloc(bb_jmp_max*sizeof(*bb_jmp_list_new),
+ GFP_ATOMIC);
+ if (!bb_jmp_list_new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memcpy(bb_jmp_list_new, bb_jmp_list,
+ bb_jmp_count*sizeof(*bb_jmp_list));
+ debug_kfree(bb_jmp_list);
+ bb_jmp_list = bb_jmp_list_new;
+ }
+ bb_jmp = bb_jmp_list + bb_jmp_count++;
+ bb_jmp->from = from;
+ bb_jmp->to = to;
+ bb_jmp->drop_through = drop_through;
+ bb_jmp->state = NULL;
+ return bb_jmp;
+}
+
+static void
+bb_delete(int i)
+{
+ struct bb *bb = bb_list[i];
+ memcpy(bb_list+i, bb_list+i+1, (bb_count-i-1)*sizeof(*bb_list));
+ bb_list[--bb_count] = NULL;
+ debug_kfree(bb);
+}
+
+static struct bb *
+bb_add(bfd_vma start, bfd_vma end)
+{
+ int i;
+ struct bb *bb;
+ /* Ignore basic blocks whose start address is outside the current
+ * function. These occur for call instructions and for tail recursion.
+ */
+ if (start &&
+ (start < bb_func_start || start >= bb_func_end))
+ return NULL;
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ if ((start && bb->start == start) ||
+ (end && bb->end == end))
+ return bb;
+ }
+ bb = bb_new(start ? start : end);
+ if (bb) {
+ bb->start = start;
+ bb->end = end;
+ }
+ return bb;
+}
+
+static struct bb_jmp *
+bb_jmp_add(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ int i;
+ struct bb_jmp *bb_jmp;
+ for (i = 0, bb_jmp = bb_jmp_list; i < bb_jmp_count; ++i, ++bb_jmp) {
+ if (bb_jmp->from == from &&
+ bb_jmp->to == to &&
+ bb_jmp->drop_through == drop_through)
+ return bb_jmp;
+ }
+ bb_jmp = bb_jmp_new(from, to, drop_through);
+ return bb_jmp;
+}
+
+static unsigned long bb_curr_addr, bb_exit_addr;
+static char bb_buffer[256]; /* A bit too big to go on stack */
+
+/* Computed jmp uses 'jmp *addr(,%reg,[48])' where 'addr' is the start of a
+ * table of addresses that point into the current function. Run the table and
+ * generate bb starts for each target address plus a bb_jmp from this address
+ * to the target address.
+ *
+ * Only called for 'jmp' instructions, with the pointer starting at 'jmp'.
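+ *
+ * For example (illustrative address), a line such as
+ *	jmp *0xc0319f40(,%eax,4)
+ * parses as table 0xc0319f40 and scale 4, which matches KDB_WORD_SIZE on
+ * i386.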
+ */
+
+static void
+bb_pass1_computed_jmp(char *p)
+{
+ unsigned long table, scale;
+ kdb_machreg_t addr;
+ struct bb *bb;
+ p += strcspn(p, " \t"); /* end of instruction */
+ p += strspn(p, " \t"); /* start of address */
+ if (*p++ != '*')
+ return;
+ table = simple_strtoul(p, &p, 0);
+ if (strncmp(p, "(,%", 3) != 0)
+ return;
+ p += 3;
+ p += strcspn(p, ","); /* end of reg */
+ if (*p++ != ',')
+ return;
+ scale = simple_strtoul(p, &p, 0);
+ if (scale != KDB_WORD_SIZE || strcmp(p, ")"))
+ return;
+ while (!bb_giveup) {
+ if (kdb_getword(&addr, table, sizeof(addr)))
+ return;
+ if (addr < bb_func_start || addr >= bb_func_end)
+ return;
+ bb = bb_add(addr, 0);
+ if (bb)
+ bb_jmp_add(bb_curr_addr, addr, 0);
+ table += KDB_WORD_SIZE;
+ }
+}
+
+/* Pass 1, identify the start and end of each basic block */
+
+static int
+bb_dis_pass1(PTR file, const char *fmt, ...)
+{
+ int l = strlen(bb_buffer);
+ char *p;
+ va_list ap;
+ va_start(ap, fmt);
+ vsnprintf(bb_buffer + l, sizeof(bb_buffer) - l, fmt, ap);
+ va_end(ap);
+ if ((p = strchr(bb_buffer, '\n'))) {
+ *p = '\0';
+ /* ret[q], iret[q], sysexit, sysret, ud2a or jmp[q] end a
+ * block. As does a call to a function marked noret.
+ */
+ p = bb_buffer;
+ p += strcspn(p, ":");
+ if (*p++ == ':') {
+ bb_fixup_switch_to(p);
+ p += strspn(p, " \t"); /* start of instruction */
+ if (strncmp(p, "ret", 3) == 0 ||
+ strncmp(p, "iret", 4) == 0 ||
+ strncmp(p, "sysexit", 7) == 0 ||
+ strncmp(p, "sysret", 6) == 0 ||
+ strncmp(p, "ud2a", 4) == 0 ||
+ strncmp(p, "jmp", 3) == 0) {
+ if (strncmp(p, "jmp", 3) == 0)
+ bb_pass1_computed_jmp(p);
+ bb_add(0, bb_curr_addr);
+ }
+ if (strncmp(p, "call", 4) == 0) {
+ strsep(&p, " \t"); /* end of opcode */
+ if (p)
+ p += strspn(p, " \t"); /* operand(s) */
+ if (p && strchr(p, '<')) {
+ p = strchr(p, '<') + 1;
+ *strchr(p, '>') = '\0';
+ if (bb_noret(p))
+ bb_add(0, bb_curr_addr);
+ }
+ }
+ }
+ bb_buffer[0] = '\0';
+ }
+ return 0;
+}
+
+static void
+bb_printaddr_pass1(bfd_vma addr, disassemble_info *dip)
+{
+ kdb_symtab_t symtab;
+ unsigned int offset;
+ struct bb *bb;
+ /* disasm only calls the printaddr routine for the target of jmp, loop
+ * or call instructions, i.e. the start of a basic block. call is
+ * ignored by bb_add because the target address is outside the current
+ * function.
+ */
+ dip->fprintf_func(dip->stream, "0x%lx", addr);
+ kdbnearsym(addr, &symtab);
+ if (symtab.sym_name) {
+ dip->fprintf_func(dip->stream, " <%s", symtab.sym_name);
+ if ((offset = addr - symtab.sym_start))
+ dip->fprintf_func(dip->stream, "+0x%x", offset);
+ dip->fprintf_func(dip->stream, ">");
+ }
+ bb = bb_add(addr, 0);
+ if (bb)
+ bb_jmp_add(bb_curr_addr, addr, 0);
+}
+
+static void
+bb_pass1(void)
+{
+ int i;
+ unsigned long addr;
+ struct bb *bb;
+ struct bb_jmp *bb_jmp;
+
+ if (KDB_DEBUG(BB) || KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: func_name %s func_start " kdb_bfd_vma_fmt0
+ " func_end " kdb_bfd_vma_fmt0 "\n",
+ __FUNCTION__,
+ bb_func_name,
+ bb_func_start,
+ bb_func_end);
+ kdb_di.fprintf_func = bb_dis_pass1;
+ kdb_di.print_address_func = bb_printaddr_pass1;
+
+ bb_add(bb_func_start, 0);
+ for (bb_curr_addr = bb_func_start;
+ bb_curr_addr < bb_func_end;
+ ++bb_curr_addr) {
+ unsigned char c;
+ if (kdb_getarea(c, bb_curr_addr)) {
+ kdb_printf("%s: unreadable function code at ",
+ __FUNCTION__);
+ kdb_symbol_print(bb_curr_addr, NULL, KDB_SP_DEFAULT);
+ kdb_printf(", giving up\n");
+ bb_giveup = 1;
+ return;
+ }
+ }
+ for (addr = bb_func_start; addr < bb_func_end; ) {
+ bb_curr_addr = addr;
+ addr += kdba_id_printinsn(addr, &kdb_di);
+ kdb_di.fprintf_func(NULL, "\n");
+ }
+ if (bb_giveup)
+ goto out;
+
+ /* Special case: a block consisting of a single instruction which is
+ * both the target of a jmp and an ending instruction, so we
+ * add two blocks using the same address, one as a start and one as an
+ * end, in no guaranteed order. The end must be ordered after the
+ * start.
+ */
+ for (i = 0; i < bb_count-1; ++i) {
+ struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
+ if (bb1->end && bb1->end == bb2->start) {
+ bb = bb_list[i+1];
+ bb_list[i+1] = bb_list[i];
+ bb_list[i] = bb;
+ }
+ }
+
+ /* Some bb have a start address, some have an end address. Collapse
+ * them into entries that have both start and end addresses. The first
+ * entry is guaranteed to have a start address.
+ */
+ for (i = 0; i < bb_count-1; ++i) {
+ struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
+ if (bb1->end)
+ continue;
+ if (bb2->start) {
+ bb1->end = bb2->start - 1;
+ bb1->drop_through = 1;
+ bb_jmp_add(bb1->end, bb2->start, 1);
+ } else {
+ bb1->end = bb2->end;
+ bb_delete(i+1);
+ }
+ }
+ bb = bb_list[bb_count-1];
+ if (!bb->end)
+ bb->end = bb_func_end - 1;
+
+ /* It would be nice to check that all bb have a valid start and end
+ * address but there is just too much garbage code in the kernel to do
+ * that check. Aligned functions in assembler code mean that there is
+ * space between the end of one function and the start of the next and
+ * that space contains previous code from the assembler's buffers. It
+ * looks like dead code with nothing that branches to it, so no start
+ * address. do_sys_vm86() ends with 'jmp resume_userspace' which the C
+ * compiler does not know about, so gcc appends the normal exit code;
+ * again, nothing branches to this dangling code.
+ *
+ * The best we can do is delete bb entries with no start address.
+ */
+ for (i = 0; i < bb_count; ++i) {
+ struct bb *bb = bb_list[i];
+ if (!bb->start)
+ bb_delete(i--);
+ }
+ for (i = 0; i < bb_count; ++i) {
+ struct bb *bb = bb_list[i];
+ if (!bb->end) {
+ kdb_printf("%s: incomplete bb state\n", __FUNCTION__);
+ bb_giveup = 1;
+ goto debug;
+ }
+ }
+
+out:
+ if (!KDB_DEBUG(BB))
+ return;
+debug:
+ kdb_printf("%s: end\n", __FUNCTION__);
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ kdb_printf(" bb[%d] start "
+ kdb_bfd_vma_fmt0
+ " end " kdb_bfd_vma_fmt0
+ " drop_through %d",
+ i, bb->start, bb->end, bb->drop_through);
+ kdb_printf("\n");
+ }
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ kdb_printf(" bb_jmp[%d] from "
+ kdb_bfd_vma_fmt0
+ " to " kdb_bfd_vma_fmt0
+ " drop_through %d\n",
+ i, bb_jmp->from, bb_jmp->to, bb_jmp->drop_through);
+ }
+}
+
+/* Pass 2, record register changes in each basic block */
+
+/* For each opcode that we care about, indicate how it uses its operands. Most
+ * opcodes can be handled generically because they completely specify their
+ * operands in the instruction, however many opcodes have side effects such as
+ * reading or writing rax or updating rsp. Instructions that change registers
+ * that are not listed in the operands must be handled as special cases. In
+ * addition, instructions that copy registers while preserving their contents
+ * (push, pop, mov) or change the contents in a well defined way (add with an
+ * immediate, lea) must be handled as special cases in order to track the
+ * register contents.
+ *
+ * The tables below only list opcodes that are actually used in the Linux
+ * kernel, so they omit most of the floating point and all of the SSE type
+ * instructions. The operand usage entries only cater for accesses to memory
+ * and to the integer registers, accesses to floating point registers and flags
+ * are not relevant for kernel backtraces.
+ */
+
+enum bb_operand_usage {
+ BBOU_UNKNOWN = 0,
+ /* generic entries. Because xchg can do any combination of
+ * read src, write src, read dst and write dst, we need to
+ * define all 16 possibilities. These are ordered by rs = 1,
+ * rd = 2, ws = 4, wd = 8; the bb_usage_x*() functions rely on this
+ * order.
+ */
+ BBOU_RS = 1, /* read src */ /* 1 */
+ BBOU_RD, /* read dst */ /* 2 */
+ BBOU_RSRD, /* 3 */
+ BBOU_WS, /* write src */ /* 4 */
+ BBOU_RSWS, /* 5 */
+ BBOU_RDWS, /* 6 */
+ BBOU_RSRDWS, /* 7 */
+ BBOU_WD, /* write dst */ /* 8 */
+ BBOU_RSWD, /* 9 */
+ BBOU_RDWD, /* 10 */
+ BBOU_RSRDWD, /* 11 */
+ BBOU_WSWD, /* 12 */
+ BBOU_RSWSWD, /* 13 */
+ BBOU_RDWSWD, /* 14 */
+ BBOU_RSRDWSWD, /* 15 */
+ /* opcode specific entries */
+ BBOU_ADD,
+ BBOU_AND,
+ BBOU_CALL,
+ BBOU_CBW,
+ BBOU_CMOV,
+ BBOU_CMPXCHG,
+ BBOU_CMPXCHGD,
+ BBOU_CPUID,
+ BBOU_CWD,
+ BBOU_DIV,
+ BBOU_IDIV,
+ BBOU_IMUL,
+ BBOU_IRET,
+ BBOU_JMP,
+ BBOU_LAHF,
+ BBOU_LEA,
+ BBOU_LEAVE,
+ BBOU_LODS,
+ BBOU_LOOP,
+ BBOU_LSS,
+ BBOU_MONITOR,
+ BBOU_MOV,
+ BBOU_MOVS,
+ BBOU_MUL,
+ BBOU_MWAIT,
+ BBOU_NOP,
+ BBOU_OUTS,
+ BBOU_POP,
+ BBOU_POPF,
+ BBOU_PUSH,
+ BBOU_PUSHF,
+ BBOU_RDMSR,
+ BBOU_RDTSC,
+ BBOU_RET,
+ BBOU_SAHF,
+ BBOU_SCAS,
+ BBOU_SUB,
+ BBOU_SYSEXIT,
+ BBOU_SYSRET,
+ BBOU_WRMSR,
+ BBOU_XADD,
+ BBOU_XCHG,
+ BBOU_XOR,
+};
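+
+/* The generic values compose as bit flags: BBOU_RSWD == BBOU_RS | BBOU_WD.
+ * For example, bsf is tagged BBOU_RSWD in the table below because it reads
+ * its source and writes its destination; bb_usage() tests the individual
+ * bits with 'usage & BBOU_RS' etc.
+ */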
+
+struct bb_opcode_usage {
+ int length;
+ enum bb_operand_usage usage;
+ const char *opcode;
+};
+
+/* This table is sorted in alphabetical order of opcode, except that the
+ * trailing '"' is treated as a high value. For example, 'in' sorts after
+ * 'inc', 'bt' after 'btc'. This modified sort order ensures that shorter
+ * opcodes come after longer ones. A normal sort would put 'in' first, so 'in'
+ * would match both 'inc' and 'in'. When adding any new entries to this table,
+ * be careful to put shorter entries last in their group.
+ *
+ * To automatically sort the table (in vi)
+ * Mark the first and last opcode line with 'a and 'b
+ * 'a
+ * !'bsed -e 's/"}/}}/' | LANG=C sort -t '"' -k2 | sed -e 's/}}/"}/'
+ *
+ * If a new instruction has to be added, first consider if it affects registers
+ * other than those listed in the operands. Also consider if you want to track
+ * the results of issuing the instruction, IOW can you extract useful
+ * information by looking in detail at the modified registers or memory. If
+ * either test is true then you need a special case to handle the instruction.
+ *
+ * The generic entries at the start of enum bb_operand_usage all have one thing
+ * in common, if a register or memory location is updated then that location
+ * becomes undefined, i.e. we lose track of anything that was previously saved
+ * in that location. So only use a generic BBOU_* value when the result of the
+ * instruction cannot be calculated exactly _and_ when all the affected
+ * registers are listed in the operands.
+ *
+ * Examples:
+ *
+ * 'call' does not generate a known result, but as a side effect of call,
+ * several scratch registers become undefined, so it needs a special BBOU_CALL
+ * entry.
+ *
+ * 'adc' generates a variable result, it depends on the carry flag, so 'adc'
+ * gets a generic entry. 'add' can generate an exact result (add with
+ * immediate on a register that points to the stack) or it can generate an
+ * unknown result (add a variable, or add immediate to a register that does not
+ * contain a stack pointer) so 'add' has its own BBOU_ADD entry.
+ */
+
+static const struct bb_opcode_usage
+bb_opcode_usage_all[] = {
+ {3, BBOU_RSRDWD, "adc"},
+ {3, BBOU_ADD, "add"},
+ {3, BBOU_AND, "and"},
+ {3, BBOU_RSWD, "bsf"},
+ {3, BBOU_RSWD, "bsr"},
+ {5, BBOU_RSWS, "bswap"},
+ {3, BBOU_RSRDWD, "btc"},
+ {3, BBOU_RSRDWD, "btr"},
+ {3, BBOU_RSRDWD, "bts"},
+ {2, BBOU_RSRD, "bt"},
+ {4, BBOU_CALL, "call"},
+ {4, BBOU_CBW, "cbtw"}, /* Intel cbw */
+ {3, BBOU_NOP, "clc"},
+ {3, BBOU_NOP, "cld"},
+ {7, BBOU_RS, "clflush"},
+ {4, BBOU_NOP, "clgi"},
+ {3, BBOU_NOP, "cli"},
+ {4, BBOU_CWD, "cltd"}, /* Intel cdq */
+ {4, BBOU_CBW, "cltq"}, /* Intel cdqe */
+ {4, BBOU_NOP, "clts"},
+ {4, BBOU_CMOV, "cmov"},
+	{9, BBOU_CMPXCHGD, "cmpxchg16"},
+	{8, BBOU_CMPXCHGD, "cmpxchg8"},
+ {7, BBOU_CMPXCHG, "cmpxchg"},
+ {3, BBOU_RSRD, "cmp"},
+ {5, BBOU_CPUID, "cpuid"},
+	{4, BBOU_CWD,     "cqto"},	/* Intel cqo */
+ {4, BBOU_CWD, "cwtd"}, /* Intel cwd */
+ {4, BBOU_CBW, "cwtl"}, /* Intel cwde */
+ {4, BBOU_NOP, "data"}, /* alternative ASM_NOP<n> generates data16 on x86_64 */
+ {3, BBOU_RSWS, "dec"},
+ {3, BBOU_DIV, "div"},
+ {5, BBOU_RS, "fdivl"},
+ {5, BBOU_NOP, "finit"},
+ {6, BBOU_RS, "fistpl"},
+ {4, BBOU_RS, "fldl"},
+ {4, BBOU_RS, "fmul"},
+ {6, BBOU_NOP, "fnclex"},
+ {6, BBOU_NOP, "fninit"},
+ {6, BBOU_RS, "fnsave"},
+ {7, BBOU_NOP, "fnsetpm"},
+ {6, BBOU_RS, "frstor"},
+ {5, BBOU_WS, "fstsw"},
+ {5, BBOU_RS, "fsubp"},
+ {5, BBOU_NOP, "fwait"},
+ {7, BBOU_RS, "fxrstor"},
+ {6, BBOU_RS, "fxsave"},
+ {3, BBOU_NOP, "hlt"},
+ {4, BBOU_IDIV, "idiv"},
+ {4, BBOU_IMUL, "imul"},
+ {3, BBOU_RSWS, "inc"},
+ {3, BBOU_NOP, "int"},
+ {7, BBOU_RSRD, "invlpga"},
+ {6, BBOU_RS, "invlpg"},
+ {2, BBOU_RSWD, "in"},
+ {4, BBOU_IRET, "iret"},
+ {1, BBOU_JMP, "j"},
+ {4, BBOU_LAHF, "lahf"},
+ {3, BBOU_RSWD, "lar"},
+ {5, BBOU_RS, "lcall"},
+ {5, BBOU_LEAVE, "leave"},
+ {3, BBOU_LEA, "lea"},
+ {6, BBOU_NOP, "lfence"},
+ {4, BBOU_RS, "lgdt"},
+ {4, BBOU_RS, "lidt"},
+ {4, BBOU_RS, "ljmp"},
+ {4, BBOU_RS, "lldt"},
+ {4, BBOU_RS, "lmsw"},
+ {4, BBOU_LODS, "lods"},
+ {4, BBOU_LOOP, "loop"},
+ {4, BBOU_NOP, "lret"},
+ {3, BBOU_RSWD, "lsl"},
+ {3, BBOU_LSS, "lss"},
+ {3, BBOU_RS, "ltr"},
+ {6, BBOU_NOP, "mfence"},
+ {7, BBOU_MONITOR, "monitor"},
+ {4, BBOU_MOVS, "movs"},
+ {3, BBOU_MOV, "mov"},
+ {3, BBOU_MUL, "mul"},
+ {5, BBOU_MWAIT, "mwait"},
+ {3, BBOU_RSWS, "neg"},
+ {3, BBOU_NOP, "nop"},
+ {3, BBOU_RSWS, "not"},
+ {2, BBOU_RSRDWD, "or"},
+ {4, BBOU_OUTS, "outs"},
+ {3, BBOU_RSRD, "out"},
+ {5, BBOU_NOP, "pause"},
+ {4, BBOU_POPF, "popf"},
+ {3, BBOU_POP, "pop"},
+ {8, BBOU_RS, "prefetch"},
+ {5, BBOU_PUSHF, "pushf"},
+ {4, BBOU_PUSH, "push"},
+ {3, BBOU_RSRDWD, "rcl"},
+ {3, BBOU_RSRDWD, "rcr"},
+ {5, BBOU_RDMSR, "rdmsr"},
+ {5, BBOU_RDMSR, "rdpmc"}, /* same side effects as rdmsr */
+ {5, BBOU_RDTSC, "rdtsc"},
+ {3, BBOU_RET, "ret"},
+ {3, BBOU_RSRDWD, "rol"},
+ {3, BBOU_RSRDWD, "ror"},
+ {4, BBOU_SAHF, "sahf"},
+ {3, BBOU_RSRDWD, "sar"},
+ {3, BBOU_RSRDWD, "sbb"},
+ {4, BBOU_SCAS, "scas"},
+ {3, BBOU_WS, "set"},
+ {6, BBOU_NOP, "sfence"},
+ {4, BBOU_WS, "sgdt"},
+ {3, BBOU_RSRDWD, "shl"},
+ {3, BBOU_RSRDWD, "shr"},
+ {4, BBOU_WS, "sidt"},
+ {4, BBOU_WS, "sldt"},
+ {3, BBOU_NOP, "stc"},
+ {3, BBOU_NOP, "std"},
+ {4, BBOU_NOP, "stgi"},
+ {3, BBOU_NOP, "sti"},
+ {4, BBOU_SCAS, "stos"},
+ {4, BBOU_WS, "strl"},
+ {3, BBOU_WS, "str"},
+ {3, BBOU_SUB, "sub"},
+ {6, BBOU_NOP, "swapgs"},
+ {7, BBOU_SYSEXIT, "sysexit"},
+ {6, BBOU_SYSRET, "sysret"},
+ {4, BBOU_NOP, "test"},
+ {4, BBOU_NOP, "ud2a"},
+ {7, BBOU_RS, "vmclear"},
+ {8, BBOU_NOP, "vmlaunch"},
+ {6, BBOU_RS, "vmload"},
+ {7, BBOU_RS, "vmptrld"},
+ {6, BBOU_WD, "vmread"}, /* vmread src is an encoding, not a register */
+ {8, BBOU_NOP, "vmresume"},
+ {5, BBOU_RS, "vmrun"},
+ {6, BBOU_RS, "vmsave"},
+ {7, BBOU_WD, "vmwrite"}, /* vmwrite src is an encoding, not a register */
+	{6, BBOU_NOP,     "vmxoff"},
+ {6, BBOU_NOP, "wbinvd"},
+ {5, BBOU_WRMSR, "wrmsr"},
+ {4, BBOU_XADD, "xadd"},
+ {4, BBOU_XCHG, "xchg"},
+ {3, BBOU_XOR, "xor"},
+ {4, BBOU_NOP, "xrstor"},
+ {4, BBOU_NOP, "xsave"},
+ {10, BBOU_WS, "xstore-rng"},
+};
+
+/* To speed up searching, index bb_opcode_usage_all by the first letter of each
+ * opcode.
+ */
+static struct {
+ const struct bb_opcode_usage *opcode;
+ int size;
+} bb_opcode_usage[26];
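+
+/* The length field allows prefix matching, e.g. the {3, BBOU_MOV, "mov"}
+ * entry above matches movb, movw, movl and movq, because bb_parse_opcode()
+ * below compares with strncmp(opcode, "mov", 3).
+ */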
+
+struct bb_operand {
+ char *base;
+ char *index;
+ char *segment;
+ long disp;
+ unsigned int scale;
+ enum bb_reg_code base_rc; /* UNDEFINED or RAX through R15 */
+ enum bb_reg_code index_rc; /* UNDEFINED or RAX through R15 */
+ unsigned int present :1;
+ unsigned int disp_present :1;
+ unsigned int indirect :1; /* must be combined with reg or memory */
+ unsigned int immediate :1; /* exactly one of these 3 must be set */
+ unsigned int reg :1;
+ unsigned int memory :1;
+};
+
+struct bb_decode {
+ char *prefix;
+ char *opcode;
+ const struct bb_opcode_usage *match;
+ struct bb_operand src;
+ struct bb_operand dst;
+ struct bb_operand dst2;
+};
+
+static struct bb_decode bb_decode;
+
+static enum bb_reg_code
+bb_reg_map(const char *reg)
+{
+ int lo, hi, c;
+ const struct bb_reg_code_map *p;
+ lo = 0;
+ hi = ARRAY_SIZE(bb_reg_code_map) - 1;
+ while (lo <= hi) {
+ int mid = (hi + lo) / 2;
+ p = bb_reg_code_map + mid;
+ c = strcmp(p->name, reg+1);
+ if (c == 0)
+ return p->reg;
+ else if (c > 0)
+ hi = mid - 1;
+ else
+ lo = mid + 1;
+ }
+ return BBRG_UNDEFINED;
+}
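+
+/* For example, bb_reg_map("%eax") skips the leading '%' and binary chops
+ * bb_reg_code_map for "eax", so all the sub-register names of a register
+ * (ax, al, ah, eax, rax) map to the same BBRG_RAX code.
+ */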
+
+static void
+bb_parse_operand(char *str, struct bb_operand *operand)
+{
+ char *p = str;
+ int sign = 1;
+ operand->present = 1;
+ /* extract any segment prefix */
+ if (p[0] == '%' && p[1] && p[2] == 's' && p[3] == ':') {
+ operand->memory = 1;
+ operand->segment = p;
+ p[3] = '\0';
+ p += 4;
+ }
+ /* extract displacement, base, index, scale */
+ if (*p == '*') {
+ /* jmp/call *disp(%reg), *%reg or *0xnnn */
+ operand->indirect = 1;
+ ++p;
+ }
+ if (*p == '-') {
+ sign = -1;
+ ++p;
+ }
+ if (*p == '$') {
+ operand->immediate = 1;
+ operand->disp_present = 1;
+ operand->disp = simple_strtoul(p+1, &p, 0);
+ } else if (isdigit(*p)) {
+ operand->memory = 1;
+ operand->disp_present = 1;
+ operand->disp = simple_strtoul(p, &p, 0) * sign;
+ }
+ if (*p == '%') {
+ operand->reg = 1;
+ operand->base = p;
+ } else if (*p == '(') {
+ operand->memory = 1;
+ operand->base = ++p;
+ p += strcspn(p, ",)");
+ if (p == operand->base)
+ operand->base = NULL;
+ if (*p == ',') {
+ *p = '\0';
+ operand->index = ++p;
+ p += strcspn(p, ",)");
+ if (p == operand->index)
+ operand->index = NULL;
+ }
+ if (*p == ',') {
+ *p = '\0';
+ operand->scale = simple_strtoul(p+1, &p, 0);
+ }
+ *p = '\0';
+ } else if (*p) {
+ kdb_printf("%s: unexpected token '%c' after disp '%s'\n",
+ __FUNCTION__, *p, str);
+ bb_giveup = 1;
+ }
+ if ((operand->immediate + operand->reg + operand->memory != 1) ||
+ (operand->indirect && operand->immediate)) {
+ kdb_printf("%s: incorrect decode '%s' N %d I %d R %d M %d\n",
+ __FUNCTION__, str,
+ operand->indirect, operand->immediate, operand->reg,
+ operand->memory);
+ bb_giveup = 1;
+ }
+ if (operand->base)
+ operand->base_rc = bb_reg_map(operand->base);
+ if (operand->index)
+ operand->index_rc = bb_reg_map(operand->index);
+}
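+
+/* A worked example (illustrative): parsing "-0x8(%rbp,%rsi,8)" sets
+ * memory = 1, disp = -0x8, base = "%rbp", index = "%rsi", scale = 8,
+ * base_rc = BBRG_RBP, index_rc = BBRG_RSI. Parsing "$0x10" sets
+ * immediate = 1, disp = 0x10.
+ */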
+
+static void
+bb_print_operand(const char *type, const struct bb_operand *operand)
+{
+ if (!operand->present)
+ return;
+ kdb_printf(" %s %c%c: ",
+ type,
+ operand->indirect ? 'N' : ' ',
+ operand->immediate ? 'I' :
+ operand->reg ? 'R' :
+ operand->memory ? 'M' :
+ '?'
+ );
+ if (operand->segment)
+ kdb_printf("%s:", operand->segment);
+ if (operand->immediate) {
+ kdb_printf("$0x%lx", operand->disp);
+ } else if (operand->reg) {
+ if (operand->indirect)
+ kdb_printf("*");
+ kdb_printf("%s", operand->base);
+ } else if (operand->memory) {
+ if (operand->indirect && (operand->base || operand->index))
+ kdb_printf("*");
+ if (operand->disp_present) {
+ kdb_printf("0x%lx", operand->disp);
+ }
+ if (operand->base || operand->index || operand->scale) {
+ kdb_printf("(");
+ if (operand->base)
+ kdb_printf("%s", operand->base);
+ if (operand->index || operand->scale)
+ kdb_printf(",");
+ if (operand->index)
+ kdb_printf("%s", operand->index);
+ if (operand->scale)
+ kdb_printf(",%d", operand->scale);
+ kdb_printf(")");
+ }
+ }
+ if (operand->base_rc)
+ kdb_printf(" base_rc %d (%s)",
+ operand->base_rc, bbrg_name[operand->base_rc]);
+ if (operand->index_rc)
+ kdb_printf(" index_rc %d (%s)",
+ operand->index_rc,
+ bbrg_name[operand->index_rc]);
+ kdb_printf("\n");
+}
+
+static void
+bb_print_opcode(void)
+{
+ const struct bb_opcode_usage *o = bb_decode.match;
+ kdb_printf(" ");
+ if (bb_decode.prefix)
+ kdb_printf("%s ", bb_decode.prefix);
+ kdb_printf("opcode '%s' matched by '%s', usage %d\n",
+ bb_decode.opcode, o->opcode, o->usage);
+}
+
+static int
+bb_parse_opcode(void)
+{
+ int c, i;
+ const struct bb_opcode_usage *o;
+ static int bb_parse_opcode_error_limit = 5;
+ c = bb_decode.opcode[0] - 'a';
+ if (c < 0 || c >= ARRAY_SIZE(bb_opcode_usage))
+ goto nomatch;
+ o = bb_opcode_usage[c].opcode;
+ if (!o)
+ goto nomatch;
+ for (i = 0; i < bb_opcode_usage[c].size; ++i, ++o) {
+ if (strncmp(bb_decode.opcode, o->opcode, o->length) == 0) {
+ bb_decode.match = o;
+ if (KDB_DEBUG(BB))
+ bb_print_opcode();
+ return 0;
+ }
+ }
+nomatch:
+ if (!bb_parse_opcode_error_limit)
+ return 1;
+ --bb_parse_opcode_error_limit;
+ kdb_printf("%s: no match at [%s]%s " kdb_bfd_vma_fmt0 " - '%s'\n",
+ __FUNCTION__,
+ bb_mod_name, bb_func_name, bb_curr_addr,
+ bb_decode.opcode);
+ return 1;
+}
+
+static bool
+bb_is_int_reg(enum bb_reg_code reg)
+{
+ return reg >= BBRG_RAX && reg < (BBRG_RAX + KDB_INT_REGISTERS);
+}
+
+static bool
+bb_is_simple_memory(const struct bb_operand *operand)
+{
+ return operand->memory &&
+ bb_is_int_reg(operand->base_rc) &&
+ !operand->index_rc &&
+ operand->scale == 0 &&
+ !operand->segment;
+}
+
+static bool
+bb_is_static_disp(const struct bb_operand *operand)
+{
+ return operand->memory &&
+ !operand->base_rc &&
+ !operand->index_rc &&
+ operand->scale == 0 &&
+ !operand->segment &&
+ !operand->indirect;
+}
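+
+/* For example, 0x28(%rsp) is "simple memory": a displacement from a single
+ * integer base register with no index, scale or segment. A direct call or
+ * jmp target decodes as a bare displacement with no registers at all, which
+ * is what bb_is_static_disp() recognises.
+ */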
+
+static enum bb_reg_code
+bb_reg_code_value(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_reg_state->contains[reg - BBRG_RAX].value;
+}
+
+static short
+bb_reg_code_offset(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_reg_state->contains[reg - BBRG_RAX].offset;
+}
+
+static void
+bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src)
+{
+ BB_CHECK(!bb_is_int_reg(dst), dst, );
+ bb_reg_state->contains[dst - BBRG_RAX].value = src;
+}
+
+static void
+bb_reg_code_set_offset(enum bb_reg_code dst, short offset)
+{
+ BB_CHECK(!bb_is_int_reg(dst), dst, );
+ bb_reg_state->contains[dst - BBRG_RAX].offset = offset;
+}
+
+static bool
+bb_is_osp_defined(enum bb_reg_code reg)
+{
+ if (bb_is_int_reg(reg))
+ return bb_reg_code_value(reg) == BBRG_OSP;
+ else
+ return 0;
+}
+
+static bfd_vma
+bb_actual_value(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_actual[reg - BBRG_RAX].value;
+}
+
+static int
+bb_actual_valid(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_actual[reg - BBRG_RAX].valid;
+}
+
+static void
+bb_actual_set_value(enum bb_reg_code reg, bfd_vma value)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, );
+ bb_actual[reg - BBRG_RAX].value = value;
+}
+
+static void
+bb_actual_set_valid(enum bb_reg_code reg, int valid)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, );
+ bb_actual[reg - BBRG_RAX].valid = valid;
+}
+
+/* The scheduler code switches RSP then does PUSH; it is not an error for RSP
+ * to be undefined in this area of the code.
+ */
+static bool
+bb_is_scheduler_address(void)
+{
+ return bb_curr_addr >= bb__sched_text_start &&
+ bb_curr_addr < bb__sched_text_end;
+}
+
+static void
+bb_reg_read(enum bb_reg_code reg)
+{
+ int i, r = 0;
+ if (!bb_is_int_reg(reg) ||
+ bb_reg_code_value(reg) != reg)
+ return;
+ for (i = 0;
+ i < min_t(unsigned int, REGPARM, ARRAY_SIZE(bb_param_reg));
+ ++i) {
+ if (reg == bb_param_reg[i]) {
+ r = i + 1;
+ break;
+ }
+ }
+ bb_reg_params = max(bb_reg_params, r);
+}
+
+static void
+bb_do_reg_state_print(const struct bb_reg_state *s)
+{
+ int i, offset_address, offset_value;
+ const struct bb_memory_contains *c;
+ enum bb_reg_code value;
+ kdb_printf(" bb_reg_state %p\n", s);
+ for (i = 0; i < ARRAY_SIZE(s->contains); ++i) {
+ value = s->contains[i].value;
+ offset_value = s->contains[i].offset;
+ kdb_printf(" %s = %s",
+ bbrg_name[i + BBRG_RAX], bbrg_name[value]);
+ if (value == BBRG_OSP)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset_value, "", "");
+ kdb_printf("\n");
+ }
+ for (i = 0, c = s->memory; i < s->mem_count; ++i, ++c) {
+ offset_address = c->offset_address;
+ value = c->value;
+ offset_value = c->offset_value;
+ kdb_printf(" slot %d offset_address %c0x%x %s",
+ i,
+ offset_address >= 0 ? '+' : '-',
+ offset_address >= 0 ? offset_address : -offset_address,
+ bbrg_name[value]);
+ if (value == BBRG_OSP)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset_value, "", "");
+ kdb_printf("\n");
+ }
+}
+
+static void
+bb_reg_state_print(const struct bb_reg_state *s)
+{
+ if (KDB_DEBUG(BB))
+ bb_do_reg_state_print(s);
+}
+
+/* Set register 'dst' to contain the value from 'src'. This includes reading
+ * from 'src' and writing to 'dst'. The offset value is copied iff 'src'
+ * contains a stack pointer.
+ *
+ * Be very careful about the context here. 'dst' and 'src' reflect integer
+ * registers by name, _not_ by the value of their contents. "mov %rax,%rsi"
+ * will call this function as bb_reg_set_reg(BBRG_RSI, BBRG_RAX), which
+ * reflects what the assembler code is doing. However we need to track the
+ * _values_ in the registers, not their names. IOW, we really care about "what
+ * value does rax contain when it is copied into rsi?", so we can record the
+ * fact that we now have two copies of that value, one in rax and one in rsi.
+ */
+
+static void
+bb_reg_set_reg(enum bb_reg_code dst, enum bb_reg_code src)
+{
+ enum bb_reg_code src_value = BBRG_UNDEFINED;
+ short offset_value = 0;
+ KDB_DEBUG_BB(" %s = %s", bbrg_name[dst], bbrg_name[src]);
+ if (bb_is_int_reg(src)) {
+ bb_reg_read(src);
+ src_value = bb_reg_code_value(src);
+ KDB_DEBUG_BB(" (%s", bbrg_name[src_value]);
+ if (bb_is_osp_defined(src)) {
+ offset_value = bb_reg_code_offset(src);
+ KDB_DEBUG_BB_OFFSET(offset_value, "", "");
+ }
+ KDB_DEBUG_BB(")");
+ }
+ if (bb_is_int_reg(dst)) {
+ bb_reg_code_set_value(dst, src_value);
+ bb_reg_code_set_offset(dst, offset_value);
+ }
+ KDB_DEBUG_BB("\n");
+}
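+
+/* For example, after "mov %rsp,%rbp" when rsp is known to hold osp-0x18,
+ * bb_reg_set_reg(BBRG_RBP, BBRG_RSP) records rbp = osp-0x18 as well: both
+ * the value (BBRG_OSP) and the offset are copied.
+ */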
+
+static void
+bb_reg_set_undef(enum bb_reg_code dst)
+{
+ bb_reg_set_reg(dst, BBRG_UNDEFINED);
+}
+
+/* Delete any record of a stored register held in osp + 'offset' */
+
+static void
+bb_delete_memory(short offset)
+{
+ int i;
+ struct bb_memory_contains *c;
+ for (i = 0, c = bb_reg_state->memory;
+ i < bb_reg_state->mem_count;
+ ++i, ++c) {
+ if (c->offset_address == offset &&
+ c->value != BBRG_UNDEFINED) {
+ KDB_DEBUG_BB(" delete %s from ",
+ bbrg_name[c->value]);
+ KDB_DEBUG_BB_OFFSET(offset, "osp", "");
+ KDB_DEBUG_BB(" slot %d\n",
+ (int)(c - bb_reg_state->memory));
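+			/* BBRG_UNDEFINED is 0, so this memset clears the
+			 * whole slot.
+			 */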
+ memset(c, BBRG_UNDEFINED, sizeof(*c));
+ if (i == bb_reg_state->mem_count - 1)
+ --bb_reg_state->mem_count;
+ }
+ }
+}
+
+/* Set memory location *('dst' + 'offset_address') to contain the supplied
+ * value and offset. 'dst' is assumed to be a register that contains a stack
+ * pointer.
+ */
+
+static void
+bb_memory_set_reg_value(enum bb_reg_code dst, short offset_address,
+ enum bb_reg_code value, short offset_value)
+{
+ int i;
+ struct bb_memory_contains *c, *free = NULL;
+ BB_CHECK(!bb_is_osp_defined(dst), dst, );
+ KDB_DEBUG_BB(" *(%s", bbrg_name[dst]);
+ KDB_DEBUG_BB_OFFSET(offset_address, "", "");
+ offset_address += bb_reg_code_offset(dst);
+ KDB_DEBUG_BB_OFFSET(offset_address, " osp", ") = ");
+ KDB_DEBUG_BB("%s", bbrg_name[value]);
+ if (value == BBRG_OSP)
+ KDB_DEBUG_BB_OFFSET(offset_value, "", "");
+ for (i = 0, c = bb_reg_state->memory;
+ i < bb_reg_state_max;
+ ++i, ++c) {
+ if (c->offset_address == offset_address)
+ free = c;
+ else if (c->value == BBRG_UNDEFINED && !free)
+ free = c;
+ }
+ if (!free) {
+ struct bb_reg_state *new, *old = bb_reg_state;
+ size_t old_size, new_size;
+ int slot;
+ old_size = sizeof(*old) + bb_reg_state_max *
+ sizeof(old->memory[0]);
+ slot = bb_reg_state_max;
+ bb_reg_state_max += 5;
+ new_size = sizeof(*new) + bb_reg_state_max *
+ sizeof(new->memory[0]);
+ new = debug_kmalloc(new_size, GFP_ATOMIC);
+ if (!new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ } else {
+ memcpy(new, old, old_size);
+ memset((char *)new + old_size, BBRG_UNDEFINED,
+ new_size - old_size);
+ bb_reg_state = new;
+ debug_kfree(old);
+ free = bb_reg_state->memory + slot;
+ }
+ }
+ if (free) {
+ int slot = free - bb_reg_state->memory;
+ free->offset_address = offset_address;
+ free->value = value;
+ free->offset_value = offset_value;
+ KDB_DEBUG_BB(" slot %d", slot);
+ bb_reg_state->mem_count = max(bb_reg_state->mem_count, slot+1);
+ }
+ KDB_DEBUG_BB("\n");
+}
+
+/* Set memory location *('dst' + 'offset') to contain the value from register
+ * 'src'. 'dst' is assumed to be a register that contains a stack pointer.
+ * This differs from bb_memory_set_reg_value because it takes a src register
+ * which contains a value and possibly an offset, whereas
+ * bb_memory_set_reg_value is passed the value and offset directly.
+ */
+
+static void
+bb_memory_set_reg(enum bb_reg_code dst, enum bb_reg_code src,
+ short offset_address)
+{
+ int offset_value;
+ enum bb_reg_code value;
+ BB_CHECK(!bb_is_osp_defined(dst), dst, );
+ if (!bb_is_int_reg(src))
+ return;
+ value = bb_reg_code_value(src);
+ if (value == BBRG_UNDEFINED) {
+ bb_delete_memory(offset_address + bb_reg_code_offset(dst));
+ return;
+ }
+ offset_value = bb_reg_code_offset(src);
+ bb_reg_read(src);
+ bb_memory_set_reg_value(dst, offset_address, value, offset_value);
+}
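+
+/* For example, "mov %rbx,0x10(%rsp)" with rsp = osp-0x28 ends up calling
+ * bb_memory_set_reg_value() to record the value held in rbx at stack slot
+ * osp-0x18 (0x10 plus the rsp offset of -0x28).
+ */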
+
+/* Set register 'dst' to contain the value from memory *('src' + offset_address).
+ * 'src' is assumed to be a register that contains a stack pointer.
+ */
+
+static void
+bb_reg_set_memory(enum bb_reg_code dst, enum bb_reg_code src, short offset_address)
+{
+ int i, defined = 0;
+ struct bb_memory_contains *s;
+ BB_CHECK(!bb_is_osp_defined(src), src, );
+ KDB_DEBUG_BB(" %s = *(%s",
+ bbrg_name[dst], bbrg_name[src]);
+ KDB_DEBUG_BB_OFFSET(offset_address, "", ")");
+ offset_address += bb_reg_code_offset(src);
+ KDB_DEBUG_BB_OFFSET(offset_address, " (osp", ")");
+ for (i = 0, s = bb_reg_state->memory;
+ i < bb_reg_state->mem_count;
+ ++i, ++s) {
+ if (s->offset_address == offset_address && bb_is_int_reg(dst)) {
+ bb_reg_code_set_value(dst, s->value);
+ KDB_DEBUG_BB(" value %s", bbrg_name[s->value]);
+ if (s->value == BBRG_OSP) {
+ bb_reg_code_set_offset(dst, s->offset_value);
+ KDB_DEBUG_BB_OFFSET(s->offset_value, "", "");
+ } else {
+ bb_reg_code_set_offset(dst, 0);
+ }
+ defined = 1;
+ }
+ }
+ if (!defined)
+ bb_reg_set_reg(dst, BBRG_UNDEFINED);
+ else
+ KDB_DEBUG_BB("\n");
+}
+
+/* A generic read from an operand. */
+
+static void
+bb_read_operand(const struct bb_operand *operand)
+{
+ int m = 0;
+ if (operand->base_rc)
+ bb_reg_read(operand->base_rc);
+ if (operand->index_rc)
+ bb_reg_read(operand->index_rc);
+ if (bb_is_simple_memory(operand) &&
+ bb_is_osp_defined(operand->base_rc) &&
+ bb_decode.match->usage != BBOU_LEA) {
+ m = (bb_reg_code_offset(operand->base_rc) + operand->disp +
+ KDB_WORD_SIZE - 1) / KDB_WORD_SIZE;
+ bb_memory_params = max(bb_memory_params, m);
+ }
+}
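+
+/* For example, reading 0x18(%rsp) when rsp holds osp+0 rounds up to
+ * (0x18 + KDB_WORD_SIZE - 1) / KDB_WORD_SIZE words, so bb_memory_params is
+ * raised to at least 3 on x86_64 (8 byte words).
+ */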
+
+/* A generic write to an operand, resulting in an undefined value in that
+ * location. All well defined operands are handled separately, this function
+ * only handles the opcodes where the result is undefined.
+ */
+
+static void
+bb_write_operand(const struct bb_operand *operand)
+{
+ enum bb_reg_code base_rc = operand->base_rc;
+ if (operand->memory) {
+ if (base_rc)
+ bb_reg_read(base_rc);
+ if (operand->index_rc)
+ bb_reg_read(operand->index_rc);
+ } else if (operand->reg && base_rc) {
+ bb_reg_set_undef(base_rc);
+ }
+ if (bb_is_simple_memory(operand) && bb_is_osp_defined(base_rc)) {
+ int offset;
+ offset = bb_reg_code_offset(base_rc) + operand->disp;
+ offset = ALIGN(offset - KDB_WORD_SIZE + 1, KDB_WORD_SIZE);
+ bb_delete_memory(offset);
+ }
+}
+
+/* Adjust a register that contains a stack pointer */
+
+static void
+bb_adjust_osp(enum bb_reg_code reg, int adjust)
+{
+ int offset = bb_reg_code_offset(reg), old_offset = offset;
+ KDB_DEBUG_BB(" %s osp offset ", bbrg_name[reg]);
+ KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", " -> ");
+ offset += adjust;
+ bb_reg_code_set_offset(reg, offset);
+ KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", "\n");
+ /* When RSP is adjusted upwards, it invalidates any memory
+ * stored between the old and current stack offsets.
+ */
+ if (reg == BBRG_RSP) {
+ while (old_offset < bb_reg_code_offset(reg)) {
+ bb_delete_memory(old_offset);
+ old_offset += KDB_WORD_SIZE;
+ }
+ }
+}
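+
+/* For example, "add $0x18,%rsp" at the end of a function calls
+ * bb_adjust_osp(BBRG_RSP, 0x18); any register values that were recorded in
+ * the three discarded stack words (on x86_64) are deleted because they now
+ * sit below the stack pointer.
+ */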
+
+/* The current instruction adjusts a register that contains a stack pointer.
+ * Direction is 1 or -1, depending on whether the instruction is add/lea or
+ * sub.
+ */
+
+static void
+bb_adjust_osp_instruction(int direction)
+{
+ enum bb_reg_code dst_reg = bb_decode.dst.base_rc;
+ if (bb_decode.src.immediate ||
+ bb_decode.match->usage == BBOU_LEA /* lea has its own checks */) {
+ int adjust = direction * bb_decode.src.disp;
+ bb_adjust_osp(dst_reg, adjust);
+ } else {
+ /* variable stack adjustment, osp offset is not well defined */
+ KDB_DEBUG_BB(" %s osp offset ", bbrg_name[dst_reg]);
+ KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(dst_reg), "", " -> undefined\n");
+ bb_reg_code_set_value(dst_reg, BBRG_UNDEFINED);
+ bb_reg_code_set_offset(dst_reg, 0);
+ }
+}
+
+/* Some instructions using memory have an explicit length suffix (b, w, l, q).
+ * The equivalent instructions using a register imply the length from the
+ * register name. Deduce the operand length.
+ */
+
+static int
+bb_operand_length(const struct bb_operand *operand, char opcode_suffix)
+{
+ int l = 0;
+ switch (opcode_suffix) {
+ case 'b':
+ l = 8;
+ break;
+ case 'w':
+ l = 16;
+ break;
+ case 'l':
+ l = 32;
+ break;
+ case 'q':
+ l = 64;
+ break;
+ }
+ if (l == 0 && operand->reg) {
+ switch (strlen(operand->base)) {
+ case 3:
+ switch (operand->base[2]) {
+ case 'h':
+ case 'l':
+ l = 8;
+ break;
+ default:
+ l = 16;
+ break;
+			}
+			break;
+		case 4:
+ if (operand->base[1] == 'r')
+ l = 64;
+ else
+ l = 32;
+ break;
+ }
+ }
+ return l;
+}
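+
+/* For example, "movl" yields 32 from its 'l' suffix; with no suffix the
+ * register name decides: "%al" -> 8, "%ax" -> 16, "%eax" -> 32, and
+ * "%rax" -> 64 (length 4 with base[1] == 'r').
+ */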
+
+static int
+bb_reg_state_size(const struct bb_reg_state *state)
+{
+ return sizeof(*state) +
+ state->mem_count * sizeof(state->memory[0]);
+}
+
+/* Canonicalize the current bb_reg_state so it can be compared against
+ * previously created states. Sort the memory entries in descending order of
+ * offset_address (stack grows down). Empty slots are moved to the end of the
+ * list and trimmed.
+ */
+
+static void
+bb_reg_state_canonicalize(void)
+{
+ int i, order, changed;
+ struct bb_memory_contains *p1, *p2, temp;
+ do {
+ changed = 0;
+ for (i = 0, p1 = bb_reg_state->memory;
+ i < bb_reg_state->mem_count-1;
+ ++i, ++p1) {
+ p2 = p1 + 1;
+ if (p2->value == BBRG_UNDEFINED) {
+ order = 0;
+ } else if (p1->value == BBRG_UNDEFINED) {
+ order = 1;
+ } else if (p1->offset_address < p2->offset_address) {
+ order = 1;
+ } else if (p1->offset_address > p2->offset_address) {
+ order = -1;
+ } else {
+ order = 0;
+ }
+ if (order > 0) {
+ temp = *p2;
+ *p2 = *p1;
+ *p1 = temp;
+ changed = 1;
+ }
+ }
+	} while (changed);
+ for (i = 0, p1 = bb_reg_state->memory;
+ i < bb_reg_state_max;
+ ++i, ++p1) {
+ if (p1->value != BBRG_UNDEFINED)
+ bb_reg_state->mem_count = i + 1;
+ }
+ bb_reg_state_print(bb_reg_state);
+}
+
+static int
+bb_special_case(bfd_vma to)
+{
+ int i, j, rsp_offset, expect_offset, offset, errors = 0, max_errors = 40;
+ enum bb_reg_code reg, expect_value, value;
+ struct bb_name_state *r;
+
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (to == r->address &&
+ (r->fname == NULL || strcmp(bb_func_name, r->fname) == 0))
+ goto match;
+ }
+ /* Some inline assembler code has jumps to .fixup sections which result
+ * in out of line transfers with undefined state, ignore them.
+ */
+ if (strcmp(bb_func_name, "strnlen_user") == 0 ||
+ strcmp(bb_func_name, "copy_from_user") == 0)
+ return 1;
+ return 0;
+
+match:
+ /* Check the running registers match */
+ for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
+ expect_value = r->regs[reg].value;
+ if (test_bit(expect_value, r->skip_regs.bits)) {
+ /* this regs entry is not defined for this label */
+ continue;
+ }
+ if (expect_value == BBRG_UNDEFINED)
+ continue;
+ expect_offset = r->regs[reg].offset;
+ value = bb_reg_code_value(reg);
+ offset = bb_reg_code_offset(reg);
+ if (expect_value == value &&
+ (value != BBRG_OSP || r->osp_offset == offset))
+ continue;
+ kdb_printf("%s: Expected %s to contain %s",
+ __FUNCTION__,
+ bbrg_name[reg],
+ bbrg_name[expect_value]);
+ if (r->osp_offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(r->osp_offset, "", "");
+ kdb_printf(". It actually contains %s", bbrg_name[value]);
+ if (offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
+ kdb_printf("\n");
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ /* Check that any memory data on stack matches */
+ i = j = 0;
+ while (i < bb_reg_state->mem_count &&
+ j < r->mem_size) {
+ expect_value = r->mem[j].value;
+ if (test_bit(expect_value, r->skip_mem.bits) ||
+ expect_value == BBRG_UNDEFINED) {
+ /* this memory slot is not defined for this label */
+ ++j;
+ continue;
+ }
+ rsp_offset = bb_reg_state->memory[i].offset_address -
+ bb_reg_code_offset(BBRG_RSP);
+ if (rsp_offset >
+ r->mem[j].offset_address) {
+ /* extra slots in memory are OK */
+ ++i;
+ } else if (rsp_offset <
+ r->mem[j].offset_address) {
+ /* Required memory slot is missing */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "missing memory entry[%d] %s\n",
+ __FUNCTION__, j, bbrg_name[expect_value]);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ ++j;
+ } else {
+ if (bb_reg_state->memory[i].offset_value ||
+ bb_reg_state->memory[i].value != expect_value) {
+ /* memory slot is present but contains wrong
+ * value.
+ */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "wrong value in slot %d, "
+ "should be %s, it is %s\n",
+ __FUNCTION__, i,
+ bbrg_name[expect_value],
+ bbrg_name[bb_reg_state->memory[i].value]);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ ++i;
+ ++j;
+ }
+ }
+ while (j < r->mem_size) {
+ expect_value = r->mem[j].value;
+ if (test_bit(expect_value, r->skip_mem.bits) ||
+ expect_value == BBRG_UNDEFINED)
+ ++j;
+ else
+ break;
+ }
+ if (j != r->mem_size) {
+ /* Hit end of memory before testing all the pt_reg slots */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "missing trailing entries\n",
+ __FUNCTION__);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ if (errors)
+ goto fail;
+ return 1;
+fail:
+ kdb_printf("%s: on transfer to %s\n", __FUNCTION__, r->name);
+ bb_giveup = 1;
+ return 1;
+}
+
+/* Transfer of control to a label outside the current function. If the
+ * transfer is to a known common code path then do a sanity check on the state
+ * at this point.
+ */
+
+static void
+bb_sanity_check(int type)
+{
+ enum bb_reg_code expect, actual;
+ int i, offset, error = 0;
+
+ for (i = 0; i < ARRAY_SIZE(bb_preserved_reg); ++i) {
+ expect = bb_preserved_reg[i];
+ actual = bb_reg_code_value(expect);
+ offset = bb_reg_code_offset(expect);
+ if (expect == actual)
+ continue;
+ /* type == 1 is sysret/sysexit, ignore RSP */
+ if (type && expect == BBRG_RSP)
+ continue;
+ /* type == 1 is sysret/sysexit, ignore RBP for i386 */
+ /* We used to have "#ifndef CONFIG_X86_64" for the type=1 RBP
+ * test; however, x86_64 can run ia32 compatible mode and
+ * hit this problem. Perform the following test anyway!
+ */
+ if (type && expect == BBRG_RBP)
+ continue;
+ /* RSP should contain OSP+0. Except for ptregscall_common and
+ * ia32_ptregs_common, they get a partial pt_regs, fudge the
+ * stack to make it a full pt_regs then reverse the effect on
+ * exit, so the offset is -0x50 on exit.
+ */
+ if (expect == BBRG_RSP &&
+ bb_is_osp_defined(expect) &&
+ (offset == 0 ||
+ (offset == -0x50 &&
+ (strcmp(bb_func_name, "ptregscall_common") == 0 ||
+ strcmp(bb_func_name, "ia32_ptregs_common") == 0))))
+ continue;
+ /* The put_user and save_paranoid functions are special.
+ * %rbx gets clobbered */
+ if (expect == BBRG_RBX &&
+ (strncmp(bb_func_name, "__put_user_", 11) == 0 ||
+ strcmp(bb_func_name, "save_paranoid") == 0))
+ continue;
+		/* Ignore rbx and rsp for error_entry */
+ if ((strcmp(bb_func_name, "error_entry") == 0) &&
+ (expect == BBRG_RBX ||
+ (expect == BBRG_RSP && bb_is_osp_defined(expect) && offset == -0x10)))
+ continue;
+ kdb_printf("%s: Expected %s, got %s",
+ __FUNCTION__,
+ bbrg_name[expect], bbrg_name[actual]);
+ if (offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
+ kdb_printf("\n");
+ error = 1;
+ }
+ BB_CHECK(error, error, );
+}
+
+/* Transfer of control. Follow the arc and save the current state as input to
+ * another basic block.
+ */
+
+static void
+bb_transfer(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ int i, found;
+ size_t size;
+	struct bb *bb = NULL;	/* stupid gcc */
+ struct bb_jmp *bb_jmp;
+ struct bb_reg_state *state;
+ bb_reg_state_canonicalize();
+ found = 0;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ if (bb_jmp->from == from &&
+ bb_jmp->to == to &&
+ bb_jmp->drop_through == drop_through) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ /* Transfer outside the current function. Check the special
+ * cases (mainly in entry.S) first. If it is not a known
+ * special case then check if the target address is the start
+ * of a function or not. If it is the start of a function then
+ * assume tail recursion and require that the state be the same
+ * as on entry. Otherwise assume out of line code (e.g.
+ * spinlock contention path) and ignore it, the state can be
+ * anything.
+ */
+ kdb_symtab_t symtab;
+ if (bb_special_case(to))
+ return;
+ kdbnearsym(to, &symtab);
+ if (symtab.sym_start != to)
+ return;
+ bb_sanity_check(0);
+ if (bb_giveup)
+ return;
+#ifdef NO_SIBLINGS
+ /* Only print this message when the kernel is compiled with
+ * -fno-optimize-sibling-calls. Otherwise it would print a
+ * message for every tail recursion call. If you see the
+ * message below then you probably have an assembler label that
+ * is not listed in the special cases.
+ */
+ kdb_printf(" not matched: from "
+ kdb_bfd_vma_fmt0
+ " to " kdb_bfd_vma_fmt0
+ " drop_through %d bb_jmp[%d]\n",
+ from, to, drop_through, i);
+#endif /* NO_SIBLINGS */
+ return;
+ }
+ KDB_DEBUG_BB(" matched: from " kdb_bfd_vma_fmt0
+ " to " kdb_bfd_vma_fmt0
+ " drop_through %d bb_jmp[%d]\n",
+ from, to, drop_through, i);
+ found = 0;
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ if (bb->start == to) {
+ found = 1;
+ break;
+ }
+ }
+ BB_CHECK(!found, to, );
+ /* If the register state for this arc has already been set (we are
+ * rescanning the block that originates the arc) and the state is the
+ * same as the previous state for this arc then this input to the
+ * target block is the same as last time, so there is no need to rescan
+ * the target block.
+ */
+ state = bb_jmp->state;
+ size = bb_reg_state_size(bb_reg_state);
+ if (state) {
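+		/* Copy the ref_count across so that the memcmp() below
+		 * compares only the register and memory contents, not the
+		 * reference count.
+		 */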
+ bb_reg_state->ref_count = state->ref_count;
+ if (memcmp(state, bb_reg_state, size) == 0) {
+ KDB_DEBUG_BB(" no state change\n");
+ return;
+ }
+ if (--state->ref_count == 0)
+ debug_kfree(state);
+ bb_jmp->state = NULL;
+ }
+ /* New input state is required. To save space, check if any other arcs
+ * have the same state and reuse them where possible. The overall set
+ * of inputs to the target block is now different so the target block
+ * must be rescanned.
+ */
+ bb->changed = 1;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ state = bb_jmp_list[i].state;
+ if (!state)
+ continue;
+ bb_reg_state->ref_count = state->ref_count;
+ if (memcmp(state, bb_reg_state, size) == 0) {
+ KDB_DEBUG_BB(" reuse bb_jmp[%d]\n", i);
+ bb_jmp->state = state;
+ ++state->ref_count;
+ return;
+ }
+ }
+ state = debug_kmalloc(size, GFP_ATOMIC);
+ if (!state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(state, bb_reg_state, size);
+ state->ref_count = 1;
+ bb_jmp->state = state;
+ KDB_DEBUG_BB(" new state %p\n", state);
+}
+
+/* Isolate the processing for 'mov' so it can be used for 'xadd'/'xchg' as
+ * well.
+ *
+ * xadd/xchg expect this function to return BBOU_NOP for special cases,
+ * otherwise it returns BBOU_RSWD. All special cases must be handled entirely
+ * within this function, including doing bb_read_operand or bb_write_operand
+ * where necessary.
+ */
+
+static enum bb_operand_usage
+bb_usage_mov(const struct bb_operand *src, const struct bb_operand *dst, int l)
+{
+ int full_register_src, full_register_dst;
+ full_register_src = bb_operand_length(src, bb_decode.opcode[l])
+ == KDB_WORD_SIZE * 8;
+ full_register_dst = bb_operand_length(dst, bb_decode.opcode[l])
+ == KDB_WORD_SIZE * 8;
+ /* If both src and dst are full integer registers then record the
+ * register change.
+ */
+ if (src->reg &&
+ bb_is_int_reg(src->base_rc) &&
+ dst->reg &&
+ bb_is_int_reg(dst->base_rc) &&
+ full_register_src &&
+ full_register_dst) {
+ /* Special case for the code that switches stacks in
+ * jprobe_return. That code must modify RSP but it does it in
+ * a well defined manner. Do not invalidate RSP.
+ */
+ if (src->base_rc == BBRG_RBX &&
+ dst->base_rc == BBRG_RSP &&
+ strcmp(bb_func_name, "jprobe_return") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ /* math_abort takes the equivalent of a longjmp structure and
+ * resets the stack. Ignore this, it leaves RSP well defined.
+ */
+ if (dst->base_rc == BBRG_RSP &&
+ strcmp(bb_func_name, "math_abort") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ bb_reg_set_reg(dst->base_rc, src->base_rc);
+ return BBOU_NOP;
+ }
+ /* If the move is from a full integer register to stack then record it.
+ */
+ if (src->reg &&
+ bb_is_simple_memory(dst) &&
+ bb_is_osp_defined(dst->base_rc) &&
+ full_register_src) {
+ /* Ugly special case. Initializing list heads on stack causes
+ * false references to stack variables when the list head is
+ * used. Static code analysis cannot detect that the list head
+ * has been changed by a previous execution loop and that a
+ * basic block is only executed after the list head has been
+ * changed.
+ *
+ * These false references can result in valid stack variables
+ * being incorrectly cleared on some logic paths. Ignore
+ * stores to stack variables which point to themselves or to
+ * the previous word so the list head initialization is not
+ * recorded.
+ */
+ if (bb_is_osp_defined(src->base_rc)) {
+ int stack1 = bb_reg_code_offset(src->base_rc);
+ int stack2 = bb_reg_code_offset(dst->base_rc) +
+ dst->disp;
+ if (stack1 == stack2 ||
+ stack1 == stack2 - KDB_WORD_SIZE)
+ return BBOU_NOP;
+ }
+ bb_memory_set_reg(dst->base_rc, src->base_rc, dst->disp);
+ return BBOU_NOP;
+ }
+ /* If the move is from stack to a full integer register then record it.
+ */
+ if (bb_is_simple_memory(src) &&
+ bb_is_osp_defined(src->base_rc) &&
+ dst->reg &&
+ bb_is_int_reg(dst->base_rc) &&
+ full_register_dst) {
+#ifdef CONFIG_X86_32
+#ifndef TSS_sysenter_sp0
+#define TSS_sysenter_sp0 SYSENTER_stack_sp0
+#endif
+ /* mov from TSS_sysenter_sp0+offset to esp to fix up the
+ * sysenter stack, it leaves esp well defined. mov
+		 * TSS_sysenter_sp0+offset(%esp),%esp is followed by up to 5
+ * push instructions to mimic the hardware stack push. If
+ * TSS_sysenter_sp0 is offset then only 3 words will be
+ * pushed.
+ */
+ if (dst->base_rc == BBRG_RSP &&
+ src->disp >= TSS_sysenter_sp0 &&
+ bb_is_osp_defined(BBRG_RSP)) {
+ int pushes;
+ pushes = src->disp == TSS_sysenter_sp0 ? 5 : 3;
+ bb_reg_code_set_offset(BBRG_RSP,
+ bb_reg_code_offset(BBRG_RSP) +
+ pushes * KDB_WORD_SIZE);
+ KDB_DEBUG_BB_OFFSET(
+ bb_reg_code_offset(BBRG_RSP),
+ " sysenter fixup, RSP",
+ "\n");
+ return BBOU_NOP;
+ }
+#endif /* CONFIG_X86_32 */
+ bb_read_operand(src);
+ bb_reg_set_memory(dst->base_rc, src->base_rc, src->disp);
+ return BBOU_NOP;
+ }
+	/* mov %gs:0x<nn>,%rsp is used to unconditionally switch to another
+ * stack. Ignore this special case, it is handled by the stack
+ * unwinding code.
+ */
+ if (src->segment &&
+ strcmp(src->segment, "%gs") == 0 &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP)
+ return BBOU_NOP;
+ /* move %reg,%reg is a nop */
+ if (src->reg &&
+ dst->reg &&
+ !src->segment &&
+ !dst->segment &&
+ strcmp(src->base, dst->base) == 0)
+ return BBOU_NOP;
+ /* Special case for the code that switches stacks in the scheduler
+ * (switch_to()). That code must modify RSP but it does it in a well
+ * defined manner. Do not invalidate RSP.
+ */
+ if (dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ full_register_dst &&
+ bb_is_scheduler_address()) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ /* Special case for the code that switches stacks in resume from
+ * hibernation code. That code must modify RSP but it does it in a
+ * well defined manner. Do not invalidate RSP.
+ */
+ if (src->memory &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ full_register_dst &&
+ strcmp(bb_func_name, "restore_image") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ return BBOU_RSWD;
+}
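+
+/* Illustrative example of the list head special case above: initializing a
+ * list head on the stack can generate "lea 0x10(%rsp),%rax" followed by
+ * "mov %rax,0x10(%rsp)" and "mov %rax,0x18(%rsp)"; each store points at
+ * itself or at the previous word, so it is deliberately not recorded.
+ */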
+
+static enum bb_operand_usage
+bb_usage_xadd(const struct bb_operand *src, const struct bb_operand *dst)
+{
+ /* Simulate xadd as a series of instructions including mov, that way we
+ * get the benefit of all the special cases already handled by
+ * BBOU_MOV.
+ *
+ * tmp = src + dst, src = dst, dst = tmp.
+ *
+ * For tmp, pick a register that is undefined. If all registers are
+ * defined then pick one that is not being used by xadd.
+ */
+ enum bb_reg_code reg = BBRG_UNDEFINED;
+ struct bb_operand tmp;
+ struct bb_reg_contains save_tmp;
+ enum bb_operand_usage usage;
+ int undefined = 0;
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (bb_reg_code_value(reg) == BBRG_UNDEFINED) {
+ undefined = 1;
+ break;
+ }
+ }
+ if (!undefined) {
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (reg != src->base_rc &&
+ reg != src->index_rc &&
+ reg != dst->base_rc &&
+ reg != dst->index_rc &&
+ reg != BBRG_RSP)
+ break;
+ }
+ }
+ KDB_DEBUG_BB(" %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
+ bb_reg_set_undef(reg);
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.present = 1;
+ tmp.reg = 1;
+ tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
+ if (tmp.base) {
+ tmp.base[0] = '%';
+ strcpy(tmp.base + 1, bbrg_name[reg]);
+ }
+ tmp.base_rc = reg;
+ bb_read_operand(src);
+ bb_read_operand(dst);
+ if (bb_usage_mov(src, dst, sizeof("xadd")-1) == BBOU_NOP)
+ usage = BBOU_RSRD;
+ else
+ usage = BBOU_RSRDWS;
+ bb_usage_mov(&tmp, dst, sizeof("xadd")-1);
+ KDB_DEBUG_BB(" %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
+ debug_kfree(tmp.base);
+ return usage;
+}
+
+static enum bb_operand_usage
+bb_usage_xchg(const struct bb_operand *src, const struct bb_operand *dst)
+{
+ /* Simulate xchg as a series of mov instructions, that way we get the
+ * benefit of all the special cases already handled by BBOU_MOV.
+ *
+ * mov dst,tmp; mov src,dst; mov tmp,src;
+ *
+ * For tmp, pick a register that is undefined. If all registers are
+ * defined then pick one that is not being used by xchg.
+ */
+ enum bb_reg_code reg = BBRG_UNDEFINED;
+ int rs = BBOU_RS, rd = BBOU_RD, ws = BBOU_WS, wd = BBOU_WD;
+ struct bb_operand tmp;
+ struct bb_reg_contains save_tmp;
+ int undefined = 0;
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (bb_reg_code_value(reg) == BBRG_UNDEFINED) {
+ undefined = 1;
+ break;
+ }
+ }
+ if (!undefined) {
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (reg != src->base_rc &&
+ reg != src->index_rc &&
+ reg != dst->base_rc &&
+ reg != dst->index_rc &&
+ reg != BBRG_RSP)
+ break;
+ }
+ }
+ KDB_DEBUG_BB(" %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.present = 1;
+ tmp.reg = 1;
+ tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
+ if (tmp.base) {
+ tmp.base[0] = '%';
+ strcpy(tmp.base + 1, bbrg_name[reg]);
+ }
+ tmp.base_rc = reg;
+ if (bb_usage_mov(dst, &tmp, sizeof("xchg")-1) == BBOU_NOP)
+ rd = 0;
+ if (bb_usage_mov(src, dst, sizeof("xchg")-1) == BBOU_NOP) {
+ rs = 0;
+ wd = 0;
+ }
+ if (bb_usage_mov(&tmp, src, sizeof("xchg")-1) == BBOU_NOP)
+ ws = 0;
+ KDB_DEBUG_BB(" %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
+ debug_kfree(tmp.base);
+ return rs | rd | ws | wd;
+}
+
+/* Invalidate all the scratch registers */
+
+static void
+bb_invalidate_scratch_reg(void)
+{
+ int i, j;
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ for (j = 0; j < ARRAY_SIZE(bb_preserved_reg); ++j) {
+ if (i == bb_preserved_reg[j])
+ goto preserved;
+ }
+ bb_reg_set_undef(i);
+preserved:
+ continue;
+ }
+}
+
+static void
+bb_pass2_computed_jmp(const struct bb_operand *src)
+{
+ unsigned long table = src->disp;
+ kdb_machreg_t addr;
+ while (!bb_giveup) {
+ if (kdb_getword(&addr, table, sizeof(addr)))
+ return;
+ if (addr < bb_func_start || addr >= bb_func_end)
+ return;
+ bb_transfer(bb_curr_addr, addr, 0);
+ table += KDB_WORD_SIZE;
+ }
+}
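+
+/* For example, a gcc switch statement can compile to an instruction such as
+ * "jmp *0x<table>(,%rax,8)" on x86_64. bb_pass2_computed_jmp() walks the
+ * jump table at 0x<table> and creates an arc to every entry that lands
+ * inside the current function, stopping at the first entry that does not.
+ */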
+
+/* The current instruction has been decoded and all the information is in
+ * bb_decode. Based on the opcode, track any operand usage that we care about.
+ */
+
+static void
+bb_usage(void)
+{
+ enum bb_operand_usage usage = bb_decode.match->usage;
+ struct bb_operand *src = &bb_decode.src;
+ struct bb_operand *dst = &bb_decode.dst;
+ struct bb_operand *dst2 = &bb_decode.dst2;
+ int opcode_suffix, operand_length;
+
+ /* First handle all the special usage cases, and map them to a generic
+ * case after catering for the side effects.
+ */
+
+ if (usage == BBOU_IMUL &&
+ src->present && !dst->present && !dst2->present) {
+ /* single operand imul, same effects as mul */
+ usage = BBOU_MUL;
+ }
+
+ /* AT&T syntax uses movs<l1><l2> for move with sign extension, instead
+ * of the Intel movsx. The AT&T syntax causes problems for the opcode
+ * mapping; movs with sign extension needs to be treated as a generic
+ * read src, write dst, but instead it falls under the movs I/O
+ * instruction. Fix it.
+ */
+ if (usage == BBOU_MOVS && strlen(bb_decode.opcode) > 5)
+ usage = BBOU_RSWD;
+
+ /* This switch statement deliberately does not use 'default' at the top
+ * level. That way the compiler will complain if a new BBOU_ enum is
+ * added above and not explicitly handled here.
+ */
+ switch (usage) {
+ case BBOU_UNKNOWN: /* drop through */
+ case BBOU_RS: /* drop through */
+ case BBOU_RD: /* drop through */
+ case BBOU_RSRD: /* drop through */
+ case BBOU_WS: /* drop through */
+ case BBOU_RSWS: /* drop through */
+ case BBOU_RDWS: /* drop through */
+ case BBOU_RSRDWS: /* drop through */
+ case BBOU_WD: /* drop through */
+ case BBOU_RSWD: /* drop through */
+ case BBOU_RDWD: /* drop through */
+ case BBOU_RSRDWD: /* drop through */
+ case BBOU_WSWD: /* drop through */
+ case BBOU_RSWSWD: /* drop through */
+ case BBOU_RDWSWD: /* drop through */
+ case BBOU_RSRDWSWD:
+ break; /* ignore generic usage for now */
+ case BBOU_ADD:
+ /* Special case for add instructions that adjust registers
+ * which are mapping the stack.
+ */
+ if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
+ bb_adjust_osp_instruction(1);
+ usage = BBOU_RS;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_AND:
+ /* Special case when trying to round the stack pointer
+ * to achieve byte alignment
+ */
+ if (dst->reg && dst->base_rc == BBRG_RSP &&
+ src->immediate && strncmp(bb_func_name, "efi_call", 8) == 0) {
+ usage = BBOU_NOP;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_CALL:
+ bb_reg_state_print(bb_reg_state);
+ usage = BBOU_NOP;
+ if (bb_is_static_disp(src)) {
+ /* save_args is special. It saves
+ * a partial pt_regs onto the stack and switches
+ * to the interrupt stack.
+ */
+ if (src->disp == bb_save_args) {
+ bb_memory_set_reg(BBRG_RSP, BBRG_RDI, 0x48);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RSI, 0x40);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RDX, 0x38);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RCX, 0x30);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RAX, 0x28);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R8, 0x20);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R9, 0x18);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R10, 0x10);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R11, 0x08);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0);
+ /* This is actually on the interrupt stack,
+ * but we fudge it so the unwind works.
+ */
+ bb_memory_set_reg_value(BBRG_RSP, -0x8, BBRG_RBP, 0);
+ bb_reg_set_reg(BBRG_RBP, BBRG_RSP);
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ }
+ /* save_rest juggles the stack frame to append the
+ * rest of the pt_regs onto a stack where SAVE_ARGS
+ * or save_args has already been done.
+ */
+ else if (src->disp == bb_save_rest) {
+ bb_memory_set_reg(BBRG_RSP, BBRG_RBX, 0x30);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0x28);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R12, 0x20);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R13, 0x18);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R14, 0x10);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R15, 0x08);
+ }
+ /* error_entry and save_paranoid save a full pt_regs.
+ * Break out so the scratch registers aren't invalidated.
+ */
+ else if (src->disp == bb_error_entry || src->disp == bb_save_paranoid) {
+ bb_memory_set_reg(BBRG_RSP, BBRG_RDI, 0x70);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RSI, 0x68);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RDX, 0x60);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RCX, 0x58);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RAX, 0x50);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R8, 0x48);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R9, 0x40);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R10, 0x38);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R11, 0x30);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RBX, 0x28);
+ bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0x20);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R12, 0x18);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R13, 0x10);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R14, 0x08);
+ bb_memory_set_reg(BBRG_RSP, BBRG_R15, 0);
+ break;
+ }
+ }
+ /* Invalidate the scratch registers */
+ bb_invalidate_scratch_reg();
+
+ /* These special cases need scratch registers invalidated first */
+ if (bb_is_static_disp(src)) {
+ /* Function sync_regs and save_v86_state are special.
+ * Their return value is the new stack pointer
+ */
+ if (src->disp == bb_sync_regs) {
+ bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
+ } else if (src->disp == bb_save_v86_state) {
+ bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
+ bb_adjust_osp(BBRG_RAX, +KDB_WORD_SIZE);
+ }
+ }
+ break;
+ case BBOU_CBW:
+ /* Convert word in RAX. Read RAX, write RAX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_CMOV:
+ /* cmove %gs:0x<nn>,%rsp is used to conditionally switch to
+ * another stack. Ignore this special case, it is handled by
+ * the stack unwinding code.
+ */
+ if (src->segment &&
+ strcmp(src->segment, "%gs") == 0 &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP)
+ usage = BBOU_NOP;
+ else
+ usage = BBOU_RSWD;
+ break;
+ case BBOU_CMPXCHG:
+ /* Read RAX, write RAX plus src read, dst write */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_RSWD;
+ break;
+ case BBOU_CMPXCHGD:
+ /* Read RAX, RBX, RCX, RDX, write RAX, RDX plus src read/write */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RBX);
+ bb_reg_read(BBRG_RCX);
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_RSWS;
+ break;
+ case BBOU_CPUID:
+ /* Read RAX, write RAX, RBX, RCX, RDX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RBX);
+ bb_reg_set_undef(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_CWD:
+ /* Convert word in RAX, RDX. Read RAX, write RDX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_DIV: /* drop through */
+ case BBOU_IDIV:
+ /* The 8 bit variants only affect RAX, the 16, 32 and 64 bit
+ * variants affect RDX as well.
+ */
+ switch (usage) {
+ case BBOU_DIV:
+ opcode_suffix = bb_decode.opcode[3];
+ break;
+ case BBOU_IDIV:
+ opcode_suffix = bb_decode.opcode[4];
+ break;
+ default:
+ opcode_suffix = 'q';
+ break;
+ }
+ operand_length = bb_operand_length(src, opcode_suffix);
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ if (operand_length != 8) {
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RDX);
+ }
+ usage = BBOU_RS;
+ break;
+ case BBOU_IMUL:
+ /* Only the two and three operand forms get here. The one
+ * operand form is treated as mul.
+ */
+ if (dst2->present) {
+ /* The three operand form is a special case, read the first two
+ * operands, write the third.
+ */
+ bb_read_operand(src);
+ bb_read_operand(dst);
+ bb_write_operand(dst2);
+ usage = BBOU_NOP;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_IRET:
+ bb_sanity_check(0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_JMP:
+ if (bb_is_static_disp(src))
+ bb_transfer(bb_curr_addr, src->disp, 0);
+ else if (src->indirect &&
+ src->disp &&
+ src->base == NULL &&
+ src->index &&
+ src->scale == KDB_WORD_SIZE)
+ bb_pass2_computed_jmp(src);
+ usage = BBOU_RS;
+ break;
+ case BBOU_LAHF:
+ /* Write RAX */
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LEA:
+ /* dst = src + disp. Often used to calculate offsets into the
+ * stack, so check if it uses a stack pointer.
+ */
+ usage = BBOU_RSWD;
+ if (bb_is_simple_memory(src)) {
+ if (bb_is_osp_defined(src->base_rc)) {
+ bb_reg_set_reg(dst->base_rc, src->base_rc);
+ bb_adjust_osp_instruction(1);
+ usage = BBOU_RS;
+ } else if (src->disp == 0 &&
+ src->base_rc == dst->base_rc) {
+ /* lea 0(%reg),%reg is generated by i386
+ * GENERIC_NOP7.
+ */
+ usage = BBOU_NOP;
+ } else if (src->disp == 4096 &&
+ (src->base_rc == BBRG_R8 ||
+ src->base_rc == BBRG_RDI) &&
+ strcmp(bb_func_name, "relocate_kernel") == 0) {
+ /* relocate_kernel: setup a new stack at the
+ * end of the physical control page, using
+ * (x86_64) lea 4096(%r8),%rsp or (i386) lea
+ * 4096(%edi),%esp
+ */
+ usage = BBOU_NOP;
+ }
+ }
+ break;
+ case BBOU_LEAVE:
+ /* RSP = RBP; RBP = *(RSP); RSP += KDB_WORD_SIZE; */
+ bb_reg_set_reg(BBRG_RSP, BBRG_RBP);
+ if (bb_is_osp_defined(BBRG_RSP))
+ bb_reg_set_memory(BBRG_RBP, BBRG_RSP, 0);
+ else
+ bb_reg_set_undef(BBRG_RBP);
+ if (bb_is_osp_defined(BBRG_RSP))
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ /* common_interrupt uses leave in a non-standard manner */
+ if (strcmp(bb_func_name, "common_interrupt") != 0)
+ bb_sanity_check(0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LODS:
+ /* Read RSI, write RAX, RSI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RSI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LOOP:
+ /* Read and write RCX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RCX);
+ if (bb_is_static_disp(src))
+ bb_transfer(bb_curr_addr, src->disp, 0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LSS:
+ /* lss offset(%esp),%esp leaves esp well defined */
+ if (dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ bb_is_simple_memory(src) &&
+ src->base_rc == BBRG_RSP) {
+ bb_adjust_osp(BBRG_RSP, 2*KDB_WORD_SIZE + src->disp);
+ usage = BBOU_NOP;
+ } else {
+ usage = BBOU_RSWD;
+ }
+ break;
+ case BBOU_MONITOR:
+		/* Read RAX, RCX, RDX */
+		bb_reg_read(BBRG_RAX);
+		bb_reg_read(BBRG_RCX);
+		bb_reg_read(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MOV:
+ usage = bb_usage_mov(src, dst, sizeof("mov")-1);
+ break;
+ case BBOU_MOVS:
+ /* Read RSI, RDI, write RSI, RDI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_read(BBRG_RDI);
+ bb_reg_set_undef(BBRG_RSI);
+ bb_reg_set_undef(BBRG_RDI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MUL:
+		/* imul (one operand form only) or mul. Read RAX; the result
+		 * makes RAX undefined. If the operand length is not 8 then
+		 * RDX is written as well.
+		 */
+		if (bb_decode.opcode[0] == 'i')
+			opcode_suffix = bb_decode.opcode[4];
+		else
+			opcode_suffix = bb_decode.opcode[3];
+		operand_length = bb_operand_length(src, opcode_suffix);
+		bb_reg_read(BBRG_RAX);
+		bb_reg_set_undef(BBRG_RAX);
+ if (operand_length != 8)
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MWAIT:
+ /* Read RAX, RCX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RCX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_NOP:
+ break;
+ case BBOU_OUTS:
+ /* Read RSI, RDX, write RSI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RSI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_POP:
+ /* Complicated by the fact that you can pop from top of stack
+ * to a stack location, for this case the destination location
+ * is calculated after adjusting RSP. Analysis of the kernel
+ * code shows that gcc only uses this strange format to get the
+ * flags into a local variable, e.g. pushf; popl 0x10(%esp); so
+ * I am going to ignore this special case.
+ */
+ usage = BBOU_WS;
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("pop when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ if (src->reg) {
+ bb_reg_set_memory(src->base_rc, BBRG_RSP, 0);
+ usage = BBOU_NOP;
+ }
+ /* pop %rsp does not adjust rsp */
+ if (!src->reg ||
+ src->base_rc != BBRG_RSP)
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ }
+ break;
+ case BBOU_POPF:
+ /* Do not care about flags, just adjust RSP */
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("popf when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ }
+ usage = BBOU_WS;
+ break;
+ case BBOU_PUSH:
+ /* Complicated by the fact that you can push from a stack
+ * location to top of stack, the source location is calculated
+ * before adjusting RSP. Analysis of the kernel code shows
+ * that gcc only uses this strange format to restore the flags
+ * from a local variable, e.g. pushl 0x10(%esp); popf; so I am
+ * going to ignore this special case.
+ */
+ usage = BBOU_RS;
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("push when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ if (src->reg &&
+ bb_reg_code_offset(BBRG_RSP) <= 0)
+ bb_memory_set_reg(BBRG_RSP, src->base_rc, 0);
+ }
+ break;
+ case BBOU_PUSHF:
+ /* Do not care about flags, just adjust RSP */
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("pushf when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ }
+ usage = BBOU_WS;
+ break;
+ case BBOU_RDMSR:
+ /* Read RCX, write RAX, RDX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_RDTSC:
+ /* Write RAX, RDX */
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_RET:
+ usage = BBOU_NOP;
+ if (src->immediate && bb_is_osp_defined(BBRG_RSP)) {
+ bb_adjust_osp(BBRG_RSP, src->disp);
+ }
+ /* Functions that restore state which was saved by another
+ * function or build new kernel stacks. We cannot verify what
+ * is being restored so skip the sanity check.
+ */
+ if (strcmp(bb_func_name, "restore_image") == 0 ||
+ strcmp(bb_func_name, "relocate_kernel") == 0 ||
+ strcmp(bb_func_name, "identity_mapped") == 0 ||
+ strcmp(bb_func_name, "xen_iret_crit_fixup") == 0 ||
+ strcmp(bb_func_name, "math_abort") == 0 ||
+ strcmp(bb_func_name, "save_args") == 0 ||
+ strcmp(bb_func_name, "kretprobe_trampoline_holder") == 0)
+ break;
+ bb_sanity_check(0);
+ break;
+ case BBOU_SAHF:
+ /* Read RAX */
+ bb_reg_read(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SCAS:
+ /* Read RAX, RDI, write RDI */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RDI);
+ bb_reg_set_undef(BBRG_RDI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SUB:
+ /* Special case for sub instructions that adjust registers
+ * which are mapping the stack.
+ */
+ if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
+ bb_adjust_osp_instruction(-1);
+ usage = BBOU_RS;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_SYSEXIT:
+ bb_sanity_check(1);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SYSRET:
+ bb_sanity_check(1);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_WRMSR:
+ /* Read RCX, RAX, RDX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_XADD:
+ usage = bb_usage_xadd(src, dst);
+ break;
+ case BBOU_XCHG:
+ /* i386 do_IRQ with 4K stacks does xchg %ebx,%esp; call
+ * irq_handler; mov %ebx,%esp; to switch stacks. Ignore this
+ * stack switch when tracking registers, it is handled by
+ * higher level backtrace code. Convert xchg %ebx,%esp to mov
+ * %esp,%ebx so the later mov %ebx,%esp becomes a NOP and the
+ * stack remains defined so we can backtrace through do_IRQ's
+ * stack switch.
+ *
+ * Ditto for do_softirq.
+ */
+ if (src->reg &&
+ dst->reg &&
+ src->base_rc == BBRG_RBX &&
+ dst->base_rc == BBRG_RSP &&
+ (strcmp(bb_func_name, "do_IRQ") == 0 ||
+ strcmp(bb_func_name, "do_softirq") == 0)) {
+ strcpy(bb_decode.opcode, "mov");
+ usage = bb_usage_mov(dst, src, sizeof("mov")-1);
+ } else {
+ usage = bb_usage_xchg(src, dst);
+ }
+ break;
+ case BBOU_XOR:
+ /* xor %reg,%reg only counts as a register write; the original
+ * contents of reg are irrelevant.
+ */
+ if (src->reg && dst->reg && src->base_rc == dst->base_rc)
+ usage = BBOU_WS;
+ else
+ usage = BBOU_RSRDWD;
+ break;
+ }
+
+ /* The switch statement above handled all the special cases. Every
+ * opcode should now have a usage of NOP or one of the generic cases.
+ */
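+ /* For example (illustrative reading of the flag encoding): BBOU_RSWD
+ * is BBOU_RS|BBOU_WD, read source and write destination, the fallback
+ * used by BBOU_LSS above; BBOU_RSRDWD also reads the destination
+ * first, the fallback used by BBOU_SUB.
+ */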
+ if (usage == BBOU_UNKNOWN || usage == BBOU_NOP) {
+ /* nothing to do */
+ } else if (usage >= BBOU_RS && usage <= BBOU_RSRDWSWD) {
+ if (usage & BBOU_RS)
+ bb_read_operand(src);
+ if (usage & BBOU_RD)
+ bb_read_operand(dst);
+ if (usage & BBOU_WS)
+ bb_write_operand(src);
+ if (usage & BBOU_WD)
+ bb_write_operand(dst);
+ } else {
+ kdb_printf("%s: opcode not fully handled\n", __FUNCTION__);
+ if (!KDB_DEBUG(BB)) {
+ bb_print_opcode();
+ if (bb_decode.src.present)
+ bb_print_operand("src", &bb_decode.src);
+ if (bb_decode.dst.present)
+ bb_print_operand("dst", &bb_decode.dst);
+ if (bb_decode.dst2.present)
+ bb_print_operand("dst2", &bb_decode.dst2);
+ }
+ bb_giveup = 1;
+ }
+}
+
+static void
+bb_parse_buffer(void)
+{
+ char *p, *src, *dst = NULL, *dst2 = NULL;
+ int paren = 0;
+ p = bb_buffer;
+ memset(&bb_decode, 0, sizeof(bb_decode));
+ KDB_DEBUG_BB(" '%s'\n", p);
+ p += strcspn(p, ":"); /* skip address and function name+offset: */
+ if (*p++ != ':') {
+ kdb_printf("%s: cannot find ':' in buffer '%s'\n",
+ __FUNCTION__, bb_buffer);
+ bb_giveup = 1;
+ return;
+ }
+ p += strspn(p, " \t"); /* step to opcode */
+ if (strncmp(p, "(bad)", 5) == 0)
+ strcpy(p, "nop");
+ /* separate any opcode prefix */
+ if (strncmp(p, "lock", 4) == 0 ||
+ strncmp(p, "rep", 3) == 0 ||
+ strncmp(p, "rex", 3) == 0 ||
+ strncmp(p, "addr", 4) == 0) {
+ bb_decode.prefix = p;
+ p += strcspn(p, " \t");
+ *p++ = '\0';
+ p += strspn(p, " \t");
+ }
+ bb_decode.opcode = p;
+ strsep(&p, " \t"); /* step to end of opcode */
+ if (bb_parse_opcode())
+ return;
+ if (!p)
+ goto no_operands;
+ p += strspn(p, " \t"); /* step to operand(s) */
+ if (!*p)
+ goto no_operands;
+ src = p;
+ p = strsep(&p, " \t"); /* strip comments after operands */
+ /* split 'src','dst' but ignore ',' inside '(' ')' */
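+ /* e.g. "0x8(%rbx,%rcx,8),%rax" splits into src "0x8(%rbx,%rcx,8)"
+ * and dst "%rax"; the commas inside the parentheses are protected
+ * by the paren counter below.
+ */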
+ while (*p) {
+ if (*p == '(') {
+ ++paren;
+ } else if (*p == ')') {
+ --paren;
+ } else if (*p == ',' && paren == 0) {
+ *p = '\0';
+ if (dst)
+ dst2 = p+1;
+ else
+ dst = p+1;
+ }
+ ++p;
+ }
+ bb_parse_operand(src, &bb_decode.src);
+ if (KDB_DEBUG(BB))
+ bb_print_operand("src", &bb_decode.src);
+ if (dst && !bb_giveup) {
+ bb_parse_operand(dst, &bb_decode.dst);
+ if (KDB_DEBUG(BB))
+ bb_print_operand("dst", &bb_decode.dst);
+ }
+ if (dst2 && !bb_giveup) {
+ bb_parse_operand(dst2, &bb_decode.dst2);
+ if (KDB_DEBUG(BB))
+ bb_print_operand("dst2", &bb_decode.dst2);
+ }
+no_operands:
+ if (!bb_giveup)
+ bb_usage();
+}
+
+static int
+bb_dis_pass2(PTR file, const char *fmt, ...)
+{
+ char *p;
+ int l = strlen(bb_buffer);
+ va_list ap;
+ va_start(ap, fmt);
+ vsnprintf(bb_buffer + l, sizeof(bb_buffer) - l, fmt, ap);
+ va_end(ap);
+ if ((p = strchr(bb_buffer, '\n'))) {
+ *p = '\0';
+ p = bb_buffer;
+ p += strcspn(p, ":");
+ if (*p++ == ':')
+ bb_fixup_switch_to(p);
+ bb_parse_buffer();
+ bb_buffer[0] = '\0';
+ }
+ return 0;
+}
+
+static void
+bb_printaddr_pass2(bfd_vma addr, disassemble_info *dip)
+{
+ kdb_symtab_t symtab;
+ unsigned int offset;
+ dip->fprintf_func(dip->stream, "0x%lx", addr);
+ kdbnearsym(addr, &symtab);
+ if (symtab.sym_name) {
+ dip->fprintf_func(dip->stream, " <%s", symtab.sym_name);
+ if ((offset = addr - symtab.sym_start))
+ dip->fprintf_func(dip->stream, "+0x%x", offset);
+ dip->fprintf_func(dip->stream, ">");
+ }
+}
+
+/* Set the starting register and memory state for the current bb */
+
+static void
+bb_start_block0_special(void)
+{
+ int i;
+ short offset_address;
+ enum bb_reg_code reg, value;
+ struct bb_name_state *r;
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (bb_func_start == r->address && r->fname == NULL)
+ goto match;
+ }
+ return;
+match:
+ /* Set the running registers */
+ for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
+ value = r->regs[reg].value;
+ if (test_bit(value, r->skip_regs.bits)) {
+ /* this regs entry is not defined for this label */
+ continue;
+ }
+ bb_reg_code_set_value(reg, value);
+ bb_reg_code_set_offset(reg, r->regs[reg].offset);
+ }
+ /* Set any memory contents, e.g. pt_regs. Adjust RSP as required. */
+ offset_address = 0;
+ for (i = 0; i < r->mem_size; ++i) {
+ offset_address = max_t(int,
+ r->mem[i].offset_address + KDB_WORD_SIZE,
+ offset_address);
+ }
+ if (bb_reg_code_offset(BBRG_RSP) > -offset_address)
+ bb_adjust_osp(BBRG_RSP, -offset_address - bb_reg_code_offset(BBRG_RSP));
+ for (i = 0; i < r->mem_size; ++i) {
+ value = r->mem[i].value;
+ if (test_bit(value, r->skip_mem.bits)) {
+ /* this memory entry is not defined for this label */
+ continue;
+ }
+ bb_memory_set_reg_value(BBRG_RSP, r->mem[i].offset_address,
+ value, 0);
+ bb_reg_set_undef(value);
+ }
+ return;
+}
+
+static void
+bb_pass2_start_block(int number)
+{
+ int i, j, k, first, changed;
+ size_t size;
+ struct bb_jmp *bb_jmp;
+ struct bb_reg_state *state;
+ struct bb_memory_contains *c1, *c2;
+ bb_reg_state->mem_count = bb_reg_state_max;
+ size = bb_reg_state_size(bb_reg_state);
+ memset(bb_reg_state, 0, size);
+
+ if (number == 0) {
+ /* The first block is assumed to have well defined inputs */
+ bb_start_block0();
+ /* Some assembler labels have non-standard entry
+ * states.
+ */
+ bb_start_block0_special();
+ bb_reg_state_print(bb_reg_state);
+ return;
+ }
+
+ /* Merge all the input states for the current bb together */
+ first = 1;
+ changed = 0;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ if (bb_jmp->to != bb_curr->start)
+ continue;
+ state = bb_jmp->state;
+ if (!state)
+ continue;
+ if (first) {
+ size = bb_reg_state_size(state);
+ memcpy(bb_reg_state, state, size);
+ KDB_DEBUG_BB(" first state %p\n", state);
+ bb_reg_state_print(bb_reg_state);
+ first = 0;
+ continue;
+ }
+
+ KDB_DEBUG_BB(" merging state %p\n", state);
+ /* Merge the register states */
+ for (j = 0; j < ARRAY_SIZE(state->contains); ++j) {
+ if (memcmp(bb_reg_state->contains + j,
+ state->contains + j,
+ sizeof(bb_reg_state->contains[0]))) {
+ /* Different states for this register from two
+ * or more inputs, make it undefined.
+ */
+ if (bb_reg_state->contains[j].value ==
+ BBRG_UNDEFINED) {
+ KDB_DEBUG_BB(" ignoring %s\n",
+ bbrg_name[j + BBRG_RAX]);
+ } else {
+ bb_reg_set_undef(BBRG_RAX + j);
+ changed = 1;
+ }
+ }
+ }
+
+ /* Merge the memory states. This relies on both
+ * bb_reg_state->memory and state->memory being sorted in
+ * descending order, with undefined entries at the end.
+ */
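+ /* Worked example (offsets relative to osp): merging {-8:RBX, -16:R12}
+ * with {-8:RBX, -24:R13} keeps -8:RBX, deletes -16 (missing from the
+ * second input) and never copies -24 (missing from the first),
+ * leaving {-8:RBX}.
+ */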
+ c1 = bb_reg_state->memory;
+ c2 = state->memory;
+ j = k = 0;
+ while (j < bb_reg_state->mem_count &&
+ k < state->mem_count) {
+ if (c1->offset_address < c2->offset_address) {
+ KDB_DEBUG_BB_OFFSET(c2->offset_address,
+ " ignoring c2->offset_address ",
+ "\n");
+ ++c2;
+ ++k;
+ continue;
+ }
+ if (c1->offset_address > c2->offset_address) {
+ /* Memory location is not in all input states,
+ * delete the memory location.
+ */
+ bb_delete_memory(c1->offset_address);
+ changed = 1;
+ ++c1;
+ ++j;
+ continue;
+ }
+ if (memcmp(c1, c2, sizeof(*c1))) {
+ /* Same location, different contents, delete
+ * the memory location.
+ */
+ bb_delete_memory(c1->offset_address);
+ KDB_DEBUG_BB_OFFSET(c2->offset_address,
+ " ignoring c2->offset_address ",
+ "\n");
+ changed = 1;
+ }
+ ++c1;
+ ++c2;
+ ++j;
+ ++k;
+ }
+ while (j < bb_reg_state->mem_count) {
+ bb_delete_memory(c1->offset_address);
+ changed = 1;
+ ++c1;
+ ++j;
+ }
+ }
+ if (changed) {
+ KDB_DEBUG_BB(" final state\n");
+ bb_reg_state_print(bb_reg_state);
+ }
+}
+
+/* We have reached the exit point from the current function, either a call to
+ * the next function or the instruction that was about to be executed when an
+ * interrupt occurred. Save the current register state in bb_exit_state.
+ */
+
+static void
+bb_save_exit_state(void)
+{
+ size_t size;
+ debug_kfree(bb_exit_state);
+ bb_exit_state = NULL;
+ bb_reg_state_canonicalize();
+ size = bb_reg_state_size(bb_reg_state);
+ bb_exit_state = debug_kmalloc(size, GFP_ATOMIC);
+ if (!bb_exit_state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(bb_exit_state, bb_reg_state, size);
+}
+
+static int
+bb_pass2_do_changed_blocks(int allow_missing)
+{
+ int i, j, missing, changed, maxloops;
+ unsigned long addr;
+ struct bb_jmp *bb_jmp;
+ KDB_DEBUG_BB("\n %s: allow_missing %d\n", __FUNCTION__, allow_missing);
+ /* Absolute worst case is we have to iterate over all the basic blocks
+ * in an "out of order" state, each iteration losing one register or
+ * memory state. Any more loops than that is a bug. "out of order"
+ * means that the layout of blocks in memory does not match the logic
+ * flow through those blocks so (for example) block 27 comes before
+ * block 2. To allow for out of order blocks, multiply maxloops by the
+ * number of blocks.
+ */
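+ /* For instance (illustrative numbers only): 16 integer registers,
+ * 40 tracked memory slots and 25 blocks give a maxloops of
+ * (16 + 40) * 25 = 1400 iterations before giving up.
+ */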
+ maxloops = (KDB_INT_REGISTERS + bb_reg_state_max) * bb_count;
+ changed = 1;
+ do {
+ changed = 0;
+ for (i = 0; i < bb_count; ++i) {
+ bb_curr = bb_list[i];
+ if (!bb_curr->changed)
+ continue;
+ missing = 0;
+ for (j = 0, bb_jmp = bb_jmp_list;
+ j < bb_jmp_count;
+ ++j, ++bb_jmp) {
+ if (bb_jmp->to == bb_curr->start &&
+ !bb_jmp->state)
+ ++missing;
+ }
+ if (missing > allow_missing)
+ continue;
+ bb_curr->changed = 0;
+ changed = 1;
+ KDB_DEBUG_BB("\n bb[%d]\n", i);
+ bb_pass2_start_block(i);
+ for (addr = bb_curr->start;
+ addr <= bb_curr->end; ) {
+ bb_curr_addr = addr;
+ if (addr == bb_exit_addr)
+ bb_save_exit_state();
+ addr += kdba_id_printinsn(addr, &kdb_di);
+ kdb_di.fprintf_func(NULL, "\n");
+ if (bb_giveup)
+ goto done;
+ }
+ if (!bb_exit_state) {
+ /* ATTRIB_NORET functions are a problem with
+ * the current gcc. Allow the trailing address
+ * a bit of leeway.
+ */
+ if (addr == bb_exit_addr ||
+ addr == bb_exit_addr + 1)
+ bb_save_exit_state();
+ }
+ if (bb_curr->drop_through)
+ bb_transfer(bb_curr->end,
+ bb_list[i+1]->start, 1);
+ }
+ if (maxloops-- == 0) {
+ kdb_printf("\n\n%s maxloops reached\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ goto done;
+ }
+ } while (changed);
+done:
+ for (i = 0; i < bb_count; ++i) {
+ bb_curr = bb_list[i];
+ if (bb_curr->changed)
+ return 1; /* more to do, increase allow_missing */
+ }
+ return 0; /* all blocks done */
+}
+
+/* Assume that the current function is a pass through function that does not
+ * refer to its register parameters. Exclude known asmlinkage functions and
+ * assume the other functions actually use their registers.
+ */
+
+static void
+bb_assume_pass_through(void)
+{
+ static int first_time = 1;
+ if (strncmp(bb_func_name, "sys_", 4) == 0 ||
+ strncmp(bb_func_name, "compat_sys_", 11) == 0 ||
+ strcmp(bb_func_name, "schedule") == 0 ||
+ strcmp(bb_func_name, "do_softirq") == 0 ||
+ strcmp(bb_func_name, "printk") == 0 ||
+ strcmp(bb_func_name, "vprintk") == 0 ||
+ strcmp(bb_func_name, "preempt_schedule") == 0 ||
+ strcmp(bb_func_name, "start_kernel") == 0 ||
+ strcmp(bb_func_name, "csum_partial") == 0 ||
+ strcmp(bb_func_name, "csum_partial_copy_generic") == 0 ||
+ strcmp(bb_func_name, "math_state_restore") == 0 ||
+ strcmp(bb_func_name, "panic") == 0 ||
+ strcmp(bb_func_name, "kdb_printf") == 0 ||
+ strcmp(bb_func_name, "kdb_interrupt") == 0)
+ return;
+ if (bb_asmlinkage_arch())
+ return;
+ bb_reg_params = REGPARM;
+ if (first_time) {
+ kdb_printf(" %s has memory parameters but no register "
+ "parameters.\n Assuming it is a 'pass "
+ "through' function that does not refer to "
+ "its register\n parameters and setting %d "
+ "register parameters\n",
+ bb_func_name, REGPARM);
+ first_time = 0;
+ return;
+ }
+ kdb_printf(" Assuming %s is 'pass through' with %d register "
+ "parameters\n",
+ bb_func_name, REGPARM);
+}
+
+static void
+bb_pass2(void)
+{
+ int allow_missing;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: start\n", __FUNCTION__);
+
+ kdb_di.fprintf_func = bb_dis_pass2;
+ kdb_di.print_address_func = bb_printaddr_pass2;
+
+ bb_reg_state = debug_kmalloc(sizeof(*bb_reg_state), GFP_ATOMIC);
+ if (!bb_reg_state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ bb_list[0]->changed = 1;
+
+ /* If a block does not have all its input states available then it is
+ * possible for a register to initially appear to hold a known value,
+ * but when other inputs are available then it becomes a variable
+ * value. The initial false state of "known" can generate false values
+ * for other registers and can even make it look like stack locations
+ * are being changed.
+ *
+ * To avoid these false positives, only process blocks which have all
+ * their inputs defined. That gives a clean depth first traversal of
+ * the tree, except for loops. If there are any loops, then start
+ * processing blocks with one missing input, then two missing inputs
+ * etc.
+ *
+ * Absolute worst case is we have to iterate over all the jmp entries,
+ * each iteration allowing one more missing input. Any more loops than
+ * that is a bug. Watch out for the corner case of 0 jmp entries.
+ */
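+ /* For example, a loop creates a back edge whose source block has not
+ * been processed yet, so one input state of the loop head is missing;
+ * allow_missing = 1 lets the head be processed from its forward edge
+ * alone, after which the back edge state becomes available.
+ */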
+ for (allow_missing = 0; allow_missing <= bb_jmp_count; ++allow_missing) {
+ if (!bb_pass2_do_changed_blocks(allow_missing))
+ break;
+ if (bb_giveup)
+ break;
+ }
+ if (allow_missing > bb_jmp_count) {
+ kdb_printf("\n\n%s maxloops reached\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+
+ if (bb_memory_params && bb_reg_params)
+ bb_reg_params = REGPARM;
+ if (REGPARM &&
+ bb_memory_params &&
+ !bb_reg_params)
+ bb_assume_pass_through();
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: end bb_reg_params %d bb_memory_params %d\n",
+ __FUNCTION__, bb_reg_params, bb_memory_params);
+ if (bb_exit_state) {
+ kdb_printf("%s: bb_exit_state at " kdb_bfd_vma_fmt0 "\n",
+ __FUNCTION__, bb_exit_addr);
+ bb_do_reg_state_print(bb_exit_state);
+ }
+ }
+}
+
+static void
+bb_cleanup(void)
+{
+ int i;
+ struct bb_reg_state *state;
+ while (bb_count)
+ bb_delete(0);
+ debug_kfree(bb_list);
+ bb_list = NULL;
+ bb_count = bb_max = 0;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ state = bb_jmp_list[i].state;
+ if (state && --state->ref_count == 0)
+ debug_kfree(state);
+ }
+ debug_kfree(bb_jmp_list);
+ bb_jmp_list = NULL;
+ bb_jmp_count = bb_jmp_max = 0;
+ debug_kfree(bb_reg_state);
+ bb_reg_state = NULL;
+ bb_reg_state_max = 0;
+ debug_kfree(bb_exit_state);
+ bb_exit_state = NULL;
+ bb_reg_params = bb_memory_params = 0;
+ bb_giveup = 0;
+}
+
+static int
+bb_spurious_global_label(const char *func_name)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(bb_spurious); ++i) {
+ if (strcmp(bb_spurious[i], func_name) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+/* Given the current actual register contents plus the exit state deduced from
+ * a basic block analysis of the current function, rollback the actual register
+ * contents to the values they had on entry to this function.
+ */
+
+static void
+bb_actual_rollback(const struct kdb_activation_record *ar)
+{
+ int i, offset_address;
+ struct bb_memory_contains *c;
+ enum bb_reg_code reg;
+ unsigned long address, osp = 0;
+ struct bb_actual new[ARRAY_SIZE(bb_actual)];
+
+ if (!bb_exit_state) {
+ kdb_printf("%s: no bb_exit_state, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(bb_reg_state, bb_exit_state, bb_reg_state_size(bb_exit_state));
+ memset(new, 0, sizeof(new));
+
+ /* The most important register for obtaining saved state is rsp so get
+ * its new value first. Prefer rsp if it is valid, then other
+ * registers. Saved values of rsp in memory are unusable without a
+ * register that points to memory.
+ */
+ if (!bb_actual_valid(BBRG_RSP)) {
+ kdb_printf("%s: no starting value for RSP, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: rsp " kdb_bfd_vma_fmt0,
+ __FUNCTION__, bb_actual_value(BBRG_RSP));
+ i = BBRG_RSP;
+ if (!bb_is_osp_defined(i)) {
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ if (bb_is_osp_defined(i) && bb_actual_valid(i))
+ break;
+ }
+ }
+ if (bb_is_osp_defined(i) && bb_actual_valid(i)) {
+ osp = new[BBRG_RSP - BBRG_RAX].value =
+ bb_actual_value(i) - bb_reg_code_offset(i);
+ new[BBRG_RSP - BBRG_RAX].valid = 1;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(" -> osp " kdb_bfd_vma_fmt0 "\n", osp);
+ } else {
+ bb_actual_set_valid(BBRG_RSP, 0);
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(" -> undefined\n");
+ kdb_printf("%s: no ending value for RSP, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+
+ /* Now the other registers. First look at register values that have
+ * been copied to other registers.
+ */
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ reg = bb_reg_code_value(i);
+ if (bb_is_int_reg(reg)) {
+ new[reg - BBRG_RAX] = bb_actual[i - BBRG_RAX];
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: %s is in %s ",
+ __FUNCTION__,
+ bbrg_name[reg],
+ bbrg_name[i]);
+ if (bb_actual_valid(i))
+ kdb_printf(" -> " kdb_bfd_vma_fmt0 "\n",
+ bb_actual_value(i));
+ else
+ kdb_printf("(invalid)\n");
+ }
+ }
+ }
+
+ /* Finally register values that have been saved on stack */
+ for (i = 0, c = bb_reg_state->memory;
+ i < bb_reg_state->mem_count;
+ ++i, ++c) {
+ offset_address = c->offset_address;
+ reg = c->value;
+ if (!bb_is_int_reg(reg))
+ continue;
+ address = osp + offset_address;
+ if (address < ar->stack.logical_start ||
+ address >= ar->stack.logical_end) {
+ new[reg - BBRG_RAX].value = 0;
+ new[reg - BBRG_RAX].valid = 0;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: %s -> undefined\n",
+ __FUNCTION__,
+ bbrg_name[reg]);
+ } else {
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: %s -> *(osp",
+ __FUNCTION__,
+ bbrg_name[reg]);
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset_address, "", " ");
+ kdb_printf(kdb_bfd_vma_fmt0, address);
+ }
+ new[reg - BBRG_RAX].value = *(bfd_vma *)address;
+ new[reg - BBRG_RAX].valid = 1;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(") = " kdb_bfd_vma_fmt0 "\n",
+ new[reg - BBRG_RAX].value);
+ }
+ }
+
+ memcpy(bb_actual, new, sizeof(bb_actual));
+}
+
+/* Return true if the current function is an interrupt handler */
+
+static bool
+bb_interrupt_handler(kdb_machreg_t rip)
+{
+ unsigned long disp8, disp32, target, addr = (unsigned long)rip;
+ unsigned char code[5];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bb_hardware_handlers); ++i)
+ if (strcmp(bb_func_name, bb_hardware_handlers[i]) == 0)
+ return 1;
+
+ /* Given the large number of interrupt handlers, it is easiest to look
+ * at the next instruction and see if it is a jmp to the common exit
+ * routines.
+ */
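+ /* Opcode 0xe9 is "jmp rel32" (5 bytes) and 0xeb is "jmp rel8"
+ * (2 bytes); the target is the address of the next instruction plus
+ * the signed displacement, hence the +5 and +2 below.
+ */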
+ if (kdb_getarea(code, addr) ||
+ kdb_getword(&disp32, addr+1, 4) ||
+ kdb_getword(&disp8, addr+1, 1))
+ return 0; /* not a valid code address */
+ if (code[0] == 0xe9) {
+ target = addr + (s32) disp32 + 5; /* jmp disp32 */
+ if (target == bb_ret_from_intr ||
+ target == bb_common_interrupt ||
+ target == bb_error_entry)
+ return 1;
+ }
+ if (code[0] == 0xeb) {
+ target = addr + (s8) disp8 + 2; /* jmp disp8 */
+ if (target == bb_ret_from_intr ||
+ target == bb_common_interrupt ||
+ target == bb_error_entry)
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Copy argument information that was deduced by the basic block analysis and
+ * rollback into the kdb stack activation record.
+ */
+
+static void
+bb_arguments(struct kdb_activation_record *ar)
+{
+ int i;
+ enum bb_reg_code reg;
+ kdb_machreg_t rsp;
+ ar->args = bb_reg_params + bb_memory_params;
+ bitmap_zero(ar->valid.bits, KDBA_MAXARGS);
+ for (i = 0; i < bb_reg_params; ++i) {
+ reg = bb_param_reg[i];
+ if (bb_actual_valid(reg)) {
+ ar->arg[i] = bb_actual_value(reg);
+ set_bit(i, ar->valid.bits);
+ }
+ }
+ if (!bb_actual_valid(BBRG_RSP))
+ return;
+ rsp = bb_actual_value(BBRG_RSP);
+ for (i = bb_reg_params; i < ar->args; ++i) {
+ rsp += KDB_WORD_SIZE;
+ if (kdb_getarea(ar->arg[i], rsp) == 0)
+ set_bit(i, ar->valid.bits);
+ }
+}
+
+/* Given an exit address from a function, decompose the entire function into
+ * basic blocks and determine the register state at the exit point.
+ */
+
+static void
+kdb_bb(unsigned long exit)
+{
+ kdb_symtab_t symtab;
+ if (!kdbnearsym(exit, &symtab)) {
+ kdb_printf("%s: address " kdb_bfd_vma_fmt0 " not recognised\n",
+ __FUNCTION__, exit);
+ bb_giveup = 1;
+ return;
+ }
+ bb_exit_addr = exit;
+ bb_mod_name = symtab.mod_name;
+ bb_func_name = symtab.sym_name;
+ bb_func_start = symtab.sym_start;
+ bb_func_end = symtab.sym_end;
+ /* Various global labels exist in the middle of assembler code and have
+ * a non-standard state. Ignore these labels and use the start of the
+ * previous label instead.
+ */
+ while (bb_spurious_global_label(symtab.sym_name)) {
+ if (!kdbnearsym(symtab.sym_start - 1, &symtab))
+ break;
+ bb_func_start = symtab.sym_start;
+ }
+ bb_mod_name = symtab.mod_name;
+ bb_func_name = symtab.sym_name;
+ bb_func_start = symtab.sym_start;
+ /* Ignore spurious labels past this point and use the next non-spurious
+ * label as the end point.
+ */
+ if (kdbnearsym(bb_func_end, &symtab)) {
+ while (bb_spurious_global_label(symtab.sym_name)) {
+ bb_func_end = symtab.sym_end;
+ if (!kdbnearsym(symtab.sym_end + 1, &symtab))
+ break;
+ }
+ }
+ bb_pass1();
+ if (!bb_giveup)
+ bb_pass2();
+ if (bb_giveup)
+ kdb_printf("%s: " kdb_bfd_vma_fmt0
+ " [%s]%s failed at " kdb_bfd_vma_fmt0 "\n\n",
+ __FUNCTION__, exit,
+ bb_mod_name, bb_func_name, bb_curr_addr);
+}
+
+static int
+kdb_bb1(int argc, const char **argv)
+{
+ int diag, nextarg = 1;
+ kdb_machreg_t addr;
+ unsigned long offset;
+
+ bb_cleanup(); /* in case previous command was interrupted */
+ kdba_id_init(&kdb_di);
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ if (!addr)
+ return KDB_BADADDR;
+ kdb_save_flags();
+ kdb_flags |= KDB_DEBUG_FLAG_BB << KDB_DEBUG_FLAG_SHIFT;
+ kdb_bb(addr);
+ bb_cleanup();
+ kdb_restore_flags();
+ kdbnearsym_cleanup();
+ return 0;
+}
+
+/* Run a basic block analysis on every function in the base kernel. Used as a
+ * global sanity check to find errors in the basic block code.
+ */
+
+static int
+kdb_bb_all(int argc, const char **argv)
+{
+ loff_t pos = 0;
+ const char *symname;
+ unsigned long addr;
+ int i, max_errors = 20;
+ struct bb_name_state *r;
+ kdb_printf("%s: build variables:"
+ " CCVERSION \"" __stringify(CCVERSION) "\""
+#ifdef CONFIG_X86_64
+ " CONFIG_X86_64"
+#endif
+#ifdef CONFIG_4KSTACKS
+ " CONFIG_4KSTACKS"
+#endif
+#ifdef CONFIG_PREEMPT
+ " CONFIG_PREEMPT"
+#endif
+#ifdef CONFIG_VM86
+ " CONFIG_VM86"
+#endif
+#ifdef CONFIG_FRAME_POINTER
+ " CONFIG_FRAME_POINTER"
+#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+ " CONFIG_TRACE_IRQFLAGS"
+#endif
+#ifdef CONFIG_HIBERNATION
+ " CONFIG_HIBERNATION"
+#endif
+#ifdef CONFIG_KPROBES
+ " CONFIG_KPROBES"
+#endif
+#ifdef CONFIG_KEXEC
+ " CONFIG_KEXEC"
+#endif
+#ifdef CONFIG_MATH_EMULATION
+ " CONFIG_MATH_EMULATION"
+#endif
+#ifdef CONFIG_PARAVIRT_XEN
+ " CONFIG_XEN"
+#endif
+#ifdef CONFIG_DEBUG_INFO
+ " CONFIG_DEBUG_INFO"
+#endif
+#ifdef NO_SIBLINGS
+ " NO_SIBLINGS"
+#endif
+ " REGPARM=" __stringify(REGPARM)
+ "\n\n", __FUNCTION__);
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (!r->address)
+ kdb_printf("%s: cannot find special_case name %s\n",
+ __FUNCTION__, r->name);
+ }
+ for (i = 0; i < ARRAY_SIZE(bb_spurious); ++i) {
+ if (!kallsyms_lookup_name(bb_spurious[i]))
+ kdb_printf("%s: cannot find spurious label %s\n",
+ __FUNCTION__, bb_spurious[i]);
+ }
+ while ((symname = kdb_walk_kallsyms(&pos))) {
+ if (strcmp(symname, "_stext") == 0 ||
+ strcmp(symname, "stext") == 0)
+ break;
+ }
+ if (!symname) {
+ kdb_printf("%s: cannot find _stext\n", __FUNCTION__);
+ return 0;
+ }
+ kdba_id_init(&kdb_di);
+ i = 0;
+ while ((symname = kdb_walk_kallsyms(&pos))) {
+ if (strcmp(symname, "_etext") == 0)
+ break;
+ if (i++ % 100 == 0)
+ kdb_printf(".");
+ /* x86_64 has some 16 bit functions that appear between stext
+ * and _etext. Skip them.
+ */
+ if (strcmp(symname, "verify_cpu") == 0 ||
+ strcmp(symname, "verify_cpu_noamd") == 0 ||
+ strcmp(symname, "verify_cpu_sse_test") == 0 ||
+ strcmp(symname, "verify_cpu_no_longmode") == 0 ||
+ strcmp(symname, "verify_cpu_sse_ok") == 0 ||
+ strcmp(symname, "mode_seta") == 0 ||
+ strcmp(symname, "bad_address") == 0 ||
+ strcmp(symname, "wakeup_code") == 0 ||
+ strcmp(symname, "wakeup_code_start") == 0 ||
+ strcmp(symname, "wakeup_start") == 0 ||
+ strcmp(symname, "wakeup_32_vector") == 0 ||
+ strcmp(symname, "wakeup_32") == 0 ||
+ strcmp(symname, "wakeup_long64_vector") == 0 ||
+ strcmp(symname, "wakeup_long64") == 0 ||
+ strcmp(symname, "gdta") == 0 ||
+ strcmp(symname, "idt_48a") == 0 ||
+ strcmp(symname, "gdt_48a") == 0 ||
+ strcmp(symname, "bogus_real_magic") == 0 ||
+ strcmp(symname, "bogus_64_magic") == 0 ||
+ strcmp(symname, "no_longmode") == 0 ||
+ strcmp(symname, "mode_set") == 0 ||
+ strcmp(symname, "mode_seta") == 0 ||
+ strcmp(symname, "setbada") == 0 ||
+ strcmp(symname, "check_vesa") == 0 ||
+ strcmp(symname, "check_vesaa") == 0 ||
+ strcmp(symname, "_setbada") == 0 ||
+ strcmp(symname, "wakeup_stack_begin") == 0 ||
+ strcmp(symname, "wakeup_stack") == 0 ||
+ strcmp(symname, "wakeup_level4_pgt") == 0 ||
+ strcmp(symname, "acpi_copy_wakeup_routine") == 0 ||
+ strcmp(symname, "wakeup_end") == 0 ||
+ strcmp(symname, "do_suspend_lowlevel_s4bios") == 0 ||
+ strcmp(symname, "do_suspend_lowlevel") == 0 ||
+ strcmp(symname, "wakeup_pmode_return") == 0 ||
+ strcmp(symname, "restore_registers") == 0)
+ continue;
+ /* __kprobes_text_end contains branches to the middle of code,
+ * with undefined states.
+ */
+ if (strcmp(symname, "__kprobes_text_end") == 0)
+ continue;
+ /* Data in the middle of the text segment :( */
+ if (strcmp(symname, "level2_kernel_pgt") == 0 ||
+ strcmp(symname, "level3_kernel_pgt") == 0)
+ continue;
+ if (bb_spurious_global_label(symname))
+ continue;
+ if ((addr = kallsyms_lookup_name(symname)) == 0)
+ continue;
+ // kdb_printf("BB " kdb_bfd_vma_fmt0 " %s\n", addr, symname);
+ bb_cleanup(); /* in case previous command was interrupted */
+ kdbnearsym_cleanup();
+ kdb_bb(addr);
+ touch_nmi_watchdog();
+ if (bb_giveup) {
+ if (max_errors-- == 0) {
+ kdb_printf("%s: max_errors reached, giving up\n",
+ __FUNCTION__);
+ break;
+ } else {
+ bb_giveup = 0;
+ }
+ }
+ }
+ kdb_printf("\n");
+ bb_cleanup();
+ kdbnearsym_cleanup();
+ return 0;
+}
+
+/*
+ *=============================================================================
+ *
+ * Everything above this line is doing basic block analysis, function by
+ * function. Everything below this line uses the basic block data to do a
+ * complete backtrace over all functions that are used by a process.
+ *
+ *=============================================================================
+ */
+
+
+/*============================================================================*/
+/* */
+/* Most of the backtrace code and data is common to x86_64 and i386. This */
+/* large ifdef contains all of the differences between the two architectures. */
+/* */
+/* Make sure you update the correct section of this ifdef. */
+/* */
+/*============================================================================*/
+#define XCS "cs"
+#define RSP "sp"
+#define RIP "ip"
+#define ARCH_RSP sp
+#define ARCH_RIP ip
+
+#ifdef CONFIG_X86_64
+
+#define ARCH_NORMAL_PADDING (16 * 8)
+
+/* x86_64 has multiple alternate stacks, with different sizes and different
+ * offsets to get the link from one stack to the next. All of the stacks are
+ * in the per_cpu area: either in the orig_ist or irq_stack_ptr. Debug events
+ * can even have multiple nested stacks within the single physical stack,
+ * each nested stack has its own link and some of those links are wrong.
+ *
+ * Consistent it's not!
+ *
+ * Do not assume that these stacks are aligned on their size.
+ */
+#define INTERRUPT_STACK (N_EXCEPTION_STACKS + 1)
+void
+kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
+ struct kdb_activation_record *ar)
+{
+ static struct {
+ const char *id;
+ unsigned int total_size;
+ unsigned int nested_size;
+ unsigned int next;
+ } *sdp, stack_data[] = {
+ [STACKFAULT_STACK - 1] = { "stackfault", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [DOUBLEFAULT_STACK - 1] = { "doublefault", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [NMI_STACK - 1] = { "nmi", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [DEBUG_STACK - 1] = { "debug", DEBUG_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [MCE_STACK - 1] = { "machine check", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [INTERRUPT_STACK - 1] = { "interrupt", IRQ_STACK_SIZE, IRQ_STACK_SIZE, IRQ_STACK_SIZE - sizeof(void *) },
+ };
+ unsigned long total_start = 0, total_size, total_end;
+ int sd, found = 0;
+ extern unsigned long kdba_orig_ist(int, int);
+
+ for (sd = 0, sdp = stack_data;
+ sd < ARRAY_SIZE(stack_data);
+ ++sd, ++sdp) {
+ total_size = sdp->total_size;
+ if (!total_size)
+ continue; /* in case stack_data[] has any holes */
+ if (cpu < 0) {
+ /* Arbitrary address which can be on any cpu, see if it
+ * falls within any of the alternate stacks
+ */
+ int c;
+ for_each_online_cpu(c) {
+ if (sd == INTERRUPT_STACK - 1)
+ total_end = (unsigned long)per_cpu(irq_stack_ptr, c);
+ else
+ total_end = per_cpu(orig_ist, c).ist[sd];
+ total_start = total_end - total_size;
+ if (addr >= total_start && addr < total_end) {
+ found = 1;
+ cpu = c;
+ break;
+ }
+ }
+ if (!found)
+ continue;
+ }
+ /* Only check the supplied or found cpu */
+ if (sd == INTERRUPT_STACK - 1)
+ total_end = (unsigned long)per_cpu(irq_stack_ptr, cpu);
+ else
+ total_end = per_cpu(orig_ist, cpu).ist[sd];
+ total_start = total_end - total_size;
+ if (addr >= total_start && addr < total_end) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ /* find which nested stack the address is in */
+ while (addr > total_start + sdp->nested_size)
+ total_start += sdp->nested_size;
+ ar->stack.physical_start = total_start;
+ ar->stack.physical_end = total_start + sdp->nested_size;
+ ar->stack.logical_start = total_start;
+ ar->stack.logical_end = total_start + sdp->next;
+ ar->stack.next = *(unsigned long *)ar->stack.logical_end;
+ ar->stack.id = sdp->id;
+
+ /* Nasty: when switching to the interrupt stack, the stack state of the
+ * caller is split over two stacks, the original stack and the
+ * interrupt stack. One word (the previous frame pointer) is stored on
+ * the interrupt stack, the rest of the interrupt data is in the old
+ * frame. To make the interrupted stack state look as though it is
+ * contiguous, copy the missing word from the interrupt stack to the
+ * original stack and adjust the new stack pointer accordingly.
+ */
+
+ if (sd == INTERRUPT_STACK - 1) {
+ *(unsigned long *)(ar->stack.next - KDB_WORD_SIZE) =
+ ar->stack.next;
+ ar->stack.next -= KDB_WORD_SIZE;
+ }
+}
+
+/* rip is not in the thread struct for x86_64. We know that the stack value
+ * was saved in schedule near the label thread_return. Setting rip to
+ * thread_return lets the stack trace find that we are in schedule and
+ * correctly decode its prologue.
+ */
+
+static kdb_machreg_t
+kdba_bt_stack_rip(const struct task_struct *p)
+{
+ return bb_thread_return;
+}
+
+#else /* !CONFIG_X86_64 */
+
+#define ARCH_NORMAL_PADDING (19 * 4)
+
+#ifdef CONFIG_4KSTACKS
+static struct thread_info **kdba_hardirq_ctx, **kdba_softirq_ctx;
+#endif /* CONFIG_4KSTACKS */
+
+/* On a 4K stack kernel, hardirq_ctx and softirq_ctx are [NR_CPUS] arrays. The
+ * first element of each per-cpu stack is a struct thread_info.
+ */
+void
+kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
+ struct kdb_activation_record *ar)
+{
+#ifdef CONFIG_4KSTACKS
+ struct thread_info *tinfo;
+ tinfo = (struct thread_info *)(addr & -THREAD_SIZE);
+ if (cpu < 0) {
+ /* Arbitrary address, see if it falls within any of the irq
+ * stacks
+ */
+ int found = 0;
+ for_each_online_cpu(cpu) {
+ if (tinfo == kdba_hardirq_ctx[cpu] ||
+ tinfo == kdba_softirq_ctx[cpu]) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ }
+ if (tinfo == kdba_hardirq_ctx[cpu] ||
+ tinfo == kdba_softirq_ctx[cpu]) {
+ ar->stack.physical_start = (kdb_machreg_t)tinfo;
+ ar->stack.physical_end = ar->stack.physical_start + THREAD_SIZE;
+ ar->stack.logical_start = ar->stack.physical_start +
+ sizeof(struct thread_info);
+ ar->stack.logical_end = ar->stack.physical_end;
+ ar->stack.next = tinfo->previous_esp;
+ if (tinfo == kdba_hardirq_ctx[cpu])
+ ar->stack.id = "hardirq_ctx";
+ else
+ ar->stack.id = "softirq_ctx";
+ }
+#endif /* CONFIG_4KSTACKS */
+}
+
+/* rip is in the thread struct for i386 */
+
+static kdb_machreg_t
+kdba_bt_stack_rip(const struct task_struct *p)
+{
+ return p->thread.ip;
+}
+
+#endif /* CONFIG_X86_64 */
+
+/* Given an address which claims to be on a stack, an optional cpu number and
+ * an optional task address, get information about the stack.
+ *
+ * t == NULL, cpu < 0 indicates an arbitrary stack address with no associated
+ * struct task, the address can be in an alternate stack or any task's normal
+ * stack.
+ *
+ * t != NULL, cpu >= 0 indicates a running task, the address can be in an
+ * alternate stack or that task's normal stack.
+ *
+ * t != NULL, cpu < 0 indicates a blocked task, the address can only be in that
+ * task's normal stack.
+ *
+ * t == NULL, cpu >= 0 is not a valid combination.
+ */
+
+static void
+kdba_get_stack_info(kdb_machreg_t rsp, int cpu,
+ struct kdb_activation_record *ar,
+ const struct task_struct *t)
+{
+ struct thread_info *tinfo;
+ struct task_struct *g, *p;
+ memset(&ar->stack, 0, sizeof(ar->stack));
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: " RSP "=0x%lx cpu=%d task=%p\n",
+ __FUNCTION__, rsp, cpu, t);
+ if (t == NULL || cpu >= 0) {
+ kdba_get_stack_info_alternate(rsp, cpu, ar);
+ if (ar->stack.logical_start)
+ goto out;
+ }
+ rsp &= -THREAD_SIZE;
+ tinfo = (struct thread_info *)rsp;
+ if (t == NULL) {
+ /* Arbitrary stack address without an associated task, see if
+ * it falls within any normal process stack, including the idle
+ * tasks.
+ */
+ kdb_do_each_thread(g, p) {
+ if (tinfo == task_thread_info(p)) {
+ t = p;
+ goto found;
+ }
+ } kdb_while_each_thread(g, p);
+ for_each_online_cpu(cpu) {
+ p = idle_task(cpu);
+ if (tinfo == task_thread_info(p)) {
+ t = p;
+ goto found;
+ }
+ }
+ found:
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: found task %p\n", __FUNCTION__, t);
+ } else if (cpu >= 0) {
+ /* running task */
+ struct kdb_running_process *krp = kdb_running_process + cpu;
+ if (krp->p != t || tinfo != task_thread_info(t))
+ t = NULL;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: running task %p\n", __FUNCTION__, t);
+ } else {
+ /* blocked task */
+ if (tinfo != task_thread_info(t))
+ t = NULL;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: blocked task %p\n", __FUNCTION__, t);
+ }
+ if (t) {
+ ar->stack.physical_start = rsp;
+ ar->stack.physical_end = rsp + THREAD_SIZE;
+ ar->stack.logical_start = rsp + sizeof(struct thread_info);
+ ar->stack.logical_end = ar->stack.physical_end - ARCH_NORMAL_PADDING;
+ ar->stack.next = 0;
+ ar->stack.id = "normal";
+ }
+out:
+ if (ar->stack.physical_start && KDB_DEBUG(ARA)) {
+ kdb_printf("%s: ar->stack\n", __FUNCTION__);
+ kdb_printf(" physical_start=0x%lx\n", ar->stack.physical_start);
+ kdb_printf(" physical_end=0x%lx\n", ar->stack.physical_end);
+ kdb_printf(" logical_start=0x%lx\n", ar->stack.logical_start);
+ kdb_printf(" logical_end=0x%lx\n", ar->stack.logical_end);
+ kdb_printf(" next=0x%lx\n", ar->stack.next);
+ kdb_printf(" id=%s\n", ar->stack.id);
+ kdb_printf(" set MDCOUNT %ld\n",
+ (ar->stack.physical_end - ar->stack.physical_start) /
+ KDB_WORD_SIZE);
+ kdb_printf(" mds " kdb_machreg_fmt0 "\n",
+ ar->stack.physical_start);
+ }
+}
+
+static void
+bt_print_one(kdb_machreg_t rip, kdb_machreg_t rsp,
+ const struct kdb_activation_record *ar,
+ const kdb_symtab_t *symtab, int argcount)
+{
+ int btsymarg = 0;
+ int nosect = 0;
+
+ kdbgetintenv("BTSYMARG", &btsymarg);
+ kdbgetintenv("NOSECT", &nosect);
+
+ kdb_printf(kdb_machreg_fmt0, rsp);
+ kdb_symbol_print(rip, symtab,
+ KDB_SP_SPACEB|KDB_SP_VALUE);
+ if (argcount && ar->args) {
+ int i, argc = ar->args;
+ kdb_printf(" (");
+ if (argc > argcount)
+ argc = argcount;
+ for (i = 0; i < argc; i++) {
+ if (i)
+ kdb_printf(", ");
+ if (test_bit(i, ar->valid.bits))
+ kdb_printf("0x%lx", ar->arg[i]);
+ else
+ kdb_printf("invalid");
+ }
+ kdb_printf(")");
+ }
+ kdb_printf("\n");
+ if (symtab->sym_name) {
+ if (!nosect) {
+ kdb_printf(" %s",
+ symtab->mod_name);
+ if (symtab->sec_name && symtab->sec_start)
+ kdb_printf(" 0x%lx 0x%lx",
+ symtab->sec_start, symtab->sec_end);
+ kdb_printf(" 0x%lx 0x%lx\n",
+ symtab->sym_start, symtab->sym_end);
+ }
+ }
+ if (argcount && ar->args && btsymarg) {
+ int i, argc = ar->args;
+ kdb_symtab_t arg_symtab;
+ for (i = 0; i < argc; i++) {
+ kdb_machreg_t arg = ar->arg[i];
+ if (test_bit(i, ar->valid.bits) &&
+ kdbnearsym(arg, &arg_symtab)) {
+ kdb_printf(" ARG %2d ", i);
+ kdb_symbol_print(arg, &arg_symtab,
+ KDB_SP_DEFAULT|KDB_SP_NEWLINE);
+ }
+ }
+ }
+}
+
+static void
+kdba_bt_new_stack(struct kdb_activation_record *ar, kdb_machreg_t *rsp,
+ int *count, int *suppress)
+{
+ /* Nasty: save_args builds a partial pt_regs, with r15 through
+ * rbx not being filled in. It passes struct pt_regs* to do_IRQ (in
+ * rdi) but the stack pointer is not adjusted to account for r15
+ * through rbx. This has two effects :-
+ *
+ * (1) struct pt_regs on an external interrupt actually overlaps with
+ * the local stack area used by do_IRQ. Not only are r15-rbx
+ * undefined, the area that claims to hold their values can even
+ * change as the irq is processed.
+ *
+ * (2) The back stack pointer saved for the new frame is not pointing
+ * at pt_regs, it is pointing at rbx within the pt_regs passed to
+ * do_IRQ.
+ *
+ * There is nothing that I can do about (1) but I have to fix (2)
+ * because kdb backtrace looks for the "start" address of pt_regs as it
+ * walks back through the stacks. When switching from the interrupt
+ * stack to another stack, we have to assume that pt_regs has been
+ * seen and turn off backtrace suppression.
+ */
+ int probable_pt_regs = strcmp(ar->stack.id, "interrupt") == 0;
+ *rsp = ar->stack.next;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("new " RSP "=" kdb_machreg_fmt0 "\n", *rsp);
+ bb_actual_set_value(BBRG_RSP, *rsp);
+ kdba_get_stack_info(*rsp, -1, ar, NULL);
+ if (!ar->stack.physical_start) {
+ kdb_printf("+++ Cannot resolve next stack\n");
+ } else if (!*suppress) {
+ kdb_printf(" ======================= <%s>\n",
+ ar->stack.id);
+ ++*count;
+ }
+ if (probable_pt_regs)
+ *suppress = 0;
+}
+
+/*
+ * kdba_bt_stack
+ *
+ * Inputs:
+ * addr Address provided to 'bt' command, if any.
+ * argcount
+ * p Pointer to task for 'btp' command.
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Ultimately all the bt* commands come through this routine. If
+ * old_style is 0 then it uses the basic block analysis to get an accurate
+ * backtrace with arguments, otherwise it falls back to the old method of
+ * printing anything on the stack that looks like a kernel address.
+ *
+ * Allowing for the stack data pushed by the hardware is tricky. We
+ * deduce the presence of hardware pushed data by looking for interrupt
+ * handlers, either by name or by the code that they contain. This
+ * information must be applied to the next function up the stack, because
+ * the hardware data is above the saved rip for the interrupted (next)
+ * function.
+ *
+ * To make things worse, the amount of data pushed is arch specific and
+ * may depend on the rsp for the next function, not the current function.
+ * The number of bytes pushed by hardware cannot be calculated until we
+ * are actually processing the stack for the interrupted function and have
+ * its rsp.
+ *
+ * It is also possible for an interrupt to occur in user space and for the
+ * interrupt handler to also be interrupted. Check the code selector
+ * whenever the previous function is an interrupt handler and stop
+ * backtracing if the interrupt was not in kernel space.
+ */
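+/* For example, on x86_64 an interrupt pushes ss, rsp, rflags, cs and
+ * rip (plus an error code for some exceptions) before the handler runs;
+ * bb_hardware_pushed_arch() below accounts for those words when stepping
+ * from the handler's frame to the interrupted function.
+ */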
+
+static int
+kdba_bt_stack(kdb_machreg_t addr, int argcount, const struct task_struct *p,
+ int old_style)
+{
+ struct kdb_activation_record ar;
+ kdb_machreg_t rip = 0, rsp = 0, prev_rsp, cs;
+ kdb_symtab_t symtab;
+ int rip_at_rsp = 0, count = 0, btsp = 0, suppress,
+ interrupt_handler = 0, prev_interrupt_handler = 0, hardware_pushed,
+ prev_noret = 0;
+ struct pt_regs *regs = NULL;
+
+ kdbgetintenv("BTSP", &btsp);
+ suppress = !btsp;
+ memset(&ar, 0, sizeof(ar));
+ if (old_style)
+ kdb_printf("Using old style backtrace, unreliable with no arguments\n");
+
+ /*
+ * The caller may have supplied an address at which the stack traceback
+ * operation should begin. This address is assumed by this code to
+ * point to a return address on the stack to be traced back.
+ *
+ * Warning: type in the wrong address and you will get garbage in the
+ * backtrace.
+ */
+ if (addr) {
+ rsp = addr;
+ kdb_getword(&rip, rsp, sizeof(rip));
+ rip_at_rsp = 1;
+ suppress = 0;
+ kdba_get_stack_info(rsp, -1, &ar, NULL);
+ } else {
+ if (task_curr(p)) {
+ struct kdb_running_process *krp =
+ kdb_running_process + task_cpu(p);
+ kdb_machreg_t cs;
+ regs = krp->regs;
+ if (krp->seqno &&
+ krp->p == p &&
+ krp->seqno >= kdb_seqno - 1 &&
+ !KDB_NULL_REGS(regs)) {
+ /* valid saved state, continue processing */
+ } else {
+ kdb_printf
+ ("Process did not save state, cannot backtrace\n");
+ kdb_ps1(p);
+ return 0;
+ }
+ kdba_getregcontents(XCS, regs, &cs);
+ if ((cs & 0xffff) != __KERNEL_CS) {
+ kdb_printf("Stack is not in kernel space, backtrace not available\n");
+ return 0;
+ }
+ rip = krp->arch.ARCH_RIP;
+ rsp = krp->arch.ARCH_RSP;
+ kdba_get_stack_info(rsp, kdb_process_cpu(p), &ar, p);
+ } else {
+ /* Not on cpu, assume blocked. Blocked tasks do not
+ * have pt_regs. p->thread contains some data, alas
+ * what it contains differs between i386 and x86_64.
+ */
+ rip = kdba_bt_stack_rip(p);
+ rsp = p->thread.sp;
+ suppress = 0;
+ kdba_get_stack_info(rsp, -1, &ar, p);
+ }
+ }
+ if (!ar.stack.physical_start) {
+ kdb_printf(RSP "=0x%lx is not in a valid kernel stack, backtrace not available\n",
+ rsp);
+ return 0;
+ }
+ memset(&bb_actual, 0, sizeof(bb_actual));
+ bb_actual_set_value(BBRG_RSP, rsp);
+ bb_actual_set_valid(BBRG_RSP, 1);
+
+ kdb_printf(RSP "%*s" RIP "%*sFunction (args)\n",
+ 2*KDB_WORD_SIZE, " ",
+ 2*KDB_WORD_SIZE, " ");
+ if (ar.stack.next && !suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+
+ bb_cleanup();
+ /* Run through all the stacks */
+ while (ar.stack.physical_start) {
+ if (rip_at_rsp) {
+ rip = *(kdb_machreg_t *)rsp;
+ /* I wish that gcc was fixed to include a nop
+ * instruction after ATTRIB_NORET functions. The lack
+ * of a nop means that the return address points to the
+ * start of next function, so fudge it to point to one
+ * byte previous.
+ *
+ * No, we cannot just decrement all rip values.
+ * Sometimes an rip legally points to the start of a
+ * function, e.g. interrupted code or hand crafted
+ * assembler.
+ */
+ if (prev_noret) {
+ kdbnearsym(rip, &symtab);
+ if (rip == symtab.sym_start) {
+ --rip;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("\tprev_noret, " RIP
+ "=0x%lx\n", rip);
+ }
+ }
+ }
+ kdbnearsym(rip, &symtab);
+ if (old_style) {
+ if (__kernel_text_address(rip) && !suppress) {
+ bt_print_one(rip, rsp, &ar, &symtab, 0);
+ ++count;
+ }
+ if (rsp == (unsigned long)regs) {
+ if (ar.stack.next && suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+ ++count;
+ suppress = 0;
+ }
+ rsp += sizeof(rip);
+ rip_at_rsp = 1;
+ if (rsp >= ar.stack.logical_end) {
+ if (!ar.stack.next)
+ break;
+ kdba_bt_new_stack(&ar, &rsp, &count, &suppress);
+ rip_at_rsp = 0;
+ continue;
+ }
+ } else {
+ /* Start each analysis with no dynamic data from the
+ * previous kdb_bb() run.
+ */
+ bb_cleanup();
+ kdb_bb(rip);
+ if (bb_giveup)
+ break;
+ prev_interrupt_handler = interrupt_handler;
+ interrupt_handler = bb_interrupt_handler(rip);
+ prev_rsp = rsp;
+ if (rip_at_rsp) {
+ if (prev_interrupt_handler) {
+ cs = *((kdb_machreg_t *)rsp + 1) & 0xffff;
+ hardware_pushed =
+ bb_hardware_pushed_arch(rsp, &ar);
+ } else {
+ cs = __KERNEL_CS;
+ hardware_pushed = 0;
+ }
+ rsp += sizeof(rip) + hardware_pushed;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: " RSP " "
+ kdb_machreg_fmt0
+ " -> " kdb_machreg_fmt0
+ " hardware_pushed %d"
+ " prev_interrupt_handler %d"
+ " cs 0x%lx\n",
+ __FUNCTION__,
+ prev_rsp,
+ rsp,
+ hardware_pushed,
+ prev_interrupt_handler,
+ cs);
+ if (rsp >= ar.stack.logical_end &&
+ ar.stack.next) {
+ kdba_bt_new_stack(&ar, &rsp, &count,
+ &suppress);
+ rip_at_rsp = 0;
+ continue;
+ }
+ bb_actual_set_value(BBRG_RSP, rsp);
+ } else {
+ cs = __KERNEL_CS;
+ }
+ rip_at_rsp = 1;
+ bb_actual_rollback(&ar);
+ if (bb_giveup)
+ break;
+ if (bb_actual_value(BBRG_RSP) < rsp) {
+ kdb_printf("%s: " RSP " is going backwards, "
+ kdb_machreg_fmt0 " -> "
+ kdb_machreg_fmt0 "\n",
+ __FUNCTION__,
+ rsp,
+ bb_actual_value(BBRG_RSP));
+ bb_giveup = 1;
+ break;
+ }
+ bb_arguments(&ar);
+ if (!suppress) {
+ bt_print_one(rip, prev_rsp, &ar, &symtab, argcount);
+ ++count;
+ }
+ /* Functions that terminate the backtrace */
+ if (strcmp(bb_func_name, "cpu_idle") == 0 ||
+ strcmp(bb_func_name, "child_rip") == 0)
+ break;
+ if (rsp >= ar.stack.logical_end &&
+ !ar.stack.next)
+ break;
+ if (rsp <= (unsigned long)regs &&
+ bb_actual_value(BBRG_RSP) > (unsigned long)regs) {
+ if (ar.stack.next && suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+ ++count;
+ suppress = 0;
+ }
+ if (cs != __KERNEL_CS) {
+ kdb_printf("Reached user space\n");
+ break;
+ }
+ rsp = bb_actual_value(BBRG_RSP);
+ }
+ prev_noret = bb_noret(bb_func_name);
+ if (count > 200)
+ break;
+ }
+ if (bb_giveup)
+ return 1;
+ bb_cleanup();
+ kdbnearsym_cleanup();
+
+ if (count > 200) {
+ kdb_printf("bt truncated, count limit reached\n");
+ return 1;
+ } else if (suppress) {
+ kdb_printf
+ ("bt did not find pt_regs - no trace produced. Suggest 'set BTSP 1'\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * kdba_bt_address
+ *
+ * Do a backtrace starting at a specified stack address. Use this if the
+ * heuristics get the stack decode wrong.
+ *
+ * Inputs:
+ * addr Address provided to 'bt' command.
+ * argcount
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * mds %rsp comes in handy when examining the stack to do a manual
+ * traceback.
+ */
+
+int kdba_bt_address(kdb_machreg_t addr, int argcount)
+{
+ int ret;
+ kdba_id_init(&kdb_di); /* kdb_bb needs this done once */
+ ret = kdba_bt_stack(addr, argcount, NULL, 0);
+ if (ret == 1)
+ ret = kdba_bt_stack(addr, argcount, NULL, 1);
+ return ret;
+}
+
+/*
+ * kdba_bt_process
+ *
+ * Do a backtrace for a specified process.
+ *
+ * Inputs:
+ * p Struct task pointer extracted by 'bt' command.
+ * argcount
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ */
+
+int kdba_bt_process(const struct task_struct *p, int argcount)
+{
+ int ret;
+ kdba_id_init(&kdb_di); /* kdb_bb needs this done once */
+ ret = kdba_bt_stack(0, argcount, p, 0);
+ if (ret == 1)
+ ret = kdba_bt_stack(0, argcount, p, 1);
+ return ret;
+}
+
+static int __init kdba_bt_x86_init(void)
+{
+ int i, c, cp = -1;
+ struct bb_name_state *r;
+
+ kdb_register_repeat("bb1", kdb_bb1, "<vaddr>", "Analyse one basic block", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bb_all", kdb_bb_all, "", "Backtrace check on all built in functions", 0, KDB_REPEAT_NONE);
+
+ /* Split the opcode usage table by the first letter of each set of
+ * opcodes, for faster mapping of opcode to its operand usage.
+ */
+ for (i = 0; i < ARRAY_SIZE(bb_opcode_usage_all); ++i) {
+ c = bb_opcode_usage_all[i].opcode[0] - 'a';
+ if (c != cp) {
+ cp = c;
+ bb_opcode_usage[c].opcode = bb_opcode_usage_all + i;
+ }
+ ++bb_opcode_usage[c].size;
+ }
+
+ bb_common_interrupt = kallsyms_lookup_name("common_interrupt");
+ bb_error_entry = kallsyms_lookup_name("error_entry");
+ bb_ret_from_intr = kallsyms_lookup_name("ret_from_intr");
+ bb_thread_return = kallsyms_lookup_name("thread_return");
+ bb_sync_regs = kallsyms_lookup_name("sync_regs");
+ bb_save_v86_state = kallsyms_lookup_name("save_v86_state");
+ bb__sched_text_start = kallsyms_lookup_name("__sched_text_start");
+ bb__sched_text_end = kallsyms_lookup_name("__sched_text_end");
+ bb_save_args = kallsyms_lookup_name("save_args");
+ bb_save_rest = kallsyms_lookup_name("save_rest");
+ bb_save_paranoid = kallsyms_lookup_name("save_paranoid");
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ r->address = kallsyms_lookup_name(r->name);
+ }
+
+#ifdef CONFIG_4KSTACKS
+ kdba_hardirq_ctx = (struct thread_info **)kallsyms_lookup_name("hardirq_ctx");
+ kdba_softirq_ctx = (struct thread_info **)kallsyms_lookup_name("softirq_ctx");
+#endif /* CONFIG_4KSTACKS */
+
+ return 0;
+}
+
+static void __exit kdba_bt_x86_exit(void)
+{
+ kdb_unregister("bb1");
+ kdb_unregister("bb_all");
+}
+
+module_init(kdba_bt_x86_init)
+module_exit(kdba_bt_x86_exit)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
+ static __init void reserve_ibft_region(void)
+ {
+ unsigned long addr, size = 0;
+
+ addr = find_ibft_region(&size);
+
++#ifndef CONFIG_XEN
+ if (size)
+ reserve_early_overlap_ok(addr, addr + size, "ibft");
++#endif
+ }
+
#ifdef CONFIG_X86_RESERVE_LOW_64K
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/smp_lock.h>
+ #include <linux/gfp.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
config DRM_VMWGFX
tristate "DRM driver for VMware Virtual GPU"
- depends on DRM && PCI && !XEN
- depends on DRM && PCI && FB
++ depends on DRM && PCI && FB && !XEN
select FB_DEFERRED_IO
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
--- /dev/null
+/*
+ * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
+ *
+ * Module Author: Heinz Mauelshagen <heinzm@redhat.com>
+ *
+ * Device-mapper memory object handling:
+ *
+ * o allocate/free total_pages in a per client page pool.
+ *
+ * o allocate/free memory objects with chunks (1..n) of
+ * pages_per_chunk pages hanging off.
+ *
+ * This file is released under the GPL.
+ */
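+
+/* Illustrative usage sketch (not part of this patch): a client managing
+ * 16 objects of 2 chunks x 4 pages each, allocating and freeing one
+ * memory object:
+ *
+ *	struct dm_mem_cache_client *cl;
+ *	struct dm_mem_cache_object *obj;
+ *
+ *	cl = dm_mem_cache_client_create(16, 2, 4);
+ *	if (IS_ERR(cl))
+ *		return PTR_ERR(cl);
+ *	obj = dm_mem_cache_alloc(cl);
+ *	if (!IS_ERR(obj))
+ *		dm_mem_cache_free(cl, obj);
+ *	dm_mem_cache_client_destroy(cl);
+ */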
+
+#define DM_MEM_CACHE_VERSION "0.2"
+
+#include "dm.h"
+#include "dm-memcache.h"
+#include <linux/dm-io.h>
++#include <linux/slab.h>
+
+struct dm_mem_cache_client {
+ spinlock_t lock;
+ mempool_t *objs_pool;
+ struct page_list *free_list;
+ unsigned objects;
+ unsigned chunks;
+ unsigned pages_per_chunk;
+ unsigned free_pages;
+ unsigned total_pages;
+};
+
+/*
+ * Free pages and page_list elements of a client.
+ */
+static void free_cache_pages(struct page_list *list)
+{
+ while (list) {
+ struct page_list *pl = list;
+
+ list = pl->next;
+ BUG_ON(!pl->page);
+ __free_page(pl->page);
+ kfree(pl);
+ }
+}
+
+/*
+ * Allocate the number of pages and page_list elements required by the client.
+ */
+static struct page_list *alloc_cache_pages(unsigned pages)
+{
+ struct page_list *pl, *ret = NULL;
+ struct page *page;
+
+ while (pages--) {
+ page = alloc_page(GFP_NOIO);
+ if (!page)
+ goto err;
+
+ pl = kmalloc(sizeof(*pl), GFP_NOIO);
+ if (!pl) {
+ __free_page(page);
+ goto err;
+ }
+
+ pl->page = page;
+ pl->next = ret;
+ ret = pl;
+ }
+
+ return ret;
+
+err:
+ free_cache_pages(ret);
+ return NULL;
+}
+
+/*
+ * Allocate page_list elements from the client's free list onto the chunks
+ * of the memory object.
+ */
+static void alloc_chunks(struct dm_mem_cache_client *cl,
+ struct dm_mem_cache_object *obj)
+{
+ unsigned chunks = cl->chunks;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ local_irq_disable();
+ while (chunks--) {
+ unsigned p = cl->pages_per_chunk;
+
+ obj[chunks].pl = NULL;
+
+ while (p--) {
+ struct page_list *pl;
+
+ /* Take next element from free list */
+ spin_lock(&cl->lock);
+ pl = cl->free_list;
+ BUG_ON(!pl);
+ cl->free_list = pl->next;
+ spin_unlock(&cl->lock);
+
+ pl->next = obj[chunks].pl;
+ obj[chunks].pl = pl;
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Free page_list elements putting them back onto free list
+ */
+static void free_chunks(struct dm_mem_cache_client *cl,
+ struct dm_mem_cache_object *obj)
+{
+ unsigned chunks = cl->chunks;
+ unsigned long flags;
+ struct page_list *next, *pl;
+
+ local_irq_save(flags);
+ local_irq_disable();
+ while (chunks--) {
+ for (pl = obj[chunks].pl; pl; pl = next) {
+ next = pl->next;
+
+ spin_lock(&cl->lock);
+ pl->next = cl->free_list;
+ cl->free_list = pl;
+ cl->free_pages++;
+ spin_unlock(&cl->lock);
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Create/destroy dm memory cache client resources.
+ */
+struct dm_mem_cache_client *
+dm_mem_cache_client_create(unsigned objects, unsigned chunks,
+ unsigned pages_per_chunk)
+{
+ unsigned total_pages = objects * chunks * pages_per_chunk;
+ struct dm_mem_cache_client *client;
+
+ BUG_ON(!total_pages);
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (!client)
+ return ERR_PTR(-ENOMEM);
+
+ client->objs_pool = mempool_create_kmalloc_pool(objects,
+ chunks * sizeof(struct dm_mem_cache_object));
+ if (!client->objs_pool)
+ goto err;
+
+ client->free_list = alloc_cache_pages(total_pages);
+ if (!client->free_list)
+ goto err1;
+
+ spin_lock_init(&client->lock);
+ client->objects = objects;
+ client->chunks = chunks;
+ client->pages_per_chunk = pages_per_chunk;
+ client->free_pages = client->total_pages = total_pages;
+ return client;
+
+err1:
+ mempool_destroy(client->objs_pool);
+err:
+ kfree(client);
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(dm_mem_cache_client_create);
+
+void dm_mem_cache_client_destroy(struct dm_mem_cache_client *cl)
+{
+ BUG_ON(cl->free_pages != cl->total_pages);
+ free_cache_pages(cl->free_list);
+ mempool_destroy(cl->objs_pool);
+ kfree(cl);
+}
+EXPORT_SYMBOL(dm_mem_cache_client_destroy);
+
+/*
+ * Grow a client's cache by a number of objects.
+ *
+ * Don't call from interrupt context!
+ */
+int dm_mem_cache_grow(struct dm_mem_cache_client *cl, unsigned objects)
+{
+ unsigned pages = objects * cl->chunks * cl->pages_per_chunk;
+ struct page_list *pl, *last;
+
+ BUG_ON(!pages);
+ pl = alloc_cache_pages(pages);
+ if (!pl)
+ return -ENOMEM;
+
+ last = pl;
+ while (last->next)
+ last = last->next;
+
+ spin_lock_irq(&cl->lock);
+ last->next = cl->free_list;
+ cl->free_list = pl;
+ cl->free_pages += pages;
+ cl->total_pages += pages;
+ cl->objects += objects;
+ spin_unlock_irq(&cl->lock);
+
+ mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
+ return 0;
+}
+EXPORT_SYMBOL(dm_mem_cache_grow);
+
+/* Shrink a client's cache by a number of objects. */
+int dm_mem_cache_shrink(struct dm_mem_cache_client *cl, unsigned objects)
+{
+ int r;
+ unsigned pages = objects * cl->chunks * cl->pages_per_chunk, p = pages;
+ unsigned long flags;
+ struct page_list *last = NULL, *pl, *pos;
+
+ BUG_ON(!pages);
+
+ spin_lock_irqsave(&cl->lock, flags);
+ pl = pos = cl->free_list;
+ while (p-- && pos && pos->next) {
+ last = pos;
+ pos = pos->next;
+ }
+
+ /* ++p != 0 -> we ran off the list end -> not enough free pages. */
+ if (++p)
+ r = -ENOMEM;
+ else {
+ r = 0;
+ cl->free_list = pos;
+ cl->free_pages -= pages;
+ cl->total_pages -= pages;
+ cl->objects -= objects;
+ last->next = NULL;
+ }
+ spin_unlock_irqrestore(&cl->lock, flags);
+
+ if (!r) {
+ free_cache_pages(pl);
+ mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
+ }
+
+ return r;
+}
+EXPORT_SYMBOL(dm_mem_cache_shrink);
+
+/*
+ * Allocate/free a memory object.
+ *
+ * dm_mem_cache_free() may be called from interrupt context, whereas
+ * dm_mem_cache_alloc() may sleep (mempool_alloc() with GFP_NOIO) and
+ * therefore may not.
+ */
+struct dm_mem_cache_object *dm_mem_cache_alloc(struct dm_mem_cache_client *cl)
+{
+ int r = 0;
+ unsigned pages = cl->chunks * cl->pages_per_chunk;
+ unsigned long flags;
+ struct dm_mem_cache_object *obj;
+
+ obj = mempool_alloc(cl->objs_pool, GFP_NOIO);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_irqsave(&cl->lock, flags);
+ if (pages > cl->free_pages)
+ r = -ENOMEM;
+ else
+ cl->free_pages -= pages;
+ spin_unlock_irqrestore(&cl->lock, flags);
+
+ if (r) {
+ mempool_free(obj, cl->objs_pool);
+ return ERR_PTR(r);
+ }
+
+ alloc_chunks(cl, obj);
+ return obj;
+}
+EXPORT_SYMBOL(dm_mem_cache_alloc);
+
+void dm_mem_cache_free(struct dm_mem_cache_client *cl,
+ struct dm_mem_cache_object *obj)
+{
+ free_chunks(cl, obj);
+ mempool_free(obj, cl->objs_pool);
+}
+EXPORT_SYMBOL(dm_mem_cache_free);
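+
+/*
+ * Typical client lifecycle (sketch; checks of the ERR_PTR() returns
+ * are omitted for brevity):
+ *
+ *	struct dm_mem_cache_client *cl =
+ *		dm_mem_cache_client_create(objects, chunks, pages_per_chunk);
+ *	struct dm_mem_cache_object *obj = dm_mem_cache_alloc(cl);
+ *
+ *	(do io on the obj[0..chunks-1].pl page lists)
+ *
+ *	dm_mem_cache_free(cl, obj);
+ *	dm_mem_cache_client_destroy(cl);
+ */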
+
+MODULE_DESCRIPTION(DM_NAME " dm memory cache");
+MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
+MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+ *
+ * Module Author: Heinz Mauelshagen <Mauelshagen@RedHat.com>
+ *
+ * This file is released under the GPL.
+ *
+ *
+ * Linux 2.6 Device Mapper RAID4 and RAID5 target.
+ *
+ * Supports:
+ * o RAID4 with dedicated and selectable parity device
+ * o RAID5 with rotating parity (left+right, symmetric+asymmetric)
+ * o run time optimization of xor algorithm used to calculate parity
+ *
+ *
+ * Thanks to MD for:
+ * o the raid address calculation algorithm
+ * o the base of the biovec <-> page list copier.
+ *
+ *
+ * Uses the region hash to keep track of how many writes are in flight to
+ * each region, and a dirty log to record the state of regions to recover:
+ *
+ * o clean regions (those which are synchronized
+ * and don't have write io in flight)
+ * o dirty regions (those with write io in flight)
+ *
+ *
+ * On startup, any dirty regions are migrated to the 'nosync' state
+ * and are subject to recovery by the daemon.
+ *
+ * See raid_ctr() for table definition.
+ *
+ *
+ * FIXME:
+ * o add virtual interface for locking
+ * o remove instrumentation (REMOVEME:)
+ *
+ */
+
+static const char *version = "v0.2431";
+
+#include "dm.h"
+#include "dm-memcache.h"
+#include "dm-message.h"
+#include "dm-raid45.h"
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
++#include <linux/slab.h>
+
+#include <linux/dm-io.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/dm-region-hash.h>
+
+/* # of parallel recovered regions */
+/* FIXME: cope with multiple recovery stripes in raid_set struct. */
+#define MAX_RECOVER 1 /* needs to be 1! */
+
+/*
+ * Configurable parameters
+ */
+#define INLINE
+
+/* Default # of stripes if not set in constructor. */
+#define STRIPES 64
+
+/* Minimum/maximum # of selectable stripes. */
+#define STRIPES_MIN 8
+#define STRIPES_MAX 16384
+
+/* Default chunk size in sectors if not set in constructor. */
+#define CHUNK_SIZE 64
+
+/* Default io size in sectors if not set in constructor. */
+#define IO_SIZE_MIN SECTORS_PER_PAGE
+#define IO_SIZE IO_SIZE_MIN
+
+/* Maximum settable chunk size in sectors. */
+#define CHUNK_SIZE_MAX 16384
+
+/* Recover io size default in sectors. */
+#define RECOVER_IO_SIZE_MIN 64
+#define RECOVER_IO_SIZE 256
+
+/* Default percentage recover io bandwidth. */
+#define BANDWIDTH 10
+#define BANDWIDTH_MIN 1
+#define BANDWIDTH_MAX 100
+/*
+ * END Configurable parameters
+ */
+
+#define TARGET "dm-raid45"
+#define DAEMON "kraid45d"
+#define DM_MSG_PREFIX TARGET
+
+#define SECTORS_PER_PAGE (PAGE_SIZE >> SECTOR_SHIFT)
+
+/* Amount/size for __xor(). */
+#define SECTORS_PER_XOR SECTORS_PER_PAGE
+#define XOR_SIZE PAGE_SIZE
+
+/* Derive raid_set from stripe_cache pointer. */
+#define RS(x) container_of(x, struct raid_set, sc)
+
+/* Check value in range. */
+#define range_ok(i, min, max) ((i) >= (min) && (i) <= (max))
+
+/* Page reference. */
+#define PAGE(stripe, p) ((stripe)->obj[p].pl->page)
+
+/* Bio list reference. */
+#define BL(stripe, p, rw) (stripe->ss[p].bl + rw)
+
+/* Page list reference. */
+#define PL(stripe, p) (stripe->obj[p].pl)
+
+/* Check argument is power of 2. */
+#define POWER_OF_2(a) (!((a) & ((a) - 1)))
+
+/* Factor out to dm-bio-list.h */
+static inline void bio_list_push(struct bio_list *bl, struct bio *bio)
+{
+ bio->bi_next = bl->head;
+ bl->head = bio;
+
+ if (!bl->tail)
+ bl->tail = bio;
+}
+
+/* Factor out to dm.h */
+#define TI_ERR_RET(str, ret) \
+ do { ti->error = DM_MSG_PREFIX ": " str; return ret; } while (0)
+#define TI_ERR(str) TI_ERR_RET(str, -EINVAL)
+
+/*-----------------------------------------------------------------
+ * Stripe cache
+ *
+ * Cache for all reads and writes to raid sets (operational or degraded)
+ *
+ * We need to run all data to and from a RAID set through this cache,
+ * because parity chunks need to get calculated from data chunks
+ * or, in the degraded/resynchronization case, missing chunks need
+ * to be reconstructed using the other chunks of the stripe.
+ *---------------------------------------------------------------*/
+/* Kmem cache # counter: one unique # per active RAID set. */
+static atomic_t _stripe_sc_nr = ATOMIC_INIT(-1);
+
+/* A stripe set (holds bios hanging off). */
+struct stripe_set {
+ struct stripe *stripe; /* Backpointer to stripe for endio(). */
+ struct bio_list bl[3]; /* Reads, writes, and writes merged. */
+#define WRITE_MERGED 2
+};
+
+#if READ != 0 || WRITE != 1
+#error dm-raid45: READ/WRITE != 0/1 used as index!!!
+#endif
+
+/*
+ * Stripe linked list indexes. Keep order, because the stripe
+ * and the stripe cache rely on the first 3!
+ */
+enum list_types {
+ LIST_IO = 0, /* Stripes with io pending. */
+ LIST_ENDIO, /* Stripes to endio. */
+ LIST_LRU, /* Least recently used stripes. */
+ LIST_HASH, /* Hashed stripes. */
+ LIST_RECOVER = LIST_HASH, /* For recovery type stripes only. */
+ NR_LISTS, /* To size array in struct stripe. */
+};
+
+enum lock_types {
+ LOCK_ENDIO = 0, /* Protect endio list. */
+ LOCK_LRU, /* Protect lru list. */
+ NR_LOCKS, /* To size array in struct stripe_cache. */
+};
+
+/* A stripe: the io object to handle all reads and writes to a RAID set. */
+struct stripe {
+ struct stripe_cache *sc; /* Backpointer to stripe cache. */
+
+ sector_t key; /* Hash key. */
+ region_t region; /* Region stripe is mapped to. */
+
+ /* Reference count. */
+ atomic_t cnt;
+
+ struct {
+ unsigned long flags; /* flags (see below). */
+
+ /*
+ * Pending ios in flight:
+ *
+ * used as a 'lock' to control move of stripe to endio list
+ */
+ atomic_t pending; /* Pending ios in flight. */
+
+ /* Sectors to read and write for multi page stripe sets. */
+ unsigned size;
+ } io;
+
+ /* Lock on stripe (for clustering). */
+ void *lock;
+
+ /*
+ * 4 linked lists:
+ * o io list to flush io
+ * o endio list
+ * o LRU list to put stripes w/o reference count on
+ * o stripe cache hash
+ */
+ struct list_head lists[NR_LISTS];
+
+ struct {
+ unsigned short parity; /* Parity chunk index. */
+ short recover; /* Recovery chunk index. */
+ } idx;
+
+ /* This sets memory cache object (dm-mem-cache). */
+ struct dm_mem_cache_object *obj;
+
+ /* Array of stripe sets (dynamically allocated). */
+ struct stripe_set ss[0];
+};
+
+/* States stripes can be in (flags field). */
+enum stripe_states {
+ STRIPE_ACTIVE, /* Active io on stripe. */
+ STRIPE_ERROR, /* io error on stripe. */
+ STRIPE_MERGED, /* Writes got merged. */
+ STRIPE_READ, /* Read. */
+ STRIPE_RBW, /* Read-before-write. */
+ STRIPE_RECONSTRUCT, /* reconstruct of a missing chunk required. */
+ STRIPE_RECOVER, /* Stripe used for RAID set recovery. */
+};
+
+/* ... and macros to access them. */
+#define BITOPS(name, what, var, flag) \
+static inline int TestClear ## name ## what(struct var *v) \
+{ return test_and_clear_bit(flag, &v->io.flags); } \
+static inline int TestSet ## name ## what(struct var *v) \
+{ return test_and_set_bit(flag, &v->io.flags); } \
+static inline void Clear ## name ## what(struct var *v) \
+{ clear_bit(flag, &v->io.flags); } \
+static inline void Set ## name ## what(struct var *v) \
+{ set_bit(flag, &v->io.flags); } \
+static inline int name ## what(struct var *v) \
+{ return test_bit(flag, &v->io.flags); }
+
+
+BITOPS(Stripe, Active, stripe, STRIPE_ACTIVE)
+BITOPS(Stripe, Merged, stripe, STRIPE_MERGED)
+BITOPS(Stripe, Error, stripe, STRIPE_ERROR)
+BITOPS(Stripe, Read, stripe, STRIPE_READ)
+BITOPS(Stripe, RBW, stripe, STRIPE_RBW)
+BITOPS(Stripe, Reconstruct, stripe, STRIPE_RECONSTRUCT)
+BITOPS(Stripe, Recover, stripe, STRIPE_RECOVER)
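+
+/*
+ * E.g. BITOPS(Stripe, Active, stripe, STRIPE_ACTIVE) above expands to the
+ * inlines StripeActive(), SetStripeActive(), ClearStripeActive(),
+ * TestSetStripeActive() and TestClearStripeActive(), all operating
+ * atomically on the STRIPE_ACTIVE bit of stripe->io.flags.
+ */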
+
+/* A stripe hash. */
+struct stripe_hash {
+ struct list_head *hash;
+ unsigned buckets;
+ unsigned mask;
+ unsigned prime;
+ unsigned shift;
+};
+
+/* A stripe cache. */
+struct stripe_cache {
+ /* Stripe hash. */
+ struct stripe_hash hash;
+
+ /* Stripes with io to flush, stripes to endio and LRU lists. */
+ struct list_head lists[3];
+
+ /* Locks to protect endio and lru lists. */
+ spinlock_t locks[NR_LOCKS];
+
+ /* Slab cache to allocate stripes from. */
+ struct {
+ struct kmem_cache *cache; /* Cache itself. */
+ char name[32]; /* Unique name. */
+ } kc;
+
+ struct dm_io_client *dm_io_client; /* dm-io client resource context. */
+
+ /* dm-mem-cache client resource context. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ int stripes_parm; /* # stripes parameter from constructor. */
+ atomic_t stripes; /* actual # of stripes in cache. */
+ atomic_t stripes_to_shrink; /* # of stripes to shrink cache by. */
+ atomic_t stripes_last; /* last # of stripes in cache. */
+ atomic_t active_stripes; /* actual # of active stripes in cache. */
+
+ /* REMOVEME: */
+ atomic_t max_active_stripes; /* Max # of active stripes observed. */
+};
+
+/* Flag specs for raid_dev. */
+enum raid_dev_flags { DEVICE_FAILED, IO_QUEUED };
+
+/* The raid device in a set. */
+struct raid_dev {
+ struct dm_dev *dev;
+ unsigned long flags; /* raid_dev_flags. */
+ sector_t start; /* offset to map to. */
+};
+
+/* Flags spec for raid_set. */
+enum raid_set_flags {
+ RS_CHECK_OVERWRITE, /* Check for chunk overwrites. */
+ RS_DEAD, /* RAID set inoperational. */
+ RS_DEVEL_STATS, /* REMOVEME: display status information. */
+ RS_IO_ERROR, /* io error on set. */
+ RS_RECOVER, /* Do recovery. */
+ RS_RECOVERY_BANDWIDTH, /* Allow recovery bandwidth (delayed bios). */
+ RS_REGION_GET, /* get a region to recover. */
+ RS_SC_BUSY, /* stripe cache busy -> send an event. */
+ RS_SUSPENDED, /* RAID set suspended. */
+};
+
+/* REMOVEME: devel stats counters. */
+enum stats_types {
+ S_BIOS_READ,
+ S_BIOS_ADDED_READ,
+ S_BIOS_ENDIO_READ,
+ S_BIOS_WRITE,
+ S_BIOS_ADDED_WRITE,
+ S_BIOS_ENDIO_WRITE,
+ S_CAN_MERGE,
+ S_CANT_MERGE,
+ S_CONGESTED,
+ S_DM_IO_READ,
+ S_DM_IO_WRITE,
+ S_ACTIVE_READS,
+ S_BANDWIDTH,
+ S_BARRIER,
+ S_BIO_COPY_PL_NEXT,
+ S_DEGRADED,
+ S_DELAYED_BIOS,
+ S_EVICT,
+ S_FLUSHS,
+ S_HITS_1ST,
+ S_IOS_POST,
+ S_INSCACHE,
+ S_MAX_LOOKUP,
+ S_MERGE_PAGE_LOCKED,
+ S_NO_BANDWIDTH,
+ S_NOT_CONGESTED,
+ S_NO_RW,
+ S_NOSYNC,
+ S_PROHIBITPAGEIO,
+ S_RECONSTRUCT_EI,
+ S_RECONSTRUCT_DEV,
+ S_REDO,
+ S_REQUEUE,
+ S_STRIPE_ERROR,
+ S_SUM_DELAYED_BIOS,
+ S_XORS,
+ S_NR_STATS, /* # of stats counters. */
+};
+
+/* Status type -> string mappings. */
+struct stats_map {
+ const enum stats_types type;
+ const char *str;
+};
+
+static struct stats_map stats_map[] = {
+ { S_BIOS_READ, "r=" },
+ { S_BIOS_ADDED_READ, "/" },
+ { S_BIOS_ENDIO_READ, "/" },
+ { S_BIOS_WRITE, " w=" },
+ { S_BIOS_ADDED_WRITE, "/" },
+ { S_BIOS_ENDIO_WRITE, "/" },
+ { S_DM_IO_READ, " rc=" },
+ { S_DM_IO_WRITE, " wc=" },
+ { S_ACTIVE_READS, " active_reads=" },
+ { S_BANDWIDTH, " bandwidth=" },
+ { S_NO_BANDWIDTH, " no_bandwidth=" },
+ { S_BARRIER, " barrier=" },
+ { S_BIO_COPY_PL_NEXT, " bio_copy_pl_next=" },
+ { S_CAN_MERGE, " can_merge=" },
+ { S_MERGE_PAGE_LOCKED, "/page_locked=" },
+ { S_CANT_MERGE, "/cant_merge=" },
+ { S_CONGESTED, " congested=" },
+ { S_NOT_CONGESTED, "/not_congested=" },
+ { S_DEGRADED, " degraded=" },
+ { S_DELAYED_BIOS, " delayed_bios=" },
+ { S_SUM_DELAYED_BIOS, "/sum_delayed_bios=" },
+ { S_EVICT, " evict=" },
+ { S_FLUSHS, " flushs=" },
+ { S_HITS_1ST, " hits_1st=" },
+ { S_IOS_POST, " ios_post=" },
+ { S_INSCACHE, " inscache=" },
+ { S_MAX_LOOKUP, " max_lookup=" },
+ { S_NO_RW, " no_rw=" },
+ { S_NOSYNC, " nosync=" },
+ { S_PROHIBITPAGEIO, " ProhibitPageIO=" },
+ { S_RECONSTRUCT_EI, " reconstruct_ei=" },
+ { S_RECONSTRUCT_DEV, " reconstruct_dev=" },
+ { S_REDO, " redo=" },
+ { S_REQUEUE, " requeue=" },
+ { S_STRIPE_ERROR, " stripe_error=" },
+ { S_XORS, " xors=" },
+};
+
+/*
+ * A RAID set.
+ */
+typedef void (*xor_function_t)(unsigned count, unsigned long **data);
+struct raid_set {
+ struct dm_target *ti; /* Target pointer. */
+
+ struct {
+ unsigned long flags; /* State flags. */
+ spinlock_t in_lock; /* Protects central input list below. */
+ struct bio_list in; /* Pending ios (central input list). */
+ struct bio_list work; /* ios work set. */
+ wait_queue_head_t suspendq; /* suspend synchronization. */
+ atomic_t in_process; /* counter of queued bios (suspendq). */
+ atomic_t in_process_max;/* counter of queued bios max. */
+
+ /* io work. */
+ struct workqueue_struct *wq;
+ struct delayed_work dws;
+ } io;
+
+ /* External locking. */
+ struct dm_raid45_locking_type *locking;
+
+ struct stripe_cache sc; /* Stripe cache for this set. */
+
+ /* Xor optimization. */
+ struct {
+ struct xor_func *f;
+ unsigned chunks;
+ unsigned speed;
+ } xor;
+
+ /* Recovery parameters. */
+ struct recover {
+ struct dm_dirty_log *dl; /* Dirty log. */
+ struct dm_region_hash *rh; /* Region hash. */
+
+ /* dm-mem-cache client resource context for recovery stripes. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ struct list_head stripes; /* List of recovery stripes. */
+
+ region_t nr_regions;
+ region_t nr_regions_to_recover;
+ region_t nr_regions_recovered;
+ unsigned long start_jiffies;
+ unsigned long end_jiffies;
+
+ unsigned bandwidth; /* Recovery bandwidth [%]. */
+ unsigned bandwidth_work; /* Recovery bandwidth [factor]. */
+ unsigned bandwidth_parm; /* " constructor parm. */
+ unsigned io_size; /* io size <= chunk size. */
+ unsigned io_size_parm; /* io size ctr parameter. */
+
+ /* recovery io throttling. */
+ atomic_t io_count[2]; /* counter recover/regular io. */
+ unsigned long last_jiffies;
+
+ struct dm_region *reg; /* Actual region to recover. */
+ sector_t pos; /* Position within region to recover. */
+ sector_t end; /* End of region to recover. */
+ } recover;
+
+ /* RAID set parameters. */
+ struct {
+ struct raid_type *raid_type; /* RAID type (eg, RAID4). */
+ unsigned raid_parms; /* # variable raid parameters. */
+
+ unsigned chunk_size; /* Sectors per chunk. */
+ unsigned chunk_size_parm;
+ unsigned chunk_mask; /* Mask for amount. */
+ unsigned chunk_shift; /* rsector chunk size shift. */
+
+ unsigned io_size; /* Sectors per io. */
+ unsigned io_size_parm;
+ unsigned io_mask; /* Mask for amount. */
+ unsigned io_shift_mask; /* Mask for raid_address(). */
+ unsigned io_shift; /* rsector io size shift. */
+ unsigned pages_per_io; /* Pages per io. */
+
+ sector_t sectors_per_dev; /* Sectors per device. */
+
+ atomic_t failed_devs; /* Amount of devices failed. */
+
+ /* Index of device to initialize. */
+ int dev_to_init;
+ int dev_to_init_parm;
+
+ /* Raid devices dynamically allocated. */
+ unsigned raid_devs; /* # of RAID devices below. */
+ unsigned data_devs; /* # of RAID data devices. */
+
+ int ei; /* index of failed RAID device. */
+
+ /* index of dedicated parity device (i.e. RAID4). */
+ int pi;
+ int pi_parm; /* constructor parm for status output. */
+ } set;
+
+ /* REMOVEME: devel stats counters. */
+ atomic_t stats[S_NR_STATS];
+
+ /* Dynamically allocated temporary pointers for xor(). */
+ unsigned long **data;
+
+ /* Dynamically allocated RAID devices. Alignment? */
+ struct raid_dev dev[0];
+};
+
+
+BITOPS(RS, Bandwidth, raid_set, RS_RECOVERY_BANDWIDTH)
+BITOPS(RS, CheckOverwrite, raid_set, RS_CHECK_OVERWRITE)
+BITOPS(RS, Dead, raid_set, RS_DEAD)
+BITOPS(RS, DevelStats, raid_set, RS_DEVEL_STATS)
+BITOPS(RS, IoError, raid_set, RS_IO_ERROR)
+BITOPS(RS, Recover, raid_set, RS_RECOVER)
+BITOPS(RS, RegionGet, raid_set, RS_REGION_GET)
+BITOPS(RS, ScBusy, raid_set, RS_SC_BUSY)
+BITOPS(RS, Suspended, raid_set, RS_SUSPENDED)
+#undef BITOPS
+
+#define PageIO(page) PageChecked(page)
+#define AllowPageIO(page) SetPageChecked(page)
+#define ProhibitPageIO(page) ClearPageChecked(page)
+
+/*-----------------------------------------------------------------
+ * Raid-4/5 set structures.
+ *---------------------------------------------------------------*/
+/* RAID level definitions. */
+enum raid_level {
+ raid4,
+ raid5,
+};
+
+/* Symmetric/Asymmetric, Left/Right parity rotating algorithms. */
+enum raid_algorithm {
+ none,
+ left_asym,
+ right_asym,
+ left_sym,
+ right_sym,
+};
+
+struct raid_type {
+ const char *name; /* RAID algorithm. */
+ const char *descr; /* Descriptor text for logging. */
+ const unsigned parity_devs; /* # of parity devices. */
+ const unsigned minimal_devs; /* minimal # of devices in set. */
+ const enum raid_level level; /* RAID level. */
+ const enum raid_algorithm algorithm; /* RAID algorithm. */
+};
+
+/* Supported raid types and properties. */
+static struct raid_type raid_types[] = {
+ {"raid4", "RAID4 (dedicated parity disk)", 1, 3, raid4, none},
+ {"raid5_la", "RAID5 (left asymmetric)", 1, 3, raid5, left_asym},
+ {"raid5_ra", "RAID5 (right asymmetric)", 1, 3, raid5, right_asym},
+ {"raid5_ls", "RAID5 (left symmetric)", 1, 3, raid5, left_sym},
+ {"raid5_rs", "RAID5 (right symmetric)", 1, 3, raid5, right_sym},
+};
+
+/* Address as calculated by raid_address(). */
+struct address {
+ sector_t key; /* Hash key (start address of stripe). */
+ unsigned di, pi; /* Data and parity disks index. */
+};
+
+/* REMOVEME: reset statistics counters. */
+static void stats_reset(struct raid_set *rs)
+{
+ unsigned s = S_NR_STATS;
+
+ while (s--)
+ atomic_set(rs->stats + s, 0);
+}
+
+/*----------------------------------------------------------------
+ * RAID set management routines.
+ *--------------------------------------------------------------*/
+/*
+ * Begin small helper functions.
+ */
+/* Queue (optionally delayed) io work. */
+static void wake_do_raid_delayed(struct raid_set *rs, unsigned long delay)
+{
+ struct delayed_work *dws = &rs->io.dws;
+
+ cancel_delayed_work(dws);
+ queue_delayed_work(rs->io.wq, dws, delay);
+}
+
+/* Queue io work immediately (called from region hash too). */
+static INLINE void wake_do_raid(void *context)
+{
+ wake_do_raid_delayed(context, 0);
+}
+
+/* Wait until all io has been processed. */
+static INLINE void wait_ios(struct raid_set *rs)
+{
+ wait_event(rs->io.suspendq, !atomic_read(&rs->io.in_process));
+}
+
+/* Declare io queued to device. */
+static INLINE void io_dev_queued(struct raid_dev *dev)
+{
+ set_bit(IO_QUEUED, &dev->flags);
+}
+
+/* Io queued to device? Test and reset the flag. */
+static inline int io_dev_clear(struct raid_dev *dev)
+{
+ return test_and_clear_bit(IO_QUEUED, &dev->flags);
+}
+
+/* Get an io reference. */
+static INLINE void io_get(struct raid_set *rs)
+{
+ int p = atomic_inc_return(&rs->io.in_process);
+
+ if (p > atomic_read(&rs->io.in_process_max))
+ atomic_set(&rs->io.in_process_max, p); /* REMOVEME: max. */
+}
+
+/* Put the io reference and conditionally wake io waiters. */
+static INLINE void io_put(struct raid_set *rs)
+{
+ /* Intel: rebuild data corrupter? */
+ if (!atomic_read(&rs->io.in_process)) {
+ DMERR("%s would go negative!!!", __func__);
+ return;
+ }
+
+ if (atomic_dec_and_test(&rs->io.in_process))
+ wake_up(&rs->io.suspendq);
+}
+
+/* Calculate device sector offset. */
+static INLINE sector_t _sector(struct raid_set *rs, struct bio *bio)
+{
+ sector_t sector = bio->bi_sector;
+
+ sector_div(sector, rs->set.data_devs);
+ return sector;
+}
+
+/* Test device operational. */
+static INLINE int dev_operational(struct raid_set *rs, unsigned p)
+{
+ return !test_bit(DEVICE_FAILED, &rs->dev[p].flags);
+}
+
+/* Return # of active stripes in stripe cache. */
+static INLINE int sc_active(struct stripe_cache *sc)
+{
+ return atomic_read(&sc->active_stripes);
+}
+
+/* Test io pending on stripe. */
+static INLINE int stripe_io(struct stripe *stripe)
+{
+ return atomic_read(&stripe->io.pending);
+}
+
+static INLINE void stripe_io_inc(struct stripe *stripe)
+{
+ atomic_inc(&stripe->io.pending);
+}
+
+static INLINE void stripe_io_dec(struct stripe *stripe)
+{
+ atomic_dec(&stripe->io.pending);
+}
+
+/* Wrapper needed by for_each_io_dev(). */
+static void _stripe_io_inc(struct stripe *stripe, unsigned p)
+{
+ stripe_io_inc(stripe);
+}
+
+/* Error a stripe. */
+static INLINE void stripe_error(struct stripe *stripe, struct page *page)
+{
+ SetStripeError(stripe);
+ SetPageError(page);
+ atomic_inc(RS(stripe->sc)->stats + S_STRIPE_ERROR);
+}
+
+/* Page IOed ok. */
+enum dirty_type { CLEAN, DIRTY };
+static INLINE void page_set(struct page *page, enum dirty_type type)
+{
+ switch (type) {
+ case DIRTY:
+ SetPageDirty(page);
+ AllowPageIO(page);
+ break;
+
+ case CLEAN:
+ ClearPageDirty(page);
+ break;
+
+ default:
+ BUG();
+ }
+
+ SetPageUptodate(page);
+ ClearPageError(page);
+}
+
+/* Return region state for a sector. */
+static INLINE int
+region_state(struct raid_set *rs, sector_t sector, unsigned long state)
+{
+ struct dm_region_hash *rh = rs->recover.rh;
+
+ return RSRecover(rs) ?
+ (dm_rh_get_state(rh, dm_rh_sector_to_region(rh, sector), 1) &
+ state) : 0;
+}
+
+/* Is the RAID set degraded (io error on a member device)? */
+static inline int raid_set_degraded(struct raid_set *rs)
+{
+ return RSIoError(rs);
+}
+
+/* Check # of devices which may fail in a raid set. */
+static INLINE int raid_set_operational(struct raid_set *rs)
+{
+ /* Too many failed devices -> BAD. */
+ return atomic_read(&rs->set.failed_devs) <=
+ rs->set.raid_type->parity_devs;
+}
+
+/*
+ * Return true in case a page_list should be read/written
+ *
+ * Conditions to read/write:
+ * o 1st page in list not uptodate
+ * o 1st page in list dirty
+ * o if we optimized io away, we flag it using the pages checked bit.
+ */
+static INLINE unsigned page_io(struct page *page)
+{
+ /* Optimization: page was flagged to need io during first run. */
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ return 1;
+ }
+
+ /* Avoid io if prohibited or a locked page. */
+ if (!PageIO(page) || PageLocked(page))
+ return 0;
+
+ if (!PageUptodate(page) || PageDirty(page)) {
+ /* Flag page needs io for second run optimization. */
+ SetPagePrivate(page);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Call a function on each page list needing io. */
+static INLINE unsigned
+for_each_io_dev(struct raid_set *rs, struct stripe *stripe,
+ void (*f_io)(struct stripe *stripe, unsigned p))
+{
+ unsigned p = rs->set.raid_devs, r = 0;
+
+ while (p--) {
+ if (page_io(PAGE(stripe, p))) {
+ f_io(stripe, p);
+ r++;
+ }
+ }
+
+ return r;
+}
+
+/* Reconstruct a particular device? */
+static INLINE int dev_to_init(struct raid_set *rs)
+{
+ return rs->set.dev_to_init > -1;
+}
+
+/*
+ * Index of device to calculate parity on.
+ * Either the parity device index *or* the selected device to init
+ * after a spare replacement.
+ */
+static INLINE unsigned dev_for_parity(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+
+ return dev_to_init(rs) ? rs->set.dev_to_init : stripe->idx.parity;
+}
+
+/* Return the index of the device to be recovered. */
+static int idx_get(struct raid_set *rs)
+{
+ /* Avoid reading in pages which get reconstructed anyway. */
+ if (dev_to_init(rs))
+ return rs->set.dev_to_init;
+ else if (rs->set.raid_type->level == raid4)
+ return rs->set.pi;
+
+ return -1;
+}
+
+/* RAID set congested function. */
+static int raid_set_congested(void *congested_data, int bdi_bits)
+{
+ struct raid_set *rs = congested_data;
+ int r = 0; /* Assume uncongested. */
+ unsigned p = rs->set.raid_devs;
+
+ /* If any of our component devices are overloaded. */
+ while (p--) {
+ struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
+
+ r |= bdi_congested(&q->backing_dev_info, bdi_bits);
+ }
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (r ? S_CONGESTED : S_NOT_CONGESTED));
+ return r;
+}
+
+/* Display RAID set dead message once. */
+static void raid_set_dead(struct raid_set *rs)
+{
+ if (!TestSetRSDead(rs)) {
+ unsigned p;
+ char buf[BDEVNAME_SIZE];
+
+ DMERR("FATAL: too many devices failed -> RAID set dead");
+
+ for (p = 0; p < rs->set.raid_devs; p++) {
+ if (!dev_operational(rs, p))
+ DMERR("device /dev/%s failed",
+ bdevname(rs->dev[p].dev->bdev, buf));
+ }
+ }
+}
+
+/* RAID set degrade check. */
+static INLINE int
+raid_set_check_and_degrade(struct raid_set *rs,
+ struct stripe *stripe, unsigned p)
+{
+ if (test_and_set_bit(DEVICE_FAILED, &rs->dev[p].flags))
+ return -EPERM;
+
+ /* Throw an event in case of member device errors. */
+ dm_table_event(rs->ti->table);
+ atomic_inc(&rs->set.failed_devs);
+
+ /* Only log the first member error. */
+ if (!TestSetRSIoError(rs)) {
+ char buf[BDEVNAME_SIZE];
+
+ /* Store index for recovery. */
+ mb();
+ rs->set.ei = p;
+ mb();
+
+ DMERR("CRITICAL: %sio error on device /dev/%s "
+ "in region=%llu; DEGRADING RAID set",
+ stripe ? "" : "FAKED ",
+ bdevname(rs->dev[p].dev->bdev, buf),
+ (unsigned long long) (stripe ? stripe->key : 0));
+ DMERR("further device error messages suppressed");
+ }
+
+ return 0;
+}
+
+static void
+raid_set_check_degrade(struct raid_set *rs, struct stripe *stripe)
+{
+ unsigned p = rs->set.raid_devs;
+
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ if (PageError(page)) {
+ ClearPageError(page);
+ raid_set_check_and_degrade(rs, stripe, p);
+ }
+ }
+}
+
+/* RAID set upgrade check. */
+static int raid_set_check_and_upgrade(struct raid_set *rs, unsigned p)
+{
+ if (!test_and_clear_bit(DEVICE_FAILED, &rs->dev[p].flags))
+ return -EPERM;
+
+ if (atomic_dec_and_test(&rs->set.failed_devs)) {
+ ClearRSIoError(rs);
+ rs->set.ei = -1;
+ }
+
+ return 0;
+}
+
+/* Lookup a RAID device by name or by major:minor number. */
+union dev_lookup {
+ const char *dev_name;
+ struct raid_dev *dev;
+};
+enum lookup_type { byname, bymajmin, bynumber };
+static int raid_dev_lookup(struct raid_set *rs, enum lookup_type by,
+ union dev_lookup *dl)
+{
+ unsigned p;
+
+ /*
+ * Must be an incremental loop, because the device array
+ * can have empty slots still on calls from raid_ctr()
+ */
+ for (p = 0; p < rs->set.raid_devs; p++) {
+ char buf[BDEVNAME_SIZE];
+ struct raid_dev *dev = rs->dev + p;
+
+ if (!dev->dev)
+ break;
+
+ /* Format dev string appropriately if necessary. */
+ if (by == byname)
+ bdevname(dev->dev->bdev, buf);
+ else if (by == bymajmin)
+ format_dev_t(buf, dev->dev->bdev->bd_dev);
+
+ /* Do the actual check. */
+ if (by == bynumber) {
+ if (dl->dev->dev->bdev->bd_dev ==
+ dev->dev->bdev->bd_dev)
+ return p;
+ } else if (!strcmp(dl->dev_name, buf))
+ return p;
+ }
+
+ return -ENODEV;
+}
+
+/* End io wrapper. */
+static INLINE void
+_bio_endio(struct raid_set *rs, struct bio *bio, int error)
+{
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (bio_data_dir(bio) == WRITE ?
+ S_BIOS_ENDIO_WRITE : S_BIOS_ENDIO_READ));
+ bio_endio(bio, error);
+ io_put(rs); /* Wake any suspend waiters. */
+}
+
+/*
+ * End small helper functions.
+ */
+
+
+/*
+ * Stripe hash functions
+ */
+/* Initialize/destroy stripe hash. */
+static int hash_init(struct stripe_hash *hash, unsigned stripes)
+{
+ unsigned buckets = 2, max_buckets = stripes / 4;
+ unsigned hash_primes[] = {
+ /* Table of primes for hash_fn/table size optimization. */
+ 3, 7, 13, 27, 53, 97, 193, 389, 769,
+ 1543, 3079, 6151, 12289, 24593,
+ };
+
+ /* Calculate number of buckets (smallest power of 2 >= stripes / 4). */
+ while (buckets < max_buckets)
+ buckets <<= 1;
+
+ /* Allocate stripe hash. */
+ hash->hash = vmalloc(buckets * sizeof(*hash->hash));
+ if (!hash->hash)
+ return -ENOMEM;
+
+ hash->buckets = buckets;
+ hash->mask = buckets - 1;
+ hash->shift = ffs(buckets);
+ if (hash->shift > ARRAY_SIZE(hash_primes) + 1)
+ hash->shift = ARRAY_SIZE(hash_primes) + 1;
+
+ BUG_ON(hash->shift - 2 >= ARRAY_SIZE(hash_primes));
+ hash->prime = hash_primes[hash->shift - 2];
+
+ /* Initialize buckets. */
+ while (buckets--)
+ INIT_LIST_HEAD(hash->hash + buckets);
+
+ return 0;
+}
+
+static INLINE void hash_exit(struct stripe_hash *hash)
+{
+ if (hash->hash) {
+ vfree(hash->hash);
+ hash->hash = NULL;
+ }
+}
+
+/* List add (head/tail/locked/unlocked) inlines. */
+enum list_lock_type { LIST_LOCKED, LIST_UNLOCKED };
+#define LIST_DEL(name, list) \
+static void stripe_ ## name ## _del(struct stripe *stripe, \
+ enum list_lock_type lock) { \
+ struct list_head *lh = stripe->lists + (list); \
+ spinlock_t *l = NULL; \
+\
+ if (lock == LIST_LOCKED) { \
+ l = stripe->sc->locks + LOCK_LRU; \
+ spin_lock_irq(l); \
+ } \
+\
+ if (!list_empty(lh)) \
+ list_del_init(lh); \
+\
+ if (lock == LIST_LOCKED) \
+ spin_unlock_irq(l); \
+}
+
+LIST_DEL(hash, LIST_HASH)
+LIST_DEL(lru, LIST_LRU)
+#undef LIST_DEL
+
+enum list_pos_type { POS_HEAD, POS_TAIL };
+#define LIST_ADD(name, list) \
+static void stripe_ ## name ## _add(struct stripe *stripe, \
+ enum list_pos_type pos, \
+ enum list_lock_type lock) { \
+ struct list_head *lh = stripe->lists + (list); \
+ struct stripe_cache *sc = stripe->sc; \
+ spinlock_t *l = NULL; \
+\
+ if (lock == LIST_LOCKED) { \
+ l = sc->locks + LOCK_LRU; \
+ spin_lock_irq(l); \
+ } \
+\
+ if (list_empty(lh)) { \
+ if (pos == POS_HEAD) \
+ list_add(lh, sc->lists + (list)); \
+ else \
+ list_add_tail(lh, sc->lists + (list)); \
+ } \
+\
+ if (lock == LIST_LOCKED) \
+ spin_unlock_irq(l); \
+}
+
+LIST_ADD(endio, LIST_ENDIO)
+LIST_ADD(io, LIST_IO)
+LIST_ADD(lru, LIST_LRU)
+#undef LIST_ADD
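+
+/*
+ * The list macros above expand to stripe_hash_del()/stripe_lru_del() and
+ * stripe_endio_add()/stripe_io_add()/stripe_lru_add(). E.g.
+ *
+ *	stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+ *
+ * appends a stripe to the cache's LRU list under the LRU lock, unless the
+ * stripe is on a list already.
+ */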
+
+#define POP(list) \
+ do { \
+ if (list_empty(sc->lists + list)) \
+ stripe = NULL; \
+ else { \
+ stripe = list_first_entry(&sc->lists[list], \
+ struct stripe, \
+ lists[list]); \
+ list_del_init(&stripe->lists[list]); \
+ } \
+ } while (0)
+
+/* Pop an available stripe off the lru list. */
+static struct stripe *stripe_lru_pop(struct stripe_cache *sc)
+{
+ struct stripe *stripe;
+ spinlock_t *lock = sc->locks + LOCK_LRU;
+
+ spin_lock_irq(lock);
+ POP(LIST_LRU);
+ spin_unlock_irq(lock);
+
+ if (stripe)
+ /* Remove from hash before reuse. */
+ stripe_hash_del(stripe, LIST_UNLOCKED);
+
+ return stripe;
+}
+
+static inline unsigned hash_fn(struct stripe_hash *hash, sector_t key)
+{
+ return (unsigned) (((key * hash->prime) >> hash->shift) & hash->mask);
+}
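+
+/*
+ * Example: for a 64 stripe cache, hash_init() ends up with
+ * buckets = 16 (max_buckets = 64 / 4), mask = 15, shift = ffs(16) = 5
+ * and prime = hash_primes[5 - 2] = 27, so
+ *
+ *	hash_fn(hash, key) == ((key * 27) >> 5) & 15
+ */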
+
+static inline struct list_head *
+hash_bucket(struct stripe_hash *hash, sector_t key)
+{
+ return hash->hash + hash_fn(hash, key);
+}
+
+/* Insert an entry into a hash. */
+static inline void hash_insert(struct stripe_hash *hash, struct stripe *stripe)
+{
+ list_add(stripe->lists + LIST_HASH, hash_bucket(hash, stripe->key));
+}
+
+/* Insert an entry into the stripe hash. */
+static inline void
+sc_insert(struct stripe_cache *sc, struct stripe *stripe)
+{
+ hash_insert(&sc->hash, stripe);
+}
+
+/* Lookup an entry in the stripe hash. */
+static inline struct stripe *
+stripe_lookup(struct stripe_cache *sc, sector_t key)
+{
+ unsigned c = 0;
+ struct stripe *stripe;
+ struct list_head *bucket = hash_bucket(&sc->hash, key);
+
+ list_for_each_entry(stripe, bucket, lists[LIST_HASH]) {
+ /* REMOVEME: statistics. */
+ if (++c > atomic_read(RS(sc)->stats + S_MAX_LOOKUP))
+ atomic_set(RS(sc)->stats + S_MAX_LOOKUP, c);
+
+ if (stripe->key == key)
+ return stripe;
+ }
+
+ return NULL;
+}
+
+/* Resize the stripe cache hash on size changes. */
+static int hash_resize(struct stripe_cache *sc)
+{
+ /* Resize threshold reached? */
+ if (atomic_read(&sc->stripes) > 2 * atomic_read(&sc->stripes_last)
+ || atomic_read(&sc->stripes) < atomic_read(&sc->stripes_last) / 4) {
+ int r;
+ struct stripe_hash hash, hash_tmp;
+ spinlock_t *lock;
+
+ r = hash_init(&hash, atomic_read(&sc->stripes));
+ if (r)
+ return r;
+
+ lock = sc->locks + LOCK_LRU;
+ spin_lock_irq(lock);
+ if (sc->hash.hash) {
+ unsigned b = sc->hash.buckets;
+ struct list_head *pos, *tmp;
+
+ /* Walk old buckets and insert into new. */
+ while (b--) {
+ list_for_each_safe(pos, tmp, sc->hash.hash + b)
+ hash_insert(&hash,
+ list_entry(pos, struct stripe,
+ lists[LIST_HASH]));
+ }
+
+ }
+
+ memcpy(&hash_tmp, &sc->hash, sizeof(hash_tmp));
+ memcpy(&sc->hash, &hash, sizeof(sc->hash));
+ atomic_set(&sc->stripes_last, atomic_read(&sc->stripes));
+ spin_unlock_irq(lock);
+
+ hash_exit(&hash_tmp);
+ }
+
+ return 0;
+}
+
+/*
+ * Stripe cache locking functions
+ */
+/* Dummy lock function for local RAID4+5. */
+static void *no_lock(sector_t key, enum dm_lock_type type)
+{
+ return &no_lock;
+}
+
+/* Dummy unlock function for local RAID4+5. */
+static void no_unlock(void *lock_handle)
+{
+}
+
+/* No locking (for local RAID 4+5). */
+static struct dm_raid45_locking_type locking_none = {
+ .lock = no_lock,
+ .unlock = no_unlock,
+};
+
+/* Clustered RAID 4+5. */
+/* FIXME: code this. */
+static struct dm_raid45_locking_type locking_cluster = {
+ .lock = no_lock,
+ .unlock = no_unlock,
+};
+
+/* Lock a stripe (for clustering). */
+static int
+stripe_lock(struct raid_set *rs, struct stripe *stripe, int rw, sector_t key)
+{
+ stripe->lock = rs->locking->lock(key, rw == READ ? DM_RAID45_SHARED :
+ DM_RAID45_EX);
+ return stripe->lock ? 0 : -EPERM;
+}
+
+/* Unlock a stripe (for clustering). */
+static void stripe_unlock(struct raid_set *rs, struct stripe *stripe)
+{
+ rs->locking->unlock(stripe->lock);
+ stripe->lock = NULL;
+}
+
+/*
+ * Stripe cache functions.
+ */
+/*
+ * Invalidate all pages of all page lists of a stripe.
+ *
+ * State for the whole list is kept in the first page only.
+ */
+static INLINE void
+stripe_pages_invalidate(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ ProhibitPageIO(page);
+ ClearPageChecked(page);
+ ClearPageDirty(page);
+ ClearPageError(page);
+ __clear_page_locked(page);
+ ClearPagePrivate(page);
+ ClearPageUptodate(page);
+ }
+}
+
+/* Prepare stripe for (re)use. */
+static INLINE void stripe_invalidate(struct stripe *stripe)
+{
+ stripe->io.flags = 0;
+ stripe_pages_invalidate(stripe);
+}
+
+/* Allow io on all chunks of a stripe. */
+static INLINE void stripe_allow_io(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--)
+ AllowPageIO(PAGE(stripe, p));
+}
+
+/* Initialize a stripe. */
+static void
+stripe_init(struct stripe_cache *sc, struct stripe *stripe)
+{
+ unsigned p = RS(sc)->set.raid_devs;
+ unsigned i;
+
+ /* Work all io chunks. */
+ while (p--) {
+ struct stripe_set *ss = stripe->ss + p;
+
+ stripe->obj[p].private = ss;
+ ss->stripe = stripe;
+
+ i = ARRAY_SIZE(ss->bl);
+ while (i--)
+ bio_list_init(ss->bl + i);
+ }
+
+ stripe->sc = sc;
+
+ i = ARRAY_SIZE(stripe->lists);
+ while (i--)
+ INIT_LIST_HEAD(stripe->lists + i);
+
+ atomic_set(&stripe->cnt, 0);
+ atomic_set(&stripe->io.pending, 0);
+
+ stripe_invalidate(stripe);
+}
+
+/* Number of pages per chunk. */
+static inline unsigned chunk_pages(unsigned io_size)
+{
+ return dm_div_up(io_size, SECTORS_PER_PAGE);
+}
+
+/* Number of pages per stripe. */
+static inline unsigned stripe_pages(struct raid_set *rs, unsigned io_size)
+{
+ return chunk_pages(io_size) * rs->set.raid_devs;
+}
+
+/* Initialize part of page_list (recovery). */
+static INLINE void stripe_zero_pl_part(struct stripe *stripe, unsigned p,
+ unsigned start, unsigned count)
+{
+ unsigned pages = chunk_pages(count);
+ /* Get offset into the page_list. */
+ struct page_list *pl = pl_elem(PL(stripe, p), start / SECTORS_PER_PAGE);
+
+ BUG_ON(!pl);
+ while (pl && pages--) {
+ BUG_ON(!pl->page);
+ memset(page_address(pl->page), 0, PAGE_SIZE);
+ pl = pl->next;
+ }
+}
+
+/* Initialize parity chunk of stripe. */
+static INLINE void stripe_zero_chunk(struct stripe *stripe, unsigned p)
+{
+ stripe_zero_pl_part(stripe, p, 0, stripe->io.size);
+}
+
+/* Return dynamic stripe structure size. */
+static INLINE size_t stripe_size(struct raid_set *rs)
+{
+ return sizeof(struct stripe) +
+ rs->set.raid_devs * sizeof(struct stripe_set);
+}
+
+/* Allocate a stripe and its memory object. */
+/* XXX adjust to cope with stripe cache and recovery stripe caches. */
+enum grow { SC_GROW, SC_KEEP };
+static struct stripe *stripe_alloc(struct stripe_cache *sc,
+ struct dm_mem_cache_client *mc,
+ enum grow grow)
+{
+ int r;
+ struct stripe *stripe;
+
+ stripe = kmem_cache_zalloc(sc->kc.cache, GFP_KERNEL);
+ if (stripe) {
+ /* Grow the dm-mem-cache by one object. */
+ if (grow == SC_GROW) {
+ r = dm_mem_cache_grow(mc, 1);
+ if (r)
+ goto err_free;
+ }
+
+ stripe->obj = dm_mem_cache_alloc(mc);
+ if (!stripe->obj)
+ goto err_shrink;
+
+ stripe_init(sc, stripe);
+ }
+
+ return stripe;
+
+err_shrink:
+ if (grow == SC_GROW)
+ dm_mem_cache_shrink(mc, 1);
+err_free:
+ kmem_cache_free(sc->kc.cache, stripe);
+ return NULL;
+}
+
+/*
+ * Free a stripe's memory object, shrink the
+ * memory cache and free the stripe itself
+ */
+static void stripe_free(struct stripe *stripe, struct dm_mem_cache_client *mc)
+{
+ dm_mem_cache_free(mc, stripe->obj);
+ dm_mem_cache_shrink(mc, 1);
+ kmem_cache_free(stripe->sc->kc.cache, stripe);
+}
+
+/* Free the recovery stripes. */
+static void stripe_recover_free(struct raid_set *rs)
+{
+ struct recover *rec = &rs->recover;
+ struct list_head *stripes = &rec->stripes;
+
+ while (!list_empty(stripes)) {
+ struct stripe *stripe = list_first_entry(stripes, struct stripe,
+ lists[LIST_RECOVER]);
+ list_del(stripe->lists + LIST_RECOVER);
+ stripe_free(stripe, rec->mem_cache_client);
+ }
+}
+
+/* Push a stripe safely onto the endio list to be handled by do_endios(). */
+static INLINE void stripe_endio_push(struct stripe *stripe)
+{
+ int wake;
+ unsigned long flags;
+ struct stripe_cache *sc = stripe->sc;
+ spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+ spin_lock_irqsave(lock, flags);
+ wake = list_empty(sc->lists + LIST_ENDIO);
+ stripe_endio_add(stripe, POS_HEAD, LIST_UNLOCKED);
+ spin_unlock_irqrestore(lock, flags);
+
+ if (wake)
+ wake_do_raid(RS(sc));
+}
+
+/* Protected check for stripe cache endio list empty. */
+static INLINE int stripe_endio_empty(struct stripe_cache *sc)
+{
+ int r;
+ spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+ spin_lock_irq(lock);
+ r = list_empty(sc->lists + LIST_ENDIO);
+ spin_unlock_irq(lock);
+
+ return r;
+}
+
+/* Safely pop a stripe off the endio list. */
+static struct stripe *stripe_endio_pop(struct stripe_cache *sc)
+{
+ struct stripe *stripe;
+ spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+ /* This runs in parallel with endio(). */
+ spin_lock_irq(lock);
+ POP(LIST_ENDIO);
+ spin_unlock_irq(lock);
+ return stripe;
+}
+
+#undef POP
+
+/* Evict stripe from cache. */
+static void stripe_evict(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ stripe_hash_del(stripe, LIST_UNLOCKED); /* Take off hash. */
+
+ if (list_empty(stripe->lists + LIST_LRU)) {
+ stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+ atomic_inc(rs->stats + S_EVICT); /* REMOVEME: statistics. */
+ }
+}
+
+/* Grow stripe cache. */
+static int
+sc_grow(struct stripe_cache *sc, unsigned stripes, enum grow grow)
+{
+ int r = 0;
+ struct raid_set *rs = RS(sc);
+
+ /* Try to allocate this many (additional) stripes. */
+ while (stripes--) {
+ struct stripe *stripe =
+ stripe_alloc(sc, sc->mem_cache_client, grow);
+
+ if (likely(stripe)) {
+ stripe->io.size = rs->set.io_size;
+ stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+ atomic_inc(&sc->stripes);
+ } else {
+ r = -ENOMEM;
+ break;
+ }
+ }
+
+ ClearRSScBusy(rs);
+ return r ? r : hash_resize(sc);
+}
+
+/* Shrink stripe cache. */
+static int sc_shrink(struct stripe_cache *sc, unsigned stripes)
+{
+ int r = 0;
+
+ /* Try to get unused stripe from LRU list. */
+ while (stripes--) {
+ struct stripe *stripe;
+
+ stripe = stripe_lru_pop(sc);
+ if (stripe) {
+ /* An LRU stripe must never have ios pending! */
+ BUG_ON(stripe_io(stripe));
+ stripe_free(stripe, sc->mem_cache_client);
+ atomic_dec(&sc->stripes);
+ } else {
+ r = -ENOENT;
+ break;
+ }
+ }
+
+ /* Check if stats are still sane. */
+ if (atomic_read(&sc->max_active_stripes) >
+ atomic_read(&sc->stripes))
+ atomic_set(&sc->max_active_stripes, 0);
+
+ if (r)
+ return r;
+
+ ClearRSScBusy(RS(sc));
+ return hash_resize(sc);
+}
+
+/* Create stripe cache. */
+static int sc_init(struct raid_set *rs, unsigned stripes)
+{
+ unsigned i, nr;
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+ struct recover *rec = &rs->recover;
+
+ /* Initialize lists and locks. */
+ i = ARRAY_SIZE(sc->lists);
+ while (i--)
+ INIT_LIST_HEAD(sc->lists + i);
+
+ i = NR_LOCKS;
+ while (i--)
+ spin_lock_init(sc->locks + i);
+
+ /* Initialize atomic variables. */
+ atomic_set(&sc->stripes, 0);
+ atomic_set(&sc->stripes_last, 0);
+ atomic_set(&sc->stripes_to_shrink, 0);
+ atomic_set(&sc->active_stripes, 0);
+ atomic_set(&sc->max_active_stripes, 0); /* REMOVEME: statistics. */
+
+ /*
+ * We need a runtime unique # to suffix the kmem cache name
+ * because we'll have one for each active RAID set.
+ */
+ nr = atomic_inc_return(&_stripe_sc_nr);
+ sprintf(sc->kc.name, "%s_%d", TARGET, nr);
+ sc->kc.cache = kmem_cache_create(sc->kc.name, stripe_size(rs),
+ 0, 0, NULL);
+ if (!sc->kc.cache)
+ return -ENOMEM;
+
+ /* Create memory cache client context for RAID stripe cache. */
+ sc->mem_cache_client =
+ dm_mem_cache_client_create(stripes, rs->set.raid_devs,
+ chunk_pages(rs->set.io_size));
+ if (IS_ERR(sc->mem_cache_client))
+ return PTR_ERR(sc->mem_cache_client);
+
+ /* Create memory cache client context for RAID recovery stripe(s). */
+ rec->mem_cache_client =
+ dm_mem_cache_client_create(MAX_RECOVER, rs->set.raid_devs,
+ chunk_pages(rec->io_size));
+ if (IS_ERR(rec->mem_cache_client))
+ return PTR_ERR(rec->mem_cache_client);
+
+ /* Allocate stripe for set recovery. */
+ /* XXX: cope with MAX_RECOVER > 1. */
+ INIT_LIST_HEAD(&rec->stripes);
+ for (i = 0; i < MAX_RECOVER; i++) {
+ stripe = stripe_alloc(sc, rec->mem_cache_client, SC_KEEP);
+ if (!stripe)
+ return -ENOMEM;
+
+ SetStripeRecover(stripe);
+ stripe->io.size = rec->io_size;
+ list_add(stripe->lists + LIST_RECOVER, &rec->stripes);
+ }
+
+ /*
+ * Allocate the stripe objects from the
+ * cache and add them to the LRU list.
+ */
+ return sc_grow(sc, stripes, SC_KEEP);
+}
+
+/* Destroy the stripe cache. */
+static void sc_exit(struct stripe_cache *sc)
+{
+ if (sc->kc.cache) {
+ BUG_ON(sc_shrink(sc, atomic_read(&sc->stripes)));
+ kmem_cache_destroy(sc->kc.cache);
+ }
+
+ if (sc->mem_cache_client)
+ dm_mem_cache_client_destroy(sc->mem_cache_client);
+
+ ClearRSRecover(RS(sc));
+ stripe_recover_free(RS(sc));
+ if (RS(sc)->recover.mem_cache_client)
+ dm_mem_cache_client_destroy(RS(sc)->recover.mem_cache_client);
+
+ hash_exit(&sc->hash);
+}
+
+/*
+ * Calculate RAID address
+ *
+ * Delivers tuple with the index of the data disk holding the chunk
+ * in the set, the parity disk's index and the start of the stripe
+ * within the address space of the set (used as the stripe cache hash key).
+ */
+/* thx MD. */
+static struct address *
+raid_address(struct raid_set *rs, sector_t sector, struct address *addr)
+{
+ unsigned data_devs = rs->set.data_devs, di, pi,
+ raid_devs = rs->set.raid_devs;
+ sector_t stripe, tmp;
+
+ /*
+ * chunk_number = sector / chunk_size
+ * stripe = chunk_number / data_devs
+ * di = stripe % data_devs;
+ */
+ stripe = sector >> rs->set.chunk_shift;
+ di = sector_div(stripe, data_devs);
+
+ switch (rs->set.raid_type->level) {
+ case raid5:
+ tmp = stripe;
+ pi = sector_div(tmp, raid_devs);
+
+ switch (rs->set.raid_type->algorithm) {
+ case left_asym: /* Left asymmetric. */
+ pi = data_devs - pi;
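+ /* Fall through to right asymmetric. */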
+ case right_asym: /* Right asymmetric. */
+ if (di >= pi)
+ di++;
+ break;
+
+ case left_sym: /* Left symmetric. */
+ pi = data_devs - pi;
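+ /* Fall through to right symmetric. */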
+ case right_sym: /* Right symmetric. */
+ di = (pi + di + 1) % raid_devs;
+ break;
+
+ default:
+ DMERR("Unknown RAID algorithm %d",
+ rs->set.raid_type->algorithm);
+ goto out;
+ }
+
+ break;
+
+ case raid4:
+ pi = rs->set.pi;
+ if (di >= pi)
+ di++;
+ break;
+
+ default:
+ DMERR("Unknown RAID level %d", rs->set.raid_type->level);
+ goto out;
+ }
+
+ /*
+ * Hash key = start offset on any single device of the RAID set;
+ * adjusted in case io size differs from chunk size.
+ */
+ addr->key = (stripe << rs->set.chunk_shift) +
+ (sector & rs->set.io_shift_mask);
+ addr->di = di;
+ addr->pi = pi;
+
+out:
+ return addr;
+}
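+
+/*
+ * Worked example (sketch, assuming io_size == chunk_size): a 3 device
+ * raid5_ls set with chunk_size = 64 sectors (chunk_shift = 6) and
+ * data_devs = 2. For sector = 200:
+ *
+ *	chunk number = 200 >> 6 = 3, di = 3 % 2 = 1, stripe = 1
+ *	pi = 1 % 3 = 1, left symmetric -> pi = 2 - 1 = 1
+ *	di = (1 + 1 + 1) % 3 = 0
+ *
+ * i.e. the data chunk lives on device 0, parity on device 1, and the
+ * hash key is stripe << chunk_shift = 64.
+ */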
+
+/*
+ * Copy data across between stripe pages and bio vectors.
+ *
+ * Pay attention to data alignment in stripe and bio pages.
+ */
+static void
+bio_copy_page_list(int rw, struct stripe *stripe,
+ struct page_list *pl, struct bio *bio)
+{
+ unsigned i, page_offset;
+ void *page_addr;
+ struct raid_set *rs = RS(stripe->sc);
+ struct bio_vec *bv;
+
+ /* Get start page in page list for this sector. */
+ i = (bio->bi_sector & rs->set.io_mask) / SECTORS_PER_PAGE;
+ pl = pl_elem(pl, i);
+
+ page_addr = page_address(pl->page);
+ page_offset = to_bytes(bio->bi_sector & (SECTORS_PER_PAGE - 1));
+
+ /* Walk all segments and copy data across between bio_vecs and pages. */
+ bio_for_each_segment(bv, bio, i) {
+ int len = bv->bv_len, size;
+ unsigned bio_offset = 0;
+ void *bio_addr = __bio_kmap_atomic(bio, i, KM_USER0);
+redo:
+ size = (page_offset + len > PAGE_SIZE) ?
+ PAGE_SIZE - page_offset : len;
+
+ if (rw == READ)
+ memcpy(bio_addr + bio_offset,
+ page_addr + page_offset, size);
+ else
+ memcpy(page_addr + page_offset,
+ bio_addr + bio_offset, size);
+
+ page_offset += size;
+ if (page_offset == PAGE_SIZE) {
+ /*
+ * We reached the end of the chunk page ->
+ * need to refer to the next one to copy more data.
+ */
+ len -= size;
+ if (len) {
+ /* Get next page. */
+ pl = pl->next;
+ BUG_ON(!pl);
+ page_addr = page_address(pl->page);
+ page_offset = 0;
+ bio_offset += size;
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BIO_COPY_PL_NEXT);
+ goto redo;
+ }
+ }
+
+ __bio_kunmap_atomic(bio_addr, KM_USER0);
+ }
+}
+
+/*
+ * Xor optimization macros.
+ */
+/* Xor data pointer declaration and initialization macros. */
+#define DECLARE_2 unsigned long *d0 = data[0], *d1 = data[1]
+#define DECLARE_3 DECLARE_2, *d2 = data[2]
+#define DECLARE_4 DECLARE_3, *d3 = data[3]
+#define DECLARE_5 DECLARE_4, *d4 = data[4]
+#define DECLARE_6 DECLARE_5, *d5 = data[5]
+#define DECLARE_7 DECLARE_6, *d6 = data[6]
+#define DECLARE_8 DECLARE_7, *d7 = data[7]
+
+/* Xor unroll macros. */
+#define D2(n) d0[n] = d0[n] ^ d1[n]
+#define D3(n) D2(n) ^ d2[n]
+#define D4(n) D3(n) ^ d3[n]
+#define D5(n) D4(n) ^ d4[n]
+#define D6(n) D5(n) ^ d5[n]
+#define D7(n) D6(n) ^ d6[n]
+#define D8(n) D7(n) ^ d7[n]
+
+#define X_2(macro, offset) macro(offset); macro(offset + 1);
+#define X_4(macro, offset) X_2(macro, offset); X_2(macro, offset + 2);
+#define X_8(macro, offset) X_4(macro, offset); X_4(macro, offset + 4);
+#define X_16(macro, offset) X_8(macro, offset); X_8(macro, offset + 8);
+#define X_32(macro, offset) X_16(macro, offset); X_16(macro, offset + 16);
+#define X_64(macro, offset) X_32(macro, offset); X_32(macro, offset + 32);
+
+/* Define a _xor_#chunks_#xors_per_run() function. */
+#define _XOR(chunks, xors_per_run) \
+static void _xor ## chunks ## _ ## xors_per_run(unsigned long **data) \
+{ \
+ unsigned end = XOR_SIZE / sizeof(data[0]), i; \
+ DECLARE_ ## chunks; \
+\
+ for (i = 0; i < end; i += xors_per_run) { \
+ X_ ## xors_per_run(D ## chunks, i); \
+ } \
+}
+
+/* Define xor functions for 2 - 8 chunks. */
+#define MAKE_XOR_PER_RUN(xors_per_run) \
+ _XOR(2, xors_per_run); _XOR(3, xors_per_run); \
+ _XOR(4, xors_per_run); _XOR(5, xors_per_run); \
+ _XOR(6, xors_per_run); _XOR(7, xors_per_run); \
+ _XOR(8, xors_per_run);
+
+MAKE_XOR_PER_RUN(8) /* Define _xor_*_8() functions. */
+MAKE_XOR_PER_RUN(16) /* Define _xor_*_16() functions. */
+MAKE_XOR_PER_RUN(32) /* Define _xor_*_32() functions. */
+MAKE_XOR_PER_RUN(64) /* Define _xor_*_64() functions. */
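+
+/*
+ * E.g. _xor3_16(data), generated above, walks XOR_SIZE bytes in 16-fold
+ * unrolled steps, each one expanding to
+ *
+ *	d0[n] = d0[n] ^ d1[n] ^ d2[n]
+ *
+ * i.e. chunks 1 and 2 get xor'ed into chunk 0 (the parity chunk).
+ */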
+
+#define MAKE_XOR(xors_per_run) \
+struct { \
+ void (*f)(unsigned long **); \
+} static xor_funcs ## xors_per_run[] = { \
+ { NULL }, \
+ { NULL }, \
+ { _xor2_ ## xors_per_run }, \
+ { _xor3_ ## xors_per_run }, \
+ { _xor4_ ## xors_per_run }, \
+ { _xor5_ ## xors_per_run }, \
+ { _xor6_ ## xors_per_run }, \
+ { _xor7_ ## xors_per_run }, \
+ { _xor8_ ## xors_per_run }, \
+}; \
+\
+static void xor_ ## xors_per_run(unsigned n, unsigned long **data) \
+{ \
+ /* Call respective function for amount of chunks. */ \
+ xor_funcs ## xors_per_run[n].f(data); \
+}
+
+/* Define xor_8() - xor_64 functions. */
+MAKE_XOR(8)
+MAKE_XOR(16)
+MAKE_XOR(32)
+MAKE_XOR(64)
+
+/* Maximum number of chunks which can be xor'ed in one go. */
+#define XOR_CHUNKS_MAX (ARRAY_SIZE(xor_funcs8) - 1)
+
+struct xor_func {
+ xor_function_t f;
+ const char *name;
+} static xor_funcs[] = {
+ {xor_8, "xor_8"},
+ {xor_16, "xor_16"},
+ {xor_32, "xor_32"},
+ {xor_64, "xor_64"},
+};
+
+/*
+ * Calculate parity.
+ *
+ * This indexes into the page list of the stripe.
+ *
+ * All chunks will be xored into the parity chunk
+ * in maximum groups of xor.chunks.
+ *
+ * FIXME: try mapping the pages on discontiguous memory.
+ */
+static void xor(struct stripe *stripe, unsigned pi, unsigned sector)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned max_chunks = rs->xor.chunks, n, p;
+ unsigned o = sector / SECTORS_PER_PAGE; /* Offset into the page_list. */
+ unsigned long **d = rs->data;
+ xor_function_t xor_f = rs->xor.f->f;
+
+ /* Address of parity page to xor into. */
+ d[0] = page_address(pl_elem(PL(stripe, pi), o)->page);
+
+ /* Preset pointers to data pages. */
+ for (n = 1, p = rs->set.raid_devs; p--; ) {
+ if (p != pi && PageIO(PAGE(stripe, p)))
+ d[n++] = page_address(pl_elem(PL(stripe, p), o)->page);
+
+ /* If max chunks -> xor. */
+ if (n == max_chunks) {
+ xor_f(n, d);
+ n = 1;
+ }
+ }
+
+ /* If chunks -> xor. */
+ if (n > 1)
+ xor_f(n, d);
+
+ /* Set parity page uptodate and clean. */
+ page_set(PAGE(stripe, pi), CLEAN);
+}
+
+/* Common xor loop through all stripe page lists. */
+static void common_xor(struct stripe *stripe, sector_t count,
+ unsigned off, unsigned p)
+{
+ unsigned sector;
+
+ for (sector = off; sector < count; sector += SECTORS_PER_XOR)
+ xor(stripe, p, sector);
+
+ atomic_inc(RS(stripe->sc)->stats + S_XORS); /* REMOVEME: statistics. */
+}
+
+/*
+ * Calculate parity sectors on intact stripes.
+ *
+ * Need to calculate the raid address for the recover stripe, because its
+ * io size differs from, and is typically larger than, the regular io size.
+ */
+static void parity_xor(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned chunk_size = rs->set.chunk_size,
+ io_size = stripe->io.size,
+ xor_size = chunk_size > io_size ? io_size : chunk_size;
+ sector_t off;
+
+ /* This can be the recover stripe with a larger io size. */
+ for (off = 0; off < io_size; off += xor_size) {
+ unsigned pi;
+
+ /*
+ * Recover stripe likely is bigger than regular io
+ * ones and has no precalculated parity disk index ->
+ * need to calculate RAID address.
+ */
+ if (unlikely(StripeRecover(stripe))) {
+ struct address addr;
+
+ raid_address(rs,
+ (stripe->key + off) * rs->set.data_devs,
+ &addr);
+ pi = addr.pi;
+ stripe_zero_pl_part(stripe, pi, off,
+ rs->set.chunk_size);
+ } else
+ pi = stripe->idx.parity;
+
+ common_xor(stripe, xor_size, off, pi);
+ page_set(PAGE(stripe, pi), DIRTY);
+ }
+}
+
+/* Reconstruct missing chunk. */
+static void reconstruct_xor(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int p = stripe->idx.recover;
+
+ BUG_ON(p < 0);
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (raid_set_degraded(rs) ?
+ S_RECONSTRUCT_EI : S_RECONSTRUCT_DEV));
+
+ /* Zero chunk to be reconstructed. */
+ stripe_zero_chunk(stripe, p);
+ common_xor(stripe, stripe->io.size, 0, p);
+}
+
+/*
+ * Try getting a stripe either from the hash or from the lru list
+ */
+static inline void _stripe_get(struct stripe *stripe)
+{
+ atomic_inc(&stripe->cnt);
+}
+
+static struct stripe *stripe_get(struct raid_set *rs, struct address *addr)
+{
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+
+ stripe = stripe_lookup(sc, addr->key);
+ if (stripe) {
+ _stripe_get(stripe);
+ /* Remove from the lru list if on. */
+ stripe_lru_del(stripe, LIST_LOCKED);
+ atomic_inc(rs->stats + S_HITS_1ST); /* REMOVEME: statistics. */
+ } else {
+ /* Second try to get an LRU stripe. */
+ stripe = stripe_lru_pop(sc);
+ if (stripe) {
+ _stripe_get(stripe);
+ /* Invalidate before reinserting with changed key. */
+ stripe_invalidate(stripe);
+ stripe->key = addr->key;
+ stripe->region = dm_rh_sector_to_region(rs->recover.rh,
+ addr->key);
+ stripe->idx.parity = addr->pi;
+ sc_insert(sc, stripe);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_INSCACHE);
+ }
+ }
+
+ return stripe;
+}
+
+/*
+ * Decrement reference count on a stripe.
+ *
+ * Move it to list of LRU stripes if zero.
+ */
+static void stripe_put(struct stripe *stripe)
+{
+ if (atomic_dec_and_test(&stripe->cnt)) {
+ if (TestClearStripeActive(stripe))
+ atomic_dec(&stripe->sc->active_stripes);
+
+ /* Put stripe onto the LRU list. */
+ stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+ }
+
+ BUG_ON(atomic_read(&stripe->cnt) < 0);
+}
+
+/*
+ * Process end io
+ *
+ * This must run in process context, because stripe pages
+ * cannot be mapped and copied in interrupt context.
+ *
+ * Read and write functions are split in order to avoid
+ * conditionals in the main loop for performance reasons.
+ */
+
+/* Helper read bios on a page list. */
+static void _bio_copy_page_list(struct stripe *stripe, struct page_list *pl,
+ struct bio *bio)
+{
+ bio_copy_page_list(READ, stripe, pl, bio);
+}
+
+/* Helper write bios on a page list. */
+static void _rh_dec(struct stripe *stripe, struct page_list *pl,
+ struct bio *bio)
+{
+ dm_rh_dec(RS(stripe->sc)->recover.rh, stripe->region);
+}
+
+/* End io all bios on a page list. */
+static inline int
+page_list_endio(int rw, struct stripe *stripe, unsigned p, unsigned *count)
+{
+ int r = 0;
+ struct bio_list *bl = BL(stripe, p, rw);
+
+ if (!bio_list_empty(bl)) {
+ struct page_list *pl = PL(stripe, p);
+ struct page *page = pl->page;
+
+ if (PageLocked(page))
+ r = -EBUSY;
+ /*
+ * FIXME: PageUptodate() not cleared
+ * properly for missing chunks ?
+ */
+ else if (PageUptodate(page)) {
+ struct bio *bio;
+ struct raid_set *rs = RS(stripe->sc);
+ void (*h_f)(struct stripe *, struct page_list *,
+ struct bio *) =
+ (rw == READ) ? _bio_copy_page_list : _rh_dec;
+
+ while ((bio = bio_list_pop(bl))) {
+ h_f(stripe, pl, bio);
+ _bio_endio(rs, bio, 0);
+ stripe_put(stripe);
+ if (count)
+ (*count)++;
+ }
+ } else
+ r = -EAGAIN;
+ }
+
+ return r;
+}
+
+/*
+ * End io all reads/writes on a stripe copying
+ * read data across from stripe to bios.
+ */
+static int stripe_endio(int rw, struct stripe *stripe, unsigned *count)
+{
+ int r = 0;
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--) {
+ int rr = page_list_endio(rw, stripe, p, count);
+
+ if (rr && r != -EIO)
+ r = rr;
+ }
+
+ return r;
+}
+
+/* Fail all ios on a bio list and return # of bios. */
+static unsigned
+bio_list_fail(struct raid_set *rs, struct stripe *stripe, struct bio_list *bl)
+{
+ unsigned r;
+ struct bio *bio;
+
+ raid_set_dead(rs);
+
+ /* Update region counters. */
+ if (stripe) {
+ struct dm_region_hash *rh = rs->recover.rh;
+
+ bio_list_for_each(bio, bl) {
+ if (bio_data_dir(bio) == WRITE)
+ dm_rh_dec(rh, stripe->region);
+ }
+ }
+
+ /* Error end io all bios. */
+ for (r = 0; (bio = bio_list_pop(bl)); r++)
+ _bio_endio(rs, bio, -EIO);
+
+ return r;
+}
+
+/* Fail all ios of a bio list of a stripe and drop io pending count. */
+static void
+stripe_bio_list_fail(struct raid_set *rs, struct stripe *stripe,
+ struct bio_list *bl)
+{
+ unsigned put = bio_list_fail(rs, stripe, bl);
+
+ while (put--)
+ stripe_put(stripe);
+}
+
+/* Fail all ios hanging off all bio lists of a stripe. */
+static void stripe_fail_io(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned p = rs->set.raid_devs;
+
+ stripe_evict(stripe);
+
+ while (p--) {
+ struct stripe_set *ss = stripe->ss + p;
+ int i = ARRAY_SIZE(ss->bl);
+
+ while (i--)
+ stripe_bio_list_fail(rs, stripe, ss->bl + i);
+ }
+}
+
+/*
+ * Handle all stripes by handing them to the daemon, because we can't
+ * map their pages to copy the data in interrupt context.
+ *
+ * We don't want to handle them here either, while interrupts are disabled.
+ */
+
+/* Read/write endio function for dm-io (interrupt context). */
+static void endio(unsigned long error, void *context)
+{
+ struct dm_mem_cache_object *obj = context;
+ struct stripe_set *ss = obj->private;
+ struct stripe *stripe = ss->stripe;
+ struct page *page = obj->pl->page;
+
+ if (unlikely(error))
+ stripe_error(stripe, page);
+ else
+ page_set(page, CLEAN);
+
+ __clear_page_locked(page);
+ stripe_io_dec(stripe);
+
+ /* Add stripe to endio list and wake daemon. */
+ stripe_endio_push(stripe);
+}
+
+/*
+ * Recovery io throttling
+ */
+/* Conditionally reset io counters. */
+enum count_type { IO_WORK = 0, IO_RECOVER };
+static int recover_io_reset(struct raid_set *rs)
+{
+ unsigned long j = jiffies;
+
+ /* Pay attention to jiffies overflows. */
+ if (j > rs->recover.last_jiffies + HZ
+ || j < rs->recover.last_jiffies) {
+ rs->recover.last_jiffies = j;
+ atomic_set(rs->recover.io_count + IO_WORK, 0);
+ atomic_set(rs->recover.io_count + IO_RECOVER, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Count ios. */
+static INLINE void
+recover_io_count(struct raid_set *rs, struct stripe *stripe)
+{
+ if (RSRecover(rs)) {
+ recover_io_reset(rs);
+ atomic_inc(rs->recover.io_count +
+ (StripeRecover(stripe) ? IO_RECOVER : IO_WORK));
+ }
+}
+
+/* Read/Write a page_list asynchronously. */
+static void page_list_rw(struct stripe *stripe, unsigned p)
+{
+ struct stripe_cache *sc = stripe->sc;
+ struct raid_set *rs = RS(sc);
+ struct dm_mem_cache_object *obj = stripe->obj + p;
+ struct page_list *pl = obj->pl;
+ struct page *page = pl->page;
+ struct raid_dev *dev = rs->dev + p;
+ struct dm_io_region io = {
+ .bdev = dev->dev->bdev,
+ .sector = stripe->key,
+ .count = stripe->io.size,
+ };
+ struct dm_io_request control = {
+ .bi_rw = PageDirty(page) ? WRITE : READ,
+ .mem.type = DM_IO_PAGE_LIST,
+ .mem.ptr.pl = pl,
+ .mem.offset = 0,
+ .notify.fn = endio,
+ .notify.context = obj,
+ .client = sc->dm_io_client,
+ };
+
+ BUG_ON(PageLocked(page));
+
+ /*
+ * Don't rw past the end of the device, which can happen because
+ * sectors_per_dev typically isn't divisible by io_size.
+ */
+ if (unlikely(io.sector + io.count > rs->set.sectors_per_dev))
+ io.count = rs->set.sectors_per_dev - io.sector;
+
+ io.sector += dev->start; /* Add <offset>. */
+ recover_io_count(rs, stripe); /* Recovery io accounting. */
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats +
+ (PageDirty(page) ? S_DM_IO_WRITE : S_DM_IO_READ));
+
+ ClearPageError(page);
+ __set_page_locked(page);
+ io_dev_queued(dev);
+ BUG_ON(dm_io(&control, 1, &io, NULL));
+}
+
+/*
+ * Write dirty / read not uptodate page lists of a stripe.
+ */
+static unsigned stripe_page_lists_rw(struct raid_set *rs, struct stripe *stripe)
+{
+ unsigned r;
+
+ /*
+ * Increment the pending count on the stripe
+ * first, so that we don't race in endio().
+ *
+ * An inc (IO) is needed for any page:
+ *
+ * o not uptodate
+ * o dirtied by writes merged
+ * o dirtied by parity calculations
+ */
+ r = for_each_io_dev(rs, stripe, _stripe_io_inc);
+ if (r) {
+ /* io needed: chunks are not uptodate/dirty. */
+ int max; /* REMOVEME: */
+ struct stripe_cache *sc = &rs->sc;
+
+ if (!TestSetStripeActive(stripe))
+ atomic_inc(&sc->active_stripes);
+
+ /* Take off the lru list in case it got added there. */
+ stripe_lru_del(stripe, LIST_LOCKED);
+
+ /* Submit actual io. */
+ for_each_io_dev(rs, stripe, page_list_rw);
+
+ /* REMOVEME: statistics */
+ max = sc_active(sc);
+ if (atomic_read(&sc->max_active_stripes) < max)
+ atomic_set(&sc->max_active_stripes, max);
+
+ atomic_inc(rs->stats + S_FLUSHS);
+ /* END REMOVEME: statistics */
+ }
+
+ return r;
+}
+
+/* Work in all pending writes. */
+static INLINE void _writes_merge(struct stripe *stripe, unsigned p)
+{
+ struct bio_list *write = BL(stripe, p, WRITE);
+
+ if (!bio_list_empty(write)) {
+ struct page_list *pl = stripe->obj[p].pl;
+ struct bio *bio;
+ struct bio_list *write_merged = BL(stripe, p, WRITE_MERGED);
+
+ /*
+ * We can play with the lists without holding a lock,
+ * because it is just us accessing them anyway.
+ */
+ bio_list_for_each(bio, write)
+ bio_copy_page_list(WRITE, stripe, pl, bio);
+
+ bio_list_merge(write_merged, write);
+ bio_list_init(write);
+ page_set(pl->page, DIRTY);
+ }
+}
+
+/* Merge in all writes hence dirtying respective pages. */
+static INLINE void writes_merge(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--)
+ _writes_merge(stripe, p);
+}
+
+/* Check if a chunk gets completely overwritten. */
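+/*
+ * E.g. with io_size = 64 sectors, two queued 32 sector writes to a
+ * chunk add up to a complete overwrite, so that chunk need not be
+ * read in before the writes get merged.
+ */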
+static INLINE int stripe_check_overwrite(struct stripe *stripe, unsigned p)
+{
+ unsigned sectors = 0;
+ struct bio *bio;
+ struct bio_list *bl = BL(stripe, p, WRITE);
+
+ bio_list_for_each(bio, bl)
+ sectors += bio_sectors(bio);
+
+ return sectors == RS(stripe->sc)->set.io_size;
+}
+
+/*
+ * Prepare stripe to avoid io on a broken/reconstructed
+ * drive in order to reconstruct the data on end io.
+ */
+enum prepare_type { IO_ALLOW, IO_PROHIBIT };
+static void stripe_prepare(struct stripe *stripe, unsigned p,
+ enum prepare_type type)
+{
+ struct page *page = PAGE(stripe, p);
+
+ switch (type) {
+ case IO_PROHIBIT:
+ /*
+ * In case we prohibit io, we have to make sure that io
+ * on all chunks other than the one which failed or is
+ * being reconstructed is allowed and that the failed
+ * chunk doesn't keep state uptodate.
+ */
+ stripe_allow_io(stripe);
+ ClearPageUptodate(page);
+ ProhibitPageIO(page);
+
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats + S_PROHIBITPAGEIO);
+ stripe->idx.recover = p;
+ SetStripeReconstruct(stripe);
+ break;
+
+ case IO_ALLOW:
+ AllowPageIO(page);
+ stripe->idx.recover = -1;
+ ClearStripeReconstruct(stripe);
+ break;
+
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Degraded/reconstruction mode.
+ *
+ * Check stripe state to figure which chunks don't need IO.
+ */
+static INLINE void stripe_check_reconstruct(struct stripe *stripe,
+ int prohibited)
+{
+ struct raid_set *rs = RS(stripe->sc);
+
+ /*
+ * Degraded mode (device(s) failed) ->
+ * avoid io on the failed device.
+ */
+ if (unlikely(raid_set_degraded(rs))) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_DEGRADED);
+ stripe_prepare(stripe, rs->set.ei, IO_PROHIBIT);
+ return;
+ } else {
+ /*
+ * Reconstruction mode (ie. a particular device or
+ * some (rotating) parity chunk is being resynchronized) ->
+ * o make sure all needed pages are read in
+ * o writes are allowed to go through
+ */
+ int r = region_state(rs, stripe->key, DM_RH_NOSYNC);
+
+ if (r) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NOSYNC);
+ stripe_prepare(stripe, dev_for_parity(stripe),
+ IO_PROHIBIT);
+ return;
+ }
+ }
+
+ /*
+ * All disks good. Avoid reading parity chunk and reconstruct it
+ * unless we have prohibited io to chunk(s).
+ */
+ if (!prohibited) {
+ if (StripeMerged(stripe))
+ stripe_prepare(stripe, stripe->idx.parity, IO_ALLOW);
+ else {
+ stripe_prepare(stripe, stripe->idx.parity, IO_PROHIBIT);
+
+ /*
+ * Overrule stripe_prepare to reconstruct the
+ * parity chunk, because it'll be created anew anyway.
+ */
+ ClearStripeReconstruct(stripe);
+ }
+ }
+}
+
+/*
+ * Check if a stripe is ready to merge writes:
+ * 0 = ready, 1 = not ready but io got prohibited on completely
+ * overwritten chunks, -EPERM = not ready.
+ */
+static INLINE int stripe_check_merge(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int prohibited = 0;
+ unsigned chunks = 0, p = rs->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ /* Can't merge active chunks. */
+ if (PageLocked(page)) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_MERGE_PAGE_LOCKED);
+ break;
+ }
+
+ /* Can merge uptodate chunks and have to count parity chunk. */
+ if (PageUptodate(page) || p == stripe->idx.parity) {
+ chunks++;
+ continue;
+ }
+
+ /* Read before write ordering. */
+ if (RSCheckOverwrite(rs) &&
+ bio_list_empty(BL(stripe, p, READ))) {
+ int r = stripe_check_overwrite(stripe, p);
+
+ if (r) {
+ chunks++;
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats +
+ S_PROHIBITPAGEIO);
+ ProhibitPageIO(page);
+ prohibited = 1;
+ }
+ }
+ }
+
+ if (chunks == rs->set.raid_devs) {
+ /* All pages are uptodate, get written over, or a mixture of both. */
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_CAN_MERGE);
+ return 0;
+ } else
+ /* REMOVEME: statistics.*/
+ atomic_inc(rs->stats + S_CANT_MERGE);
+
+ return prohibited ? 1 : -EPERM;
+}
+
+/* Prohibit io on any chunks which have no reads queued (read avoidance). */
+static INLINE int stripe_check_read(struct stripe *stripe)
+{
+ int r = 0;
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ if (!PageLocked(page) &&
+ bio_list_empty(BL(stripe, p, READ))) {
+ ProhibitPageIO(page);
+ r = 1;
+ }
+ }
+
+ return r;
+}
+
+/*
+ * Read/write a stripe.
+ *
+ * All stripe read/write activity goes through this function.
+ *
+ * States to cover:
+ * o stripe to read and/or write
+ * o stripe with error to reconstruct
+ */
+static int stripe_rw(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int prohibited = 0, r;
+
+ /*
+ * Check the state of the RAID set and if degraded (or
+ * resynchronizing for reads), read in all other chunks but
+ * the one on the dead/resynchronizing device in order to be
+ * able to reconstruct the missing one.
+ *
+ * Merge all writes hanging off uptodate pages of the stripe.
+ */
+
+ /* Initially allow io on all chunks and prohibit below, if necessary. */
+ stripe_allow_io(stripe);
+
+ if (StripeRBW(stripe)) {
+ r = stripe_check_merge(stripe);
+ if (!r) {
+ /*
+ * If I could rely on valid parity (which would only
+ * be sure in case of a full synchronization),
+ * I could xor a fraction of chunks out of
+ * parity and back in.
+ *
+ * For the time being, I got to redo parity...
+ */
+ /* parity_xor(stripe); */ /* Xor chunks out. */
+ stripe_zero_chunk(stripe, stripe->idx.parity);
+ writes_merge(stripe); /* Merge writes in. */
+ parity_xor(stripe); /* Update parity. */
+ ClearStripeRBW(stripe); /* Disable RBW. */
+ SetStripeMerged(stripe); /* Writes merged. */
+ }
+
+ if (r > 0)
+ prohibited = 1;
+ } else if (!raid_set_degraded(rs))
+ /* Only allow for read avoidance if not degraded. */
+ prohibited = stripe_check_read(stripe);
+
+ /*
+ * Check if io needs to be allowed/prohibited on certain chunks
+ * because of a degraded set or reconstruction on a region.
+ */
+ */
+ stripe_check_reconstruct(stripe, prohibited);
+
+ /* Now submit any reads/writes. */
+ r = stripe_page_lists_rw(rs, stripe);
+ if (!r) {
+ /*
+ * No io submitted because chunk io got prohibited or
+ * pages are locked -> push to end io list for processing.
+ */
+ atomic_inc(rs->stats + S_NO_RW); /* REMOVEME: statistics. */
+ stripe_endio_push(stripe);
+ wake_do_raid(rs); /* Wake myself. */
+ }
+
+ return 0;
+}
+
+/* Flush stripe either immediately or delayed via the io list. */
+enum flush_type { FLUSH_DELAY, FLUSH_NOW };
+static int stripe_flush(struct stripe *stripe, enum flush_type type)
+{
+ int r = 0;
+
+ stripe_lru_del(stripe, LIST_LOCKED);
+
+ /* Immediately flush. */
+ if (type == FLUSH_NOW) {
+ if (likely(raid_set_operational(RS(stripe->sc))))
+ r = stripe_rw(stripe); /* Read/write stripe. */
+ else
+ /* Optimization: Fail early on failed sets. */
+ stripe_fail_io(stripe);
+ /* Delay flush by putting it on io list for later processing. */
+ } else if (type == FLUSH_DELAY)
+ stripe_io_add(stripe, POS_TAIL, LIST_UNLOCKED);
+ else
+ BUG();
+
+ return r;
+}
+
+/*
+ * Queue reads and writes to a stripe by hanging
+ * their bios off the stripe set's read/write lists.
+ *
+ * Endio reads on uptodate chunks.
+ */
+static INLINE int stripe_queue_bio(struct raid_set *rs, struct bio *bio,
+ struct bio_list *reject)
+{
+ int r = 0;
+ struct address addr;
+ struct stripe *stripe =
+ stripe_get(rs, raid_address(rs, bio->bi_sector, &addr));
+
+ if (stripe) {
+ int rr, rw = bio_data_dir(bio);
+
+ rr = stripe_lock(rs, stripe, rw, addr.key); /* Lock stripe */
+ if (rr) {
+ stripe_put(stripe);
+ goto out;
+ }
+
+ /* Distinguish read and write cases. */
+ bio_list_add(BL(stripe, addr.di, rw), bio);
+
+ /* REMOVEME: statistics */
+ atomic_inc(rs->stats + (rw == WRITE ?
+ S_BIOS_ADDED_WRITE : S_BIOS_ADDED_READ));
+
+ if (rw == READ)
+ SetStripeRead(stripe);
+ else {
+ SetStripeRBW(stripe);
+
+ /* Increment pending write count on region. */
+ dm_rh_inc(rs->recover.rh, stripe->region);
+ r = 1; /* Region hash needs a flush. */
+ }
+
+ /*
+ * Optimize stripe flushing:
+ *
+ * o directly start io for read stripes.
+ *
+ * o put stripe onto stripe caches io_list for RBW,
+ * so that do_flush() can belabour it after we put
+ * more bios to the stripe for overwrite optimization.
+ */
+ stripe_flush(stripe,
+ StripeRead(stripe) ? FLUSH_NOW : FLUSH_DELAY);
+
+ /* Got no stripe from cache -> reject bio. */
+ } else {
+out:
+ bio_list_add(reject, bio);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_IOS_POST);
+ }
+
+ return r;
+}
+
+/*
+ * Recovery functions
+ */
+/* Read a stripe off a raid set for recovery. */
+static int recover_read(struct raid_set *rs, struct stripe *stripe, int idx)
+{
+ /* Invalidate all pages so that they get read in. */
+ stripe_pages_invalidate(stripe);
+
+ /* Allow io on all recovery chunks. */
+ stripe_allow_io(stripe);
+
+ if (idx > -1)
+ ProhibitPageIO(PAGE(stripe, idx));
+
+ stripe->key = rs->recover.pos;
+ return stripe_page_lists_rw(rs, stripe);
+}
+
+/* Write a stripe to a raid set for recovery. */
+static int recover_write(struct raid_set *rs, struct stripe *stripe, int idx)
+{
+ /*
+ * If this is a reconstruct of a particular device, then
+ * reconstruct the respective page(s), else create parity page(s).
+ */
+ if (idx > -1) {
+ struct page *page = PAGE(stripe, idx);
+
+ AllowPageIO(page);
+ stripe_zero_chunk(stripe, idx);
+ common_xor(stripe, stripe->io.size, 0, idx);
+ page_set(page, DIRTY);
+ } else
+ parity_xor(stripe);
+
+ return stripe_page_lists_rw(rs, stripe);
+}
+
+/* Recovery bandwidth available? */
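+/*
+ * E.g. with bandwidth = 20 (%), bandwidth_work = 100 / 20 = 5, i.e.
+ * recovery io (scaled by the recovery vs. work stripe size ratio)
+ * gets throttled once it exceeds 1/5th of the application work io.
+ */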
+static int recover_bandwidth(struct raid_set *rs)
+{
+ int r, work;
+
+ /* On reset -> allow recovery. */
+ r = recover_io_reset(rs);
+ if (r || RSBandwidth(rs))
+ goto out;
+
+ work = atomic_read(rs->recover.io_count + IO_WORK);
+ if (work) {
+ /* Pay attention to larger recover stripe size. */
+ int recover =
+ atomic_read(rs->recover.io_count + IO_RECOVER) *
+ rs->recover.io_size /
+ rs->set.io_size;
+
+ /*
+ * Don't use more than given bandwidth of
+ * the work io for recovery.
+ */
+ if (recover > work / rs->recover.bandwidth_work) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NO_BANDWIDTH);
+ return 0;
+ }
+ }
+
+out:
+ atomic_inc(rs->stats + S_BANDWIDTH); /* REMOVEME: statistics. */
+ return 1;
+}
+
+/* Try to get a region to recover. */
+static int recover_get_region(struct raid_set *rs)
+{
+ struct recover *rec = &rs->recover;
+ struct dm_region_hash *rh = rec->rh;
+
+ /* Start quiescing some regions. */
+ if (!RSRegionGet(rs)) {
+ int r = recover_bandwidth(rs); /* Enough bandwidth? */
+
+ if (r) {
+ r = dm_rh_recovery_prepare(rh);
+ if (r < 0) {
+ DMINFO("No %sregions to recover",
+ rec->nr_regions_to_recover ?
+ "more " : "");
+ return -ENOENT;
+ }
+ } else
+ return -EAGAIN;
+
+ SetRSRegionGet(rs);
+ }
+
+ if (!rec->reg) {
+ rec->reg = dm_rh_recovery_start(rh);
+ if (rec->reg) {
+ /*
+ * A reference for the region which I'll
+ * keep until I've completely synced it.
+ */
+ io_get(rs);
+ rec->pos = dm_rh_region_to_sector(rh,
+ dm_rh_get_region_key(rec->reg));
+ rec->end = rec->pos + dm_rh_get_region_size(rh);
+ return 1;
+ } else
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/* Read/write a recovery stripe. */
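+/*
+ * A recovery stripe alternates between a read pass, reading all intact
+ * chunks of the region into the stripe, and a write pass, xoring the
+ * missing chunk (or fresh parity) together and writing it out.
+ */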
+static INLINE int recover_stripe_rw(struct raid_set *rs, struct stripe *stripe)
+{
+ /* Read/write flip-flop. */
+ if (TestClearStripeRBW(stripe)) {
+ SetStripeRead(stripe);
+ return recover_read(rs, stripe, idx_get(rs));
+ } else if (TestClearStripeRead(stripe))
+ return recover_write(rs, stripe, idx_get(rs));
+
+ return 0;
+}
+
+/* Reset recovery variables. */
+static void recovery_region_reset(struct raid_set *rs)
+{
+ rs->recover.reg = NULL;
+ ClearRSRegionGet(rs);
+}
+
+/* Update region hash state. */
+static void recover_rh_update(struct raid_set *rs, int error)
+{
+ struct recover *rec = &rs->recover;
+ struct dm_region *reg = rec->reg;
+
+ if (reg) {
+ dm_rh_recovery_end(reg, error);
+ if (!error)
+ rec->nr_regions_recovered++;
+
+ recovery_region_reset(rs);
+ }
+
+ dm_rh_update_states(rec->rh, 1);
+ dm_rh_flush(rec->rh);
+ io_put(rs); /* Release the io reference for the region. */
+}
+
+/* Called by main io daemon to recover regions. */
+/* FIXME: cope with MAX_RECOVER > 1. */
+static INLINE void _do_recovery(struct raid_set *rs, struct stripe *stripe)
+{
+ int r;
+ struct recover *rec = &rs->recover;
+
+ /* If the recovery stripe is still active -> return. */
+ if (StripeActive(stripe))
+ return;
+
+ /* io error is fatal for recovery -> stop it. */
+ if (unlikely(StripeError(stripe)))
+ goto err;
+
+ /* Get a region to recover. */
+ r = recover_get_region(rs);
+ switch (r) {
+ case 1: /* Got a new region. */
+ /* Flag read before write. */
+ ClearStripeRead(stripe);
+ SetStripeRBW(stripe);
+ break;
+
+ case 0:
+ /* Got a region in the works. */
+ r = recover_bandwidth(rs);
+ if (r) /* Got enough bandwidth. */
+ break;
+
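+ /* Fall through, if no bandwidth. */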
+ case -EAGAIN:
+ /* No bandwidth/quiesced region yet, try later. */
+ wake_do_raid_delayed(rs, HZ / 10);
+ return;
+
+ case -ENOENT: /* No more regions. */
+ dm_table_event(rs->ti->table);
+ goto free;
+ }
+
+ /* Read/write a recover stripe. */
+ r = recover_stripe_rw(rs, stripe);
+ if (r) {
+ /* IO initiated, get another reference for the IO. */
+ io_get(rs);
+ return;
+ }
+
+ /* Update recovery position within region. */
+ rec->pos += stripe->io.size;
+
+ /* If we're at end of region, update region hash. */
+ if (rec->pos >= rec->end ||
+ rec->pos >= rs->set.sectors_per_dev)
+ recover_rh_update(rs, 0);
+ else
+ SetStripeRBW(stripe);
+
+ /* Schedule myself for another round... */
+ wake_do_raid(rs);
+ return;
+
+err:
+ raid_set_check_degrade(rs, stripe);
+
+ {
+ char buf[BDEVNAME_SIZE];
+
+ DMERR("stopping recovery due to "
+ "ERROR on /dev/%s, stripe at offset %llu",
+ bdevname(rs->dev[rs->set.ei].dev->bdev, buf),
+ (unsigned long long) stripe->key);
+ }
+
+ /* Make sure that all quiesced regions get released. */
+ do {
+ if (rec->reg)
+ dm_rh_recovery_end(rec->reg, -EIO);
+
+ rec->reg = dm_rh_recovery_start(rec->rh);
+ } while (rec->reg);
+
+ recover_rh_update(rs, -EIO);
+free:
+ rs->set.dev_to_init = -1;
+
+ /* Check for jiffies overrun. */
+ rs->recover.end_jiffies = jiffies;
+ if (rs->recover.end_jiffies < rs->recover.start_jiffies)
+ rs->recover.end_jiffies = ~0;
+
+ ClearRSRecover(rs);
+}
+
+static INLINE void do_recovery(struct raid_set *rs)
+{
+ struct stripe *stripe;
+
+ list_for_each_entry(stripe, &rs->recover.stripes, lists[LIST_RECOVER])
+ _do_recovery(rs, stripe);
+
+ if (!RSRecover(rs))
+ stripe_recover_free(rs);
+}
+
+/*
+ * END recovery functions
+ */
+
+/* End io process all stripes handed in by endio() callback. */
+static void do_endios(struct raid_set *rs)
+{
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+
+ while ((stripe = stripe_endio_pop(sc))) {
+ unsigned count;
+
+ /* Recovery stripe special case. */
+ if (unlikely(StripeRecover(stripe))) {
+ if (stripe_io(stripe))
+ continue;
+
+ io_put(rs); /* Release region io reference. */
+ ClearStripeActive(stripe);
+
+ /* REMOVEME: statistics. */
+ atomic_dec(&sc->active_stripes);
+ continue;
+ }
+
+ /* Early end io all reads on any uptodate chunks. */
+ stripe_endio(READ, stripe, (count = 0, &count));
+ if (stripe_io(stripe)) {
+ if (count) /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_ACTIVE_READS);
+
+ continue;
+ }
+
+ /* Set stripe inactive after all io got processed. */
+ if (TestClearStripeActive(stripe))
+ atomic_dec(&sc->active_stripes);
+
+ /* Unlock stripe (for clustering). */
+ stripe_unlock(rs, stripe);
+
+ /*
+ * If an io error on a stripe occurred and the RAID set
+ * is still operational, requeue the stripe for io.
+ */
+ if (TestClearStripeError(stripe)) {
+ raid_set_check_degrade(rs, stripe);
+ ClearStripeReconstruct(stripe);
+
+ if (!StripeMerged(stripe) &&
+ raid_set_operational(rs)) {
+ stripe_pages_invalidate(stripe);
+ stripe_flush(stripe, FLUSH_DELAY);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_REQUEUE);
+ continue;
+ }
+ }
+
+ /* Check if the RAID set is inoperational to error ios. */
+ if (!raid_set_operational(rs)) {
+ ClearStripeReconstruct(stripe);
+ stripe_fail_io(stripe);
+ BUG_ON(atomic_read(&stripe->cnt));
+ continue;
+ }
+
+ /* Got to reconstruct a missing chunk. */
+ if (TestClearStripeReconstruct(stripe))
+ reconstruct_xor(stripe);
+
+ /*
+ * Now that we've got a complete stripe, we can
+ * process the rest of the end ios on reads.
+ */
+ BUG_ON(stripe_endio(READ, stripe, NULL));
+ ClearStripeRead(stripe);
+
+ /*
+ * Read-before-write stripes need to be flushed again in
+ * order to work the write data into the pages *after*
+ * they were read in.
+ */
+ if (TestClearStripeMerged(stripe))
+ /* End io all bios which got merged already. */
+ BUG_ON(stripe_endio(WRITE_MERGED, stripe, NULL));
+
+ /* Got to put on flush list because of new writes. */
+ if (StripeRBW(stripe))
+ stripe_flush(stripe, FLUSH_DELAY);
+ }
+}
+
+/*
+ * Stripe cache shrinking.
+ */
+static INLINE void do_sc_shrink(struct raid_set *rs)
+{
+ unsigned shrink = atomic_read(&rs->sc.stripes_to_shrink);
+
+ if (shrink) {
+ unsigned cur = atomic_read(&rs->sc.stripes);
+
+ sc_shrink(&rs->sc, shrink);
+ shrink -= cur - atomic_read(&rs->sc.stripes);
+ atomic_set(&rs->sc.stripes_to_shrink, shrink);
+
+ /*
+ * Wake myself up in case we failed to shrink the
+ * requested amount in order to try again later.
+ */
+ if (shrink)
+ wake_do_raid(rs);
+ }
+}
+
+
+/*
+ * Process all ios
+ *
+ * We do different things with the io depending on the
+ * state of the region that it's in:
+ *
+ * o reads: hang off stripe cache or postpone if full
+ *
+ * o writes:
+ *
+ * CLEAN/DIRTY/NOSYNC: increment pending and hang io off stripe's stripe set.
+ * In case stripe cache is full or busy, postpone the io.
+ *
+ * RECOVERING: delay the io until recovery of the region completes.
+ *
+ */
+static INLINE void do_ios(struct raid_set *rs, struct bio_list *ios)
+{
+ int r;
+ unsigned flush = 0;
+ struct dm_region_hash *rh = rs->recover.rh;
+ struct bio *bio;
+ struct bio_list delay, reject;
+
+ bio_list_init(&delay);
+ bio_list_init(&reject);
+
+ /*
+ * Classify each io:
+ * o delay to recovering regions
+ * o queue to all other regions
+ */
+ while ((bio = bio_list_pop(ios))) {
+ /*
+ * In case we get a barrier bio, push it back onto
+ * the input queue unless all work queues are empty
+ * and the stripe cache is inactive.
+ */
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BARRIER);
+ if (!list_empty(rs->sc.lists + LIST_IO) ||
+ !bio_list_empty(&delay) ||
+ !bio_list_empty(&reject) ||
+ sc_active(&rs->sc)) {
+ bio_list_push(ios, bio);
+ break;
+ }
+ }
+
+ r = region_state(rs, _sector(rs, bio), DM_RH_RECOVERING);
+ if (unlikely(r)) {
+ /* Got to wait for recovering regions. */
+ bio_list_add(&delay, bio);
+ SetRSBandwidth(rs);
+ } else {
+ /*
+ * Process ios to non-recovering regions by queueing
+ * them to stripes (does rh_inc() for writes).
+ */
+ flush += stripe_queue_bio(rs, bio, &reject);
+ }
+ }
+
+ if (flush) {
+ r = dm_rh_flush(rh); /* Writes got queued -> flush dirty log. */
+ if (r)
+ DMERR("dirty log flush");
+ }
+
+ /* Delay ios to regions which are recovering. */
+ while ((bio = bio_list_pop(&delay))) {
+ /* REMOVEME: statistics.*/
+ atomic_inc(rs->stats + S_DELAYED_BIOS);
+ atomic_inc(rs->stats + S_SUM_DELAYED_BIOS);
+ dm_rh_delay(rh, bio);
+ }
+
+ /* Merge any rejected bios back to the head of the input list. */
+ bio_list_merge_head(ios, &reject);
+}
+
+/* Flush any stripes on the io list. */
+static INLINE void do_flush(struct raid_set *rs)
+{
+ struct list_head *list = rs->sc.lists + LIST_IO, *pos, *tmp;
+
+ list_for_each_safe(pos, tmp, list) {
+ int r = stripe_flush(list_entry(pos, struct stripe,
+ lists[LIST_IO]), FLUSH_NOW);
+
+ /* Remove from the list only if the stripe got processed. */
+ if (!r)
+ list_del_init(pos);
+ }
+}
+
+/* Send an event in case we're getting too busy. */
+static INLINE void do_busy_event(struct raid_set *rs)
+{
+ if ((sc_active(&rs->sc) > atomic_read(&rs->sc.stripes) * 4 / 5)) {
+ if (!TestSetRSScBusy(rs))
+ dm_table_event(rs->ti->table);
+ } else
+ ClearRSScBusy(rs);
+}
+
+/* Unplug: let the io roll on the set's devices. */
+static INLINE void do_unplug(struct raid_set *rs)
+{
+ struct raid_dev *dev = rs->dev + rs->set.raid_devs;
+
+ while (dev-- > rs->dev) {
+ /* Only call any device unplug function, if io got queued. */
+ if (io_dev_clear(dev))
+ blk_unplug(bdev_get_queue(dev->dev->bdev));
+ }
+}
+
+/*-----------------------------------------------------------------
+ * RAID daemon
+ *---------------------------------------------------------------*/
+/*
+ * o belabour all end ios
+ * o optionally shrink the stripe cache
+ * o update the region hash states
+ * o optionally do recovery
+ * o grab the input queue
+ * o work on all requeued or new ios and perform stripe cache flushes
+ * unless the RAID set is inoperational (when we error ios)
+ * o check if the stripe cache gets too busy and throw an event if so
+ * o unplug any component raid devices with queued bios
+ */
+static void do_raid(struct work_struct *ws)
+{
+ struct raid_set *rs = container_of(ws, struct raid_set, io.dws.work);
+ struct bio_list *ios = &rs->io.work, *ios_in = &rs->io.in;
+ spinlock_t *lock = &rs->io.in_lock;
+
+ /*
+ * We always need to end io, so that ios
+ * can get errored in case the set failed
+ * and the region counters get decremented
+ * before we update the region hash states.
+ */
+redo:
+ do_endios(rs);
+
+ /*
+ * Now that we've end io'd, which may have put stripes on
+ * the LRU list, we shrink the stripe cache if requested.
+ */
+ do_sc_shrink(rs);
+
+ /* Update region hash states before we go any further. */
+ dm_rh_update_states(rs->recover.rh, 1);
+
+ /* Try to recover regions. */
+ if (RSRecover(rs))
+ do_recovery(rs);
+
+ /* More endios -> process. */
+ if (!stripe_endio_empty(&rs->sc)) {
+ atomic_inc(rs->stats + S_REDO);
+ goto redo;
+ }
+
+ /* Quickly grab all new ios queued and add them to the work list. */
+ spin_lock_irq(lock);
+ bio_list_merge(ios, ios_in);
+ bio_list_init(ios_in);
+ spin_unlock_irq(lock);
+
+ /* Let's assume we're operational most of the time ;-). */
+ if (likely(raid_set_operational(rs))) {
+ /* If we got ios, work them into the cache. */
+ if (!bio_list_empty(ios)) {
+ do_ios(rs, ios);
+ do_unplug(rs); /* Unplug the set's device queues. */
+ }
+
+ do_flush(rs); /* Flush any stripes on io list. */
+ do_unplug(rs); /* Unplug the set's device queues. */
+ do_busy_event(rs); /* Check if we got too busy. */
+
+ /* More endios -> process. */
+ if (!stripe_endio_empty(&rs->sc)) {
+ atomic_inc(rs->stats + S_REDO);
+ goto redo;
+ }
+ } else
+ /* No way to reconstruct data with too many devices failed. */
+ bio_list_fail(rs, NULL, ios);
+}
+
+/*
+ * Callback for region hash to dispatch
+ * delayed bios queued to recovered regions
+ * (Gets called via dm_rh_update_states()).
+ */
+static void dispatch_delayed_bios(void *context, struct bio_list *bl)
+{
+ struct raid_set *rs = context;
+ struct bio *bio;
+
+ /* REMOVEME: decrement pending delayed bios counter. */
+ bio_list_for_each(bio, bl)
+ atomic_dec(rs->stats + S_DELAYED_BIOS);
+
+ /* Merge region hash private list to work list. */
+ bio_list_merge_head(&rs->io.work, bl);
+ bio_list_init(bl);
+ ClearRSBandwidth(rs);
+}
+
+/*************************************************************
+ * Constructor helpers
+ *************************************************************/
+/* Calculate MB/sec. */
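+/*
+ * speed is the number of io.size sized xor runs per jiffy as measured
+ * by xor_speed() below, so the estimate amounts to
+ * speed * data_devs * recovery io_size [sectors] * HZ, scaled to MB.
+ */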
+static INLINE unsigned mbpers(struct raid_set *rs, unsigned speed)
+{
+ return to_bytes(speed * rs->set.data_devs *
+ rs->recover.io_size * HZ >> 10) >> 10;
+}
+
+/*
+ * Discover fastest xor algorithm and # of chunks combination.
+ */
+/* Calculate speed for algorithm and # of chunks. */
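+/*
+ * Spin until jiffies increments first, so that every candidate gets
+ * measured over one complete tick rather than a partial one.
+ */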
+static INLINE unsigned xor_speed(struct stripe *stripe)
+{
+ unsigned r = 0;
+ unsigned long j;
+
+ /* Wait for next tick. */
+ for (j = jiffies; j == jiffies;)
+ ;
+
+ /* Do xors for a full tick. */
+ for (j = jiffies; j == jiffies;) {
+ mb();
+ common_xor(stripe, stripe->io.size, 0, 0);
+ mb();
+ r++;
+ mb();
+ }
+
+ return r;
+}
+
+/* Optimize xor algorithm for this RAID set. */
+static unsigned xor_optimize(struct raid_set *rs)
+{
+ unsigned chunks_max = 2, speed_max = 0;
+ struct xor_func *f = ARRAY_END(xor_funcs), *f_max = NULL;
+ struct stripe *stripe;
+
+ BUG_ON(list_empty(&rs->recover.stripes));
+ stripe = list_first_entry(&rs->recover.stripes, struct stripe,
+ lists[LIST_RECOVER]);
+
+ /*
+ * Got to allow io on all chunks, so that
+ * xor() will actually work on them.
+ */
+ stripe_allow_io(stripe);
+
+ /* Try all xor functions. */
+ while (f-- > xor_funcs) {
+ unsigned speed;
+
+ /* Set actual xor function for common_xor(). */
+ rs->xor.f = f;
+ rs->xor.chunks = XOR_CHUNKS_MAX + 1;
+
+ while (rs->xor.chunks-- > 2) {
+ speed = xor_speed(stripe);
+ if (speed > speed_max) {
+ speed_max = speed;
+ chunks_max = rs->xor.chunks;
+ f_max = f;
+ }
+ }
+ }
+
+ /* Memorize optimum parameters. */
+ rs->xor.f = f_max;
+ rs->xor.chunks = chunks_max;
+ return speed_max;
+}
+
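+/* Check whether fixed + num * obj would overflow ULONG_MAX. */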
+static inline int array_too_big(unsigned long fixed, unsigned long obj,
+ unsigned long num)
+{
+ return (num > (ULONG_MAX - fixed) / obj);
+}
+
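+/* Intentional no-op: dm_region_hash_create() expects a wakeup callback. */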
+static void wakeup_all_recovery_waiters(void *context)
+{
+}
+
+/*
+ * Allocate a RAID context (a RAID set)
+ */
+static int
+context_alloc(struct raid_set **raid_set, struct raid_type *raid_type,
+ unsigned stripes, unsigned chunk_size, unsigned io_size,
+ unsigned recover_io_size, unsigned raid_devs,
+ sector_t sectors_per_dev,
+ struct dm_target *ti, unsigned dl_parms, char **argv)
+{
+ int r;
+ unsigned p;
+ size_t len;
+ sector_t region_size, ti_len;
+ struct raid_set *rs = NULL;
+ struct dm_dirty_log *dl;
+ struct recover *rec;
+
+ /*
+ * Create the dirty log
+ *
+ * We need to change length for the dirty log constructor,
+ * because we want an amount of regions for all stripes derived
+ * from the single device size, so that we can keep region
+ * size = 2^^n independent of the number of devices
+ */
+ ti_len = ti->len;
+ ti->len = sectors_per_dev;
+ dl = dm_dirty_log_create(argv[0], ti, NULL, dl_parms, argv + 2);
+ ti->len = ti_len;
+ if (!dl)
+ goto bad_dirty_log;
+
+ /* Chunk size *must* be smaller than region size. */
+ region_size = dl->type->get_region_size(dl);
+ if (chunk_size > region_size)
+ goto bad_chunk_size;
+
+ /* Recover io size *must* be smaller than region size as well. */
+ if (recover_io_size > region_size)
+ goto bad_recover_io_size;
+
+ /* Size and allocate the RAID set structure. */
+ len = sizeof(*rs->data) + sizeof(*rs->dev);
+ if (array_too_big(sizeof(*rs), len, raid_devs))
+ goto bad_array;
+
+ len = sizeof(*rs) + raid_devs * len;
+ rs = kzalloc(len, GFP_KERNEL);
+ if (!rs)
+ goto bad_alloc;
+
+ rec = &rs->recover;
+ atomic_set(&rs->io.in_process, 0);
+ atomic_set(&rs->io.in_process_max, 0);
+ rec->io_size = recover_io_size;
+
+ /* Pointer to data array. */
+ rs->data = (unsigned long **)
+ ((void *) rs->dev + raid_devs * sizeof(*rs->dev));
+ rec->dl = dl;
+ rs->set.raid_devs = p = raid_devs;
+ rs->set.data_devs = raid_devs - raid_type->parity_devs;
+ rs->set.raid_type = raid_type;
+
+ /*
+ * Set chunk and io size and respective shifts
+ * (used to avoid divisions)
+ */
+ rs->set.chunk_size = chunk_size;
+ rs->set.chunk_mask = chunk_size - 1;
+ rs->set.chunk_shift = ffs(chunk_size) - 1;
+
+ rs->set.io_size = io_size;
+ rs->set.io_mask = io_size - 1;
+ rs->set.io_shift = ffs(io_size) - 1;
+ rs->set.io_shift_mask = rs->set.chunk_mask & ~rs->set.io_mask;
+
+ rs->set.pages_per_io = chunk_pages(io_size);
+ rs->set.sectors_per_dev = sectors_per_dev;
+
+ rs->set.ei = -1; /* Indicate no failed device. */
+ atomic_set(&rs->set.failed_devs, 0);
+
+ rs->ti = ti;
+
+ atomic_set(rec->io_count + IO_WORK, 0);
+ atomic_set(rec->io_count + IO_RECOVER, 0);
+
+ /* Initialize io lock and queues. */
+ spin_lock_init(&rs->io.in_lock);
+ bio_list_init(&rs->io.in);
+ bio_list_init(&rs->io.work);
+
+ init_waitqueue_head(&rs->io.suspendq); /* Suspend waiters (dm-io). */
+
+ rec->nr_regions = dm_sector_div_up(sectors_per_dev, region_size);
+
+ rec->rh = dm_region_hash_create(rs, dispatch_delayed_bios, wake_do_raid,
+ wakeup_all_recovery_waiters,
+ rs->ti->begin, MAX_RECOVER, dl,
+ region_size, rs->recover.nr_regions);
+ if (IS_ERR(rec->rh))
+ goto bad_rh;
+
+ /* Initialize stripe cache. */
+ r = sc_init(rs, stripes);
+ if (r)
+ goto bad_sc;
+
+ /* Create dm-io client context. */
+ rs->sc.dm_io_client = dm_io_client_create(rs->set.raid_devs *
+ rs->set.pages_per_io);
+ if (IS_ERR(rs->sc.dm_io_client))
+ goto bad_dm_io_client;
+
+ /* REMOVEME: statistics. */
+ stats_reset(rs);
+ ClearRSDevelStats(rs); /* Disable development status. */
+
+ *raid_set = rs;
+ return 0;
+
+bad_dirty_log:
+ TI_ERR_RET("Error creating dirty log", -ENOMEM);
+
+bad_chunk_size:
+ dm_dirty_log_destroy(dl);
+ TI_ERR("Chunk size larger than region size");
+
+bad_recover_io_size:
+ dm_dirty_log_destroy(dl);
+ TI_ERR("Recover stripe io size larger than region size");
+
+bad_array:
+ dm_dirty_log_destroy(dl);
+ TI_ERR("Arry too big");
+
+bad_alloc:
+ dm_dirty_log_destroy(dl);
+ TI_ERR_RET("Cannot allocate raid context", -ENOMEM);
+
+bad_rh:
+ dm_dirty_log_destroy(dl);
+ ti->error = DM_MSG_PREFIX "Error creating dirty region hash";
+ goto free_rs;
+
+bad_sc:
+ ti->error = DM_MSG_PREFIX "Error creating stripe cache";
+ goto free;
+
+bad_dm_io_client:
+ ti->error = DM_MSG_PREFIX "Error allocating dm-io resources";
+free:
+ sc_exit(&rs->sc);
+ dm_region_hash_destroy(rec->rh); /* Destroys dirty log as well. */
+free_rs:
+ kfree(rs);
+ return -ENOMEM;
+}
+
+/* Free a RAID context (a RAID set). */
+static void
+context_free(struct raid_set *rs, struct dm_target *ti, unsigned r)
+{
+ while (r--)
+ dm_put_device(ti, rs->dev[r].dev);
+
+ dm_io_client_destroy(rs->sc.dm_io_client);
+ sc_exit(&rs->sc);
+ dm_region_hash_destroy(rs->recover.rh);
+ dm_dirty_log_destroy(rs->recover.dl);
+ kfree(rs);
+}
+
+/* Create work queue and initialize work. */
+static int rs_workqueue_init(struct raid_set *rs)
+{
+ struct dm_target *ti = rs->ti;
+
+ rs->io.wq = create_singlethread_workqueue(DAEMON);
+ if (!rs->io.wq)
+ TI_ERR_RET("failed to create " DAEMON, -ENOMEM);
+
+ INIT_DELAYED_WORK(&rs->io.dws, do_raid);
+ return 0;
+}
+
+/* Return pointer to raid_type structure for raid name. */
+static struct raid_type *get_raid_type(char *name)
+{
+ struct raid_type *r = ARRAY_END(raid_types);
+
+ while (r-- > raid_types) {
+ if (!strnicmp(STR_LEN(r->name, name)))
+ return r;
+ }
+
+ return NULL;
+}
+
+/* FIXME: factor out to dm core. */
+static int multiple(sector_t a, sector_t b, sector_t *n)
+{
+ sector_t r = a;
+
+ sector_div(r, b);
+ *n = r;
+ return a == r * b;
+}
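+/*
+ * E.g. multiple(ti->len, raid_devs - parity_devs, &sectors_per_dev)
+ * checks that the target length divides evenly between the data
+ * devices and yields the per-device sector count (see raid_ctr()).
+ */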
+
+/* Log RAID set information to kernel log. */
+static void raid_set_log(struct raid_set *rs, unsigned speed)
+{
+ unsigned p;
+ char buf[BDEVNAME_SIZE];
+
+ for (p = 0; p < rs->set.raid_devs; p++)
+ DMINFO("/dev/%s is raid disk %u",
+ bdevname(rs->dev[p].dev->bdev, buf), p);
+
+ DMINFO("%d/%d/%d sectors chunk/io/recovery size, %u stripes",
+ rs->set.chunk_size, rs->set.io_size, rs->recover.io_size,
+ atomic_read(&rs->sc.stripes));
+ DMINFO("algorithm \"%s\", %u chunks with %uMB/s", rs->xor.f->name,
+ rs->xor.chunks, mbpers(rs, speed));
+ DMINFO("%s set with net %u/%u devices", rs->set.raid_type->descr,
+ rs->set.data_devs, rs->set.raid_devs);
+}
+
+/* Get all devices and offsets. */
+static int
+dev_parms(struct dm_target *ti, struct raid_set *rs,
+ char **argv, int *p)
+{
+ for (*p = 0; *p < rs->set.raid_devs; (*p)++, argv += 2) {
+ int r;
+ unsigned long long tmp;
+ struct raid_dev *dev = rs->dev + *p;
+ union dev_lookup dl = {.dev = dev };
+
+ /* Get offset and device. */
+ r = sscanf(argv[1], "%llu", &tmp);
+ if (r != 1)
+ TI_ERR("Invalid RAID device offset parameter");
+
+ dev->start = tmp;
+ r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+ &dev->dev);
+ if (r)
+ TI_ERR_RET("RAID device lookup failure", r);
+
+ r = raid_dev_lookup(rs, bynumber, &dl);
+ if (r != -ENODEV && r < *p) {
+ (*p)++; /* Ensure dm_put_device() on actual device. */
+ TI_ERR_RET("Duplicate RAID device", -ENXIO);
+ }
+ }
+
+ return 0;
+}
+
+/* Set recovery bandwidth. */
+static INLINE void
+recover_set_bandwidth(struct raid_set *rs, unsigned bandwidth)
+{
+ rs->recover.bandwidth = bandwidth;
+ rs->recover.bandwidth_work = 100 / bandwidth;
+}
+
+/* Handle variable number of RAID parameters. */
+static int
+raid_variable_parms(struct dm_target *ti, char **argv,
+ unsigned i, int *raid_parms,
+ int *chunk_size, int *chunk_size_parm,
+ int *stripes, int *stripes_parm,
+ int *io_size, int *io_size_parm,
+ int *recover_io_size, int *recover_io_size_parm,
+ int *bandwidth, int *bandwidth_parm)
+{
+ /* Fetch # of variable raid parameters. */
+ if (sscanf(argv[i++], "%d", raid_parms) != 1 ||
+ !range_ok(*raid_parms, 0, 5))
+ TI_ERR("Bad variable raid parameters number");
+
+ if (*raid_parms) {
+ /*
+ * If we've got variable RAID parameters,
+ * chunk size is the first one
+ */
+ if (sscanf(argv[i++], "%d", chunk_size) != 1 ||
+ (*chunk_size != -1 &&
+ (!POWER_OF_2(*chunk_size) ||
+ !range_ok(*chunk_size, IO_SIZE_MIN, CHUNK_SIZE_MAX))))
+ TI_ERR("Invalid chunk size; must be 2^^n and <= 16384");
+
+ *chunk_size_parm = *chunk_size;
+ if (*chunk_size == -1)
+ *chunk_size = CHUNK_SIZE;
+
+ /*
+ * In case we've got 2 or more variable raid
+ * parameters, the number of stripes is the second one
+ */
+ if (*raid_parms > 1) {
+ if (sscanf(argv[i++], "%d", stripes) != 1 ||
+ (*stripes != -1 &&
+ !range_ok(*stripes, STRIPES_MIN,
+ STRIPES_MAX)))
+ TI_ERR("Invalid number of stripes: must "
+ "be >= 8 and <= 8192");
+ }
+
+ *stripes_parm = *stripes;
+ if (*stripes == -1)
+ *stripes = STRIPES;
+
+ /*
+ * In case we've got 3 or more variable raid
+ * parameters, the io size is the third one.
+ */
+ if (*raid_parms > 2) {
+ if (sscanf(argv[i++], "%d", io_size) != 1 ||
+ (*io_size != -1 &&
+ (!POWER_OF_2(*io_size) ||
+ !range_ok(*io_size, IO_SIZE_MIN,
+ min(BIO_MAX_SECTORS / 2,
+ *chunk_size)))))
+ TI_ERR("Invalid io size; must "
+ "be 2^^n and less equal "
+ "min(BIO_MAX_SECTORS/2, chunk size)");
+ } else
+ *io_size = *chunk_size;
+
+ *io_size_parm = *io_size;
+ if (*io_size == -1)
+ *io_size = *chunk_size;
+
+ /*
+ * In case we've got 4 variable raid parameters,
+ * the recovery stripe io_size is the fourth one
+ */
+ if (*raid_parms > 3) {
+ if (sscanf(argv[i++], "%d", recover_io_size) != 1 ||
+ (*recover_io_size != -1 &&
+ (!POWER_OF_2(*recover_io_size) ||
+ !range_ok(*recover_io_size, RECOVER_IO_SIZE_MIN,
+ BIO_MAX_SECTORS / 2))))
+ TI_ERR("Invalid recovery io size; must be "
+ "2^^n and less equal BIO_MAX_SECTORS/2");
+ }
+
+ *recover_io_size_parm = *recover_io_size;
+ if (*recover_io_size == -1)
+ *recover_io_size = RECOVER_IO_SIZE;
+
+ /*
+ * In case we've got 5 variable raid parameters,
+ * the recovery io bandwidth is the fifth one
+ */
+ if (*raid_parms > 4) {
+ if (sscanf(argv[i++], "%d", bandwidth) != 1 ||
+ (*bandwidth != -1 &&
+ !range_ok(*bandwidth, BANDWIDTH_MIN,
+ BANDWIDTH_MAX)))
+ TI_ERR("Invalid recovery bandwidth "
+ "percentage; must be > 0 and <= 100");
+ }
+
+ *bandwidth_parm = *bandwidth;
+ if (*bandwidth == -1)
+ *bandwidth = BANDWIDTH;
+ }
+
+ return 0;
+}
+
+/* Parse optional locking parameters. */
+static int
+raid_locking_parms(struct dm_target *ti, char **argv,
+ unsigned i, int *locking_parms,
+ struct dm_raid45_locking_type **locking_type)
+{
+ *locking_parms = 0;
+ *locking_type = &locking_none;
+
+ if (!strnicmp(argv[i], "none", strlen(argv[i])))
+ *locking_parms = 1;
+ else if (!strnicmp(argv[i + 1], "locking", strlen(argv[i + 1]))) {
+ *locking_type = &locking_none;
+ *locking_parms = 2;
+ } else if (!strnicmp(argv[i + 1], "cluster", strlen(argv[i + 1]))) {
+ *locking_type = &locking_cluster;
+ /* FIXME: namespace. */
+ *locking_parms = 3;
+ }
+
+ return *locking_parms == 1 ? -EINVAL : 0;
+}
+
+/* Set backing device information properties of RAID set. */
+static void rs_set_bdi(struct raid_set *rs, unsigned stripes, unsigned chunks)
+{
+ unsigned p, ra_pages;
+ struct mapped_device *md = dm_table_get_md(rs->ti->table);
+ struct backing_dev_info *bdi = &dm_disk(md)->queue->backing_dev_info;
+
+ /* Set read-ahead for the RAID set and the component devices. */
+ bdi->ra_pages = stripes * stripe_pages(rs, rs->set.io_size);
+ ra_pages = chunks * chunk_pages(rs->set.io_size);
+ for (p = rs->set.raid_devs; p--; ) {
+ struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
+
+ q->backing_dev_info.ra_pages = ra_pages;
+ }
+
+ /* Set congested function and data. */
+ bdi->congested_fn = raid_set_congested;
+ bdi->congested_data = rs;
+
+ dm_put(md);
+}
+
+/* Get backing device information properties of RAID set. */
+static void rs_get_ra(struct raid_set *rs, unsigned *stripes, unsigned *chunks)
+{
+ struct mapped_device *md = dm_table_get_md(rs->ti->table);
+
+ *stripes = dm_disk(md)->queue->backing_dev_info.ra_pages
+ / stripe_pages(rs, rs->set.io_size);
+ *chunks = bdev_get_queue(rs->dev->dev->bdev)->backing_dev_info.ra_pages
+ / chunk_pages(rs->set.io_size);
+
+ dm_put(md);
+}
+
+/*
+ * Construct a RAID4/5 mapping:
+ *
+ * log_type #log_params <log_params> \
+ * raid_type [#parity_dev] #raid_variable_params <raid_params> \
+ * [locking "none"/"cluster"]
+ * #raid_devs #dev_to_initialize [<dev_path> <offset>]{3,}
+ *
+ * log_type = "core"/"disk",
+ * #log_params = 1-3 (1-2 for core dirty log type, 3 for disk dirty log only)
+ * log_params = [dirty_log_path] region_size [[no]sync])
+ *
+ * raid_type = "raid4", "raid5_la", "raid5_ra", "raid5_ls", "raid5_rs"
+ *
+ * #parity_dev = N if raid_type = "raid4"
+ * o N = -1: pick default = last device
+ * o N >= 0 and < #raid_devs: parity device index
+ *
+ * #raid_variable_params = 0-5; raid_params (-1 = default):
+ * [chunk_size [#stripes [io_size [recover_io_size [%recovery_bandwidth]]]]]
+ * o chunk_size (unit to calculate drive addresses; must be 2^^n, > 8
+ * and <= CHUNK_SIZE_MAX)
+ * o #stripes is the number of stripes allocated to the stripe cache
+ * (must be >= STRIPES_MIN and <= STRIPES_MAX)
+ * o io_size (io unit size per device in sectors; must be 2^^n and > 8)
+ * o recover_io_size (io unit size per device for recovery in sectors;
+ * must be 2^^n, > SECTORS_PER_PAGE and <= region_size)
+ * o %recovery_bandwidth is the maximum amount spent for recovery during
+ * application io (1-100%)
+ * If raid_variable_params = 0, defaults will be used.
+ * Any raid_variable_param can be set to -1 to apply a default
+ *
+ * #raid_devs = N (N >= 3)
+ *
+ * #dev_to_initialize = N
+ * -1: initialize parity on all devices
+ * >= 0 and < #raid_devs: initialize raid_path; used to force reconstruction
+ * of a failed devices content after replacement
+ *
+ * <dev_path> = device_path (eg, /dev/sdd1)
+ * <offset> = begin at offset on <dev_path>
+ *
+ */
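+/*
+ * Example mapping (hypothetical devices; core dirty log with 8192
+ * sector regions, nosync, raid5_la, no variable raid parameters,
+ * 3 devices, no device to initialize; assumes the compiled-in
+ * defaults divide the per-device size evenly):
+ *
+ * echo "0 409600 raid45 core 2 8192 nosync raid5_la 0 3 -1 \
+ *       /dev/sda 0 /dev/sdb 0 /dev/sdc 0" | dmsetup create r5
+ */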
+#define MIN_PARMS 13
+static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+ int bandwidth = BANDWIDTH, bandwidth_parm = -1,
+ chunk_size = CHUNK_SIZE, chunk_size_parm = -1,
+ dev_to_init, dl_parms, locking_parms, parity_parm, pi = -1,
+ i, io_size = IO_SIZE, io_size_parm = -1,
+ r, raid_devs, raid_parms,
+ recover_io_size = RECOVER_IO_SIZE, recover_io_size_parm = -1,
+ stripes = STRIPES, stripes_parm = -1;
+ unsigned speed;
+ sector_t tmp, sectors_per_dev;
+ struct dm_raid45_locking_type *locking;
+ struct raid_set *rs;
+ struct raid_type *raid_type;
+
+ /* Ensure minimum number of parameters. */
+ if (argc < MIN_PARMS)
+ TI_ERR("Not enough parameters");
+
+ /* Fetch # of dirty log parameters. */
+ if (sscanf(argv[1], "%d", &dl_parms) != 1
+ || !range_ok(dl_parms, 1, 4711))
+ TI_ERR("Bad dirty log parameters number");
+
+ /* Check raid_type. */
+ raid_type = get_raid_type(argv[dl_parms + 2]);
+ if (!raid_type)
+ TI_ERR("Bad raid type");
+
+ /* In case of RAID4, parity drive is selectable. */
+ parity_parm = !!(raid_type->level == raid4);
+
+ /* Handle variable number of RAID parameters. */
+ r = raid_variable_parms(ti, argv, dl_parms + parity_parm + 3,
+ &raid_parms,
+ &chunk_size, &chunk_size_parm,
+ &stripes, &stripes_parm,
+ &io_size, &io_size_parm,
+ &recover_io_size, &recover_io_size_parm,
+ &bandwidth, &bandwidth_parm);
+ if (r)
+ return r;
+
+ r = raid_locking_parms(ti, argv,
+ dl_parms + parity_parm + raid_parms + 4,
+ &locking_parms, &locking);
+ if (r)
+ return r;
+
+ /* # of raid devices. */
+ i = dl_parms + parity_parm + raid_parms + locking_parms + 4;
+ if (sscanf(argv[i], "%d", &raid_devs) != 1 ||
+ raid_devs < raid_type->minimal_devs)
+ TI_ERR("Invalid number of raid devices");
+
+ /* In case of RAID4, check parity drive index is in limits. */
+ if (raid_type->level == raid4) {
+ /* Fetch index of parity device. */
+ if (sscanf(argv[dl_parms + 3], "%d", &pi) != 1 ||
+ !range_ok(pi, 0, raid_devs - 1))
+ TI_ERR("Invalid RAID4 parity device index");
+ }
+
+ /*
+ * Index of device to initialize starts at 0
+ *
+ * o -1 -> don't initialize a particular device,
+ * o 0..raid_devs-1 -> initialize respective device
+ * (used for reconstruction of a replaced device)
+ */
+ if (sscanf
+ (argv[dl_parms + parity_parm + raid_parms + locking_parms + 5],
+ "%d", &dev_to_init) != 1
+ || !range_ok(dev_to_init, -1, raid_devs - 1))
+ TI_ERR("Invalid number for raid device to initialize");
+
+ /* Check # of raid device arguments. */
+ if (argc - dl_parms - parity_parm - raid_parms - 6 !=
+ 2 * raid_devs)
+ TI_ERR("Wrong number of raid device/offset arguments");
+
+ /*
+ * Check that the table length is divisible
+ * w/o rest by (raid_devs - parity_devs)
+ */
+ if (!multiple(ti->len, raid_devs - raid_type->parity_devs,
+ &sectors_per_dev))
+ TI_ERR
+ ("Target length not divisible by number of data devices");
+
+ /*
+ * Check that the device size is
+ * divisible w/o rest by chunk size
+ */
+ if (!multiple(sectors_per_dev, chunk_size, &tmp))
+ TI_ERR("Device length not divisible by chunk_size");
+
+ /****************************************************************
+ * Now that we checked the constructor arguments ->
+ * let's allocate the RAID set
+ ****************************************************************/
+ r = context_alloc(&rs, raid_type, stripes, chunk_size, io_size,
+ recover_io_size, raid_devs, sectors_per_dev,
+ ti, dl_parms, argv);
+ if (r)
+ return r;
+
+ /*
+ * Set these here in order to avoid passing
+ * too many arguments to context_alloc()
+ */
+ rs->set.dev_to_init_parm = dev_to_init;
+ rs->set.dev_to_init = dev_to_init;
+ rs->set.pi_parm = pi;
+ rs->set.pi = (pi == -1) ? rs->set.data_devs : pi;
+ rs->set.raid_parms = raid_parms;
+ rs->set.chunk_size_parm = chunk_size_parm;
+ rs->set.io_size_parm = io_size_parm;
+ rs->sc.stripes_parm = stripes_parm;
+ rs->recover.io_size_parm = recover_io_size_parm;
+ rs->recover.bandwidth_parm = bandwidth_parm;
+ recover_set_bandwidth(rs, bandwidth);
+
+ /* Use locking type to lock stripe access. */
+ rs->locking = locking;
+
+ /* Get the device/offset tuples. */
+ argv += dl_parms + 6 + parity_parm + raid_parms;
+ r = dev_parms(ti, rs, argv, &i);
+ if (r)
+ goto err;
+
+ /* Initialize recovery. */
+ rs->recover.start_jiffies = jiffies;
+ rs->recover.end_jiffies = 0;
+ recovery_region_reset(rs);
+
+ /* Allow for recovery of any nosync regions. */
+ SetRSRecover(rs);
+
+ /* Set backing device information (eg. read ahead). */
+ rs_set_bdi(rs, chunk_size * 2, io_size * 4);
+ SetRSCheckOverwrite(rs); /* Allow chunk overwrite checks. */
+
+ speed = xor_optimize(rs); /* Select best xor algorithm. */
+
+ /* Initialize work queue to handle this RAID set's io. */
+ r = rs_workqueue_init(rs);
+ if (r)
+ goto err;
+
+ raid_set_log(rs, speed); /* Log information about RAID set. */
+
+ /*
+ * Make sure that dm core only hands maximum io size
+ * length down and pays attention to io boundaries.
+ */
+ ti->split_io = rs->set.io_size;
+ ti->private = rs;
+ return 0;
+
+err:
+ context_free(rs, ti, i);
+ return r;
+}
+
+/*
+ * Destruct a raid mapping
+ */
+static void raid_dtr(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+
+ /* Indicate recovery end so that ios in flight drain. */
+ ClearRSRecover(rs);
+
+ wake_do_raid(rs); /* Wake daemon. */
+ wait_ios(rs); /* Wait for any io still being processed. */
+ destroy_workqueue(rs->io.wq);
+ context_free(rs, ti, rs->set.raid_devs);
+}
+
+/* Queues ios to RAID sets. */
+static inline void queue_bio(struct raid_set *rs, struct bio *bio)
+{
+ int wake;
+ struct bio_list *in = &rs->io.in;
+ spinlock_t *in_lock = &rs->io.in_lock;
+
+ spin_lock_irq(in_lock);
+ wake = bio_list_empty(in);
+ bio_list_add(in, bio);
+ spin_unlock_irq(in_lock);
+
+ /* Wake daemon if input list was empty. */
+ if (wake)
+ wake_do_raid(rs);
+}
+
+/* Raid mapping function. */
+static int raid_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ /* I don't want to waste stripe cache capacity. */
+ if (bio_rw(bio) == READA)
+ return -EIO;
+ else {
+ struct raid_set *rs = ti->private;
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats +
+ (bio_data_dir(bio) == WRITE ?
+ S_BIOS_WRITE : S_BIOS_READ));
+
+ /*
+ * Get io reference to be waiting for to drop
+ * to zero on device suspension/destruction.
+ */
+ io_get(rs);
+ bio->bi_sector -= ti->begin; /* Remap sector. */
+ queue_bio(rs, bio); /* Queue to the daemon. */
+ return DM_MAPIO_SUBMITTED; /* Handle later. */
+ }
+}
+
+/* Device suspend. */
+static void raid_postsuspend(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+ struct dm_dirty_log *dl = rs->recover.dl;
+
+ SetRSSuspended(rs);
+
+ if (RSRecover(rs))
+ dm_rh_stop_recovery(rs->recover.rh); /* Wakes do_raid(). */
+ else
+ wake_do_raid(rs);
+
+ wait_ios(rs); /* Wait for completion of all ios being processed. */
+ if (dl->type->postsuspend && dl->type->postsuspend(dl))
+ /* Suspend dirty log. */
+ /* FIXME: need better error handling. */
+ DMWARN("log suspend failed");
+}
+
+/* Device resume. */
+static void raid_resume(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+ struct recover *rec = &rs->recover;
+ struct dm_dirty_log *dl = rec->dl;
+
+ if (dl->type->resume && dl->type->resume(dl))
+ /* Resume dirty log. */
+ /* FIXME: need better error handling. */
+ DMWARN("log resume failed");
+
+ rec->nr_regions_to_recover =
+ rec->nr_regions - dl->type->get_sync_count(dl);
+
+ ClearRSSuspended(rs);
+
+ /* Reset any unfinished recovery. */
+ if (RSRecover(rs)) {
+ recovery_region_reset(rs);
+ dm_rh_start_recovery(rec->rh);/* Calls wake_do_raid(). */
+ } else
+ wake_do_raid(rs);
+}
+
+static INLINE unsigned sc_size(struct raid_set *rs)
+{
+ return to_sector(atomic_read(&rs->sc.stripes) *
+ (sizeof(struct stripe) +
+ (sizeof(struct stripe_set) +
+ (sizeof(struct page_list) +
+ to_bytes(rs->set.io_size) *
+ rs->set.raid_devs)) +
+ (rs->recover.
+ end_jiffies ? 0 : to_bytes(rs->set.raid_devs *
+ rs->recover.
+ io_size))));
+}
+
+/* REMOVEME: status output for development. */
+static void
+raid_devel_stats(struct dm_target *ti, char *result,
+ unsigned *size, unsigned maxlen)
+{
+ unsigned chunks, stripes, sz = *size;
+ unsigned long j;
+ char buf[BDEVNAME_SIZE], *p;
+ struct stats_map *sm, *sm_end = ARRAY_END(stats_map);
+ struct raid_set *rs = ti->private;
+ struct recover *rec = &rs->recover;
+ struct timespec ts;
+
+ DMEMIT("%s ", version);
+ DMEMIT("io_inprocess=%d ", atomic_read(&rs->io.in_process));
+ DMEMIT("io_inprocess_max=%d ", atomic_read(&rs->io.in_process_max));
+
+ for (sm = stats_map; sm < sm_end; sm++)
+ DMEMIT("%s%d", sm->str, atomic_read(rs->stats + sm->type));
+
+ DMEMIT(" overwrite=%s ", RSCheckOverwrite(rs) ? "on" : "off");
+ DMEMIT("sc=%u/%u/%u/%u/%u ", rs->set.chunk_size, rs->set.io_size,
+ atomic_read(&rs->sc.stripes), rs->sc.hash.buckets,
+ sc_size(rs));
+
+ j = (rec->end_jiffies ? rec->end_jiffies : jiffies) -
+ rec->start_jiffies;
+ jiffies_to_timespec(j, &ts);
+ sprintf(buf, "%ld.%ld", ts.tv_sec, ts.tv_nsec);
+ p = strchr(buf, '.');
+ p[3] = 0;
+
+ DMEMIT("rg=%llu%s/%llu/%llu/%u %s ",
+ (unsigned long long) rec->nr_regions_recovered,
+ RSRegionGet(rs) ? "+" : "",
+ (unsigned long long) rec->nr_regions_to_recover,
+ (unsigned long long) rec->nr_regions, rec->bandwidth, buf);
+
+ rs_get_ra(rs, &stripes, &chunks);
+ DMEMIT("ra=%u/%u ", stripes, chunks);
+
+ *size = sz;
+}
+
+static int
+raid_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned maxlen)
+{
+ unsigned i, sz = 0;
+ char buf[BDEVNAME_SIZE];
+ struct raid_set *rs = ti->private;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ /* REMOVEME: statistics. */
+ if (RSDevelStats(rs))
+ raid_devel_stats(ti, result, &sz, maxlen);
+
+ DMEMIT("%u ", rs->set.raid_devs);
+
+ for (i = 0; i < rs->set.raid_devs; i++)
+ DMEMIT("%s ",
+ format_dev_t(buf, rs->dev[i].dev->bdev->bd_dev));
+
+ DMEMIT("1 ");
+ for (i = 0; i < rs->set.raid_devs; i++) {
+ DMEMIT("%c", dev_operational(rs, i) ? 'A' : 'D');
+
+ if (rs->set.raid_type->level == raid4 &&
+ i == rs->set.pi)
+ DMEMIT("p");
+
+ if (rs->set.dev_to_init == i)
+ DMEMIT("i");
+ }
+
+ break;
+
+ case STATUSTYPE_TABLE:
+ sz = rs->recover.dl->type->status(rs->recover.dl, type,
+ result, maxlen);
+ DMEMIT("%s %u ", rs->set.raid_type->name,
+ rs->set.raid_parms);
+
+ if (rs->set.raid_type->level == raid4)
+ DMEMIT("%d ", rs->set.pi_parm);
+
+ if (rs->set.raid_parms)
+ DMEMIT("%d ", rs->set.chunk_size_parm);
+
+ if (rs->set.raid_parms > 1)
+ DMEMIT("%d ", rs->sc.stripes_parm);
+
+ if (rs->set.raid_parms > 2)
+ DMEMIT("%d ", rs->set.io_size_parm);
+
+ if (rs->set.raid_parms > 3)
+ DMEMIT("%d ", rs->recover.io_size_parm);
+
+ if (rs->set.raid_parms > 4)
+ DMEMIT("%d ", rs->recover.bandwidth_parm);
+
+ DMEMIT("%u %d ", rs->set.raid_devs, rs->set.dev_to_init);
+
+ for (i = 0; i < rs->set.raid_devs; i++)
+ DMEMIT("%s %llu ",
+ format_dev_t(buf,
+ rs->dev[i].dev->bdev->bd_dev),
+ (unsigned long long) rs->dev[i].start);
+ }
+
+ return 0;
+}
+
+/*
+ * Message interface
+ */
+enum raid_msg_actions {
+ act_bw, /* Recovery bandwidth switch. */
+ act_dev, /* Device failure switch. */
+ act_overwrite, /* Stripe overwrite check. */
+ act_read_ahead, /* Set read ahead. */
+ act_stats, /* Development statistics switch. */
+ act_sc, /* Stripe cache switch. */
+
+ act_on, /* Set entity on. */
+ act_off, /* Set entity off. */
+ act_reset, /* Reset entity. */
+
+ act_set = act_on, /* Set # absolute. */
+ act_grow = act_off, /* Grow # by an amount. */
+ act_shrink = act_reset, /* Shrink # by an amount. */
+};
+
+/* Turn a delta to absolute. */
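+/* E.g. a 'ba grow 10' message with a current bandwidth of 20% results in 30%. */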
+static int _absolute(unsigned long action, int act, int r)
+{
+ /* Make delta absolute. */
+ if (test_bit(act_set, &action))
+ ;
+ else if (test_bit(act_grow, &action))
+ r += act;
+ else if (test_bit(act_shrink, &action))
+ r = act - r;
+ else
+ r = -EINVAL;
+
+ return r;
+}
+
+ /* Change recovery io bandwidth. */
+static int bandwidth_change(struct dm_msg *msg, void *context)
+{
+ struct raid_set *rs = context;
+ int act = rs->recover.bandwidth;
+ int bandwidth = DM_MSG_INT_ARG(msg);
+
+ if (range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
+ /* Make delta bandwidth absolute. */
+ bandwidth = _absolute(msg->action, act, bandwidth);
+
+ /* Check range. */
+ if (range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
+ recover_set_bandwidth(rs, bandwidth);
+ return 0;
+ }
+ }
+
+ set_bit(dm_msg_ret_arg, &msg->ret);
+ set_bit(dm_msg_ret_inval, &msg->ret);
+ return -EINVAL;
+}
+
+/* Change state of a device (running/offline). */
+/* FIXME: this only works while recovering! */
+static int device_state(struct dm_msg *msg, void *context)
+{
+ int r;
+ const char *str = "is already ";
+ union dev_lookup dl = { .dev_name = DM_MSG_STR_ARG(msg) };
+ struct raid_set *rs = context;
+
+ r = raid_dev_lookup(rs, strchr(dl.dev_name, ':') ?
+ bymajmin : byname, &dl);
+ if (r == -ENODEV) {
+		DMERR("device %s is not a member of this set", dl.dev_name);
+ return r;
+ }
+
+ if (test_bit(act_off, &msg->action)) {
+ if (dev_operational(rs, r))
+ str = "";
+ } else if (!dev_operational(rs, r))
+ str = "";
+
+ DMINFO("/dev/%s %s%s", dl.dev_name, str,
+ test_bit(act_off, &msg->action) ? "offline" : "running");
+
+ return test_bit(act_off, &msg->action) ?
+ raid_set_check_and_degrade(rs, NULL, r) :
+ raid_set_check_and_upgrade(rs, r);
+}
+
+/* Set/reset development feature flags. */
+static int devel_flags(struct dm_msg *msg, void *context)
+{
+ struct raid_set *rs = context;
+
+ if (test_bit(act_on, &msg->action))
+ return test_and_set_bit(msg->spec->parm,
+ &rs->io.flags) ? -EPERM : 0;
+ else if (test_bit(act_off, &msg->action))
+ return test_and_clear_bit(msg->spec->parm,
+ &rs->io.flags) ? 0 : -EPERM;
+ else if (test_bit(act_reset, &msg->action)) {
+ if (test_bit(act_stats, &msg->action)) {
+ stats_reset(rs);
+ goto on;
+ } else if (test_bit(act_overwrite, &msg->action)) {
+on:
+ set_bit(msg->spec->parm, &rs->io.flags);
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+/* Set stripe and chunk read ahead pages. */
+static int read_ahead_set(struct dm_msg *msg, void *context)
+{
+ int stripes = DM_MSG_INT_ARGS(msg, 0);
+ int chunks = DM_MSG_INT_ARGS(msg, 1);
+
+ if (range_ok(stripes, 1, 512) &&
+ range_ok(chunks, 1, 512)) {
+ rs_set_bdi(context, stripes, chunks);
+ return 0;
+ }
+
+ set_bit(dm_msg_ret_arg, &msg->ret);
+ set_bit(dm_msg_ret_inval, &msg->ret);
+ return -EINVAL;
+}
+
+/* Resize the stripe cache. */
+static int stripecache_resize(struct dm_msg *msg, void *context)
+{
+ int act, stripes;
+ struct raid_set *rs = context;
+
+	/* Deny permission while the daemon is still shrinking! */
+ if (atomic_read(&rs->sc.stripes_to_shrink))
+ return -EPERM;
+
+ stripes = DM_MSG_INT_ARG(msg);
+ if (stripes > 0) {
+ act = atomic_read(&rs->sc.stripes);
+
+ /* Make delta stripes absolute. */
+ stripes = _absolute(msg->action, act, stripes);
+
+ /*
+ * Check range and that the # of stripes changes.
+		 * We can grow from here but need to leave any
+ * shrinking to the worker for synchronization.
+ */
+ if (range_ok(stripes, STRIPES_MIN, STRIPES_MAX)) {
+ if (stripes > act)
+ return sc_grow(&rs->sc, stripes - act, SC_GROW);
+ else if (stripes < act) {
+ atomic_set(&rs->sc.stripes_to_shrink,
+ act - stripes);
+ wake_do_raid(rs);
+ }
+
+ return 0;
+ }
+ }
+
+ set_bit(dm_msg_ret_arg, &msg->ret);
+ set_bit(dm_msg_ret_inval, &msg->ret);
+ return -EINVAL;
+}
+
+/* Parse the RAID message action. */
+/*
+ * 'ba[ndwidth] {se[t],g[row],sh[rink]} #'	# e.g. 'ba se 50'
+ * 'de[vice] o[ffline]/r[unning] DevName/maj:min' # e.g. 'device o /dev/sda'
+ * 'o[verwrite] {on,of[f],r[eset]}'	# e.g. 'o of'
+ * 'r[ead_ahead] set #stripes #chunks'	# e.g. 'r se 3 2'
+ * 'sta[tistics] {on,of[f],r[eset]}'	# e.g. 'stat of'
+ * 'str[ipecache] {se[t],g[row],sh[rink]} #'	# e.g. 'stripe set 1024'
+ */
+static int
+raid_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	/* Variables to store the parsed parameters in. */
+ static int i[2];
+ static unsigned long *i_arg[] = {
+ (unsigned long *) i + 0,
+ (unsigned long *) i + 1,
+ };
+ static char *p;
+ static unsigned long *p_arg[] = { (unsigned long *) &p };
+
+ /* Declare all message option strings. */
+ static char *str_sgs[] = { "set", "grow", "shrink" };
+ static char *str_dev[] = { "running", "offline" };
+ static char *str_oor[] = { "on", "off", "reset" };
+
+ /* Declare all actions. */
+ static unsigned long act_sgs[] = { act_set, act_grow, act_shrink };
+ static unsigned long act_oor[] = { act_on, act_off, act_reset };
+
+ /* Bandwidth option. */
+ static struct dm_message_option bw_opt = { 3, str_sgs, act_sgs };
+ static struct dm_message_argument bw_args = {
+ 1, i_arg, { dm_msg_int_t }
+ };
+
+ /* Device option. */
+ static struct dm_message_option dev_opt = { 2, str_dev, act_oor };
+ static struct dm_message_argument dev_args = {
+ 1, p_arg, { dm_msg_base_t }
+ };
+
+ /* Read ahead option. */
+ static struct dm_message_option ra_opt = { 1, str_sgs, act_sgs };
+ static struct dm_message_argument ra_args = {
+ 2, i_arg, { dm_msg_int_t, dm_msg_int_t }
+ };
+
+ static struct dm_message_argument null_args = {
+ 0, NULL, { dm_msg_int_t }
+ };
+
+ /* Overwrite and statistics option. */
+ static struct dm_message_option ovr_stats_opt = { 3, str_oor, act_oor };
+
+	/* Stripecache option. */
+ static struct dm_message_option stripe_opt = { 3, str_sgs, act_sgs };
+
+ /* Declare messages. */
+ static struct dm_msg_spec specs[] = {
+ { "bandwidth", act_bw, &bw_opt, &bw_args,
+ 0, bandwidth_change },
+ { "device", act_dev, &dev_opt, &dev_args,
+ 0, device_state },
+ { "overwrite", act_overwrite, &ovr_stats_opt, &null_args,
+ RS_CHECK_OVERWRITE, devel_flags },
+ { "read_ahead", act_read_ahead, &ra_opt, &ra_args,
+ 0, read_ahead_set },
+ { "statistics", act_stats, &ovr_stats_opt, &null_args,
+ RS_DEVEL_STATS, devel_flags },
+ { "stripecache", act_sc, &stripe_opt, &bw_args,
+ 0, stripecache_resize },
+ };
+
+ /* The message for the parser. */
+ struct dm_msg msg = {
+ .num_specs = ARRAY_SIZE(specs),
+ .specs = specs,
+ };
+
+ return dm_message_parse(TARGET, &msg, ti->private, argc, argv);
+}
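+
+/*
+ * Example messages matching the specs above (device names and numbers
+ * are illustrative only):
+ *
+ *	dmsetup message <dev> 0 bandwidth set 50
+ *	dmsetup message <dev> 0 device offline /dev/sdb
+ *	dmsetup message <dev> 0 stripecache grow 64
+ */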
+/*
+ * END message interface
+ */
+
+static struct target_type raid_target = {
+ .name = "raid45",
+ .version = {1, 0, 0},
+ .module = THIS_MODULE,
+ .ctr = raid_ctr,
+ .dtr = raid_dtr,
+ .map = raid_map,
+ .postsuspend = raid_postsuspend,
+ .resume = raid_resume,
+ .status = raid_status,
+ .message = raid_message,
+};
+
+static void init_exit(const char *bad_msg, const char *good_msg, int r)
+{
+ if (r)
+ DMERR("Failed to %sregister target [%d]", bad_msg, r);
+ else
+ DMINFO("%s %s", good_msg, version);
+}
+
+static int __init dm_raid_init(void)
+{
+ int r;
+
+ r = dm_register_target(&raid_target);
+ init_exit("", "initialized", r);
+ return r;
+}
+
+static void __exit dm_raid_exit(void)
+{
+ dm_unregister_target(&raid_target);
+ init_exit("un", "exit", 0);
+}
+
+/* Module hooks. */
+module_init(dm_raid_init);
+module_exit(dm_raid_exit);
+
+MODULE_DESCRIPTION(DM_NAME " raid4/5 target");
+MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
+MODULE_LICENSE("GPL");
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
+ #include <linux/slab.h>
#include <net/ip.h>
#include <xen/xen.h>
#include <linux/ctype.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/notifier.h>
-#include <linux/kthread.h>
#include <linux/mutex.h>
-#include <linux/io.h>
+ #include <linux/slab.h>
+#include <linux/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <linux/namei.h>
#include <linux/miscdevice.h>
#include <linux/magic.h>
+ #include <linux/slab.h>
+#include <linux/precache.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
--- /dev/null
+/*
+ * Copyright (C) 2006 Andreas Gruenbacher <a.gruenbacher@computer.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
++#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/fs_struct.h>
+#include <linux/nfs4acl.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * ACL entries that have ACE4_SPECIAL_WHO set in ace->e_flags use the
+ * pointer values of these constants in ace->u.e_who to avoid massive
+ * amounts of string comparisons.
+ */
+
+const char nfs4ace_owner_who[] = "OWNER@";
+const char nfs4ace_group_who[] = "GROUP@";
+const char nfs4ace_everyone_who[] = "EVERYONE@";
+
+EXPORT_SYMBOL(nfs4ace_owner_who);
+EXPORT_SYMBOL(nfs4ace_group_who);
+EXPORT_SYMBOL(nfs4ace_everyone_who);
+
+/**
+ * nfs4acl_alloc - allocate an acl
+ * @count: number of entries
+ */
+struct nfs4acl *
+nfs4acl_alloc(int count)
+{
+ size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace);
+ struct nfs4acl *acl = kmalloc(size, GFP_KERNEL);
+
+ if (acl) {
+ memset(acl, 0, size);
+ atomic_set(&acl->a_refcount, 1);
+ acl->a_count = count;
+ }
+ return acl;
+}
+EXPORT_SYMBOL(nfs4acl_alloc);
+
+/**
+ * nfs4acl_clone - create a copy of an acl
+ */
+struct nfs4acl *
+nfs4acl_clone(const struct nfs4acl *acl)
+{
+ int count = acl->a_count;
+ size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace);
+ struct nfs4acl *dup = kmalloc(size, GFP_KERNEL);
+
+ if (dup) {
+ memcpy(dup, acl, size);
+ atomic_set(&dup->a_refcount, 1);
+ }
+ return dup;
+}
+
+/*
+ * The POSIX permissions are supersets of the below mask flags.
+ *
+ * The ACE4_READ_ATTRIBUTES and ACE4_READ_ACL flags are always granted
+ * in POSIX. The ACE4_SYNCHRONIZE flag has no meaning under POSIX. We
+ * make sure that we do not mask them if they are set, so that users who
+ * rely on these flags won't get confused.
+ */
+#define ACE4_POSIX_MODE_READ ( \
+ ACE4_READ_DATA | ACE4_LIST_DIRECTORY )
+#define ACE4_POSIX_MODE_WRITE ( \
+ ACE4_WRITE_DATA | ACE4_ADD_FILE | \
+ ACE4_APPEND_DATA | ACE4_ADD_SUBDIRECTORY | \
+ ACE4_DELETE_CHILD )
+#define ACE4_POSIX_MODE_EXEC ( \
+ ACE4_EXECUTE)
+
+static int
+nfs4acl_mask_to_mode(unsigned int mask)
+{
+ int mode = 0;
+
+ if (mask & ACE4_POSIX_MODE_READ)
+ mode |= MAY_READ;
+ if (mask & ACE4_POSIX_MODE_WRITE)
+ mode |= MAY_WRITE;
+ if (mask & ACE4_POSIX_MODE_EXEC)
+ mode |= MAY_EXEC;
+
+ return mode;
+}
+
+/**
+ * nfs4acl_masks_to_mode - compute file mode permission bits from file masks
+ *
+ * Compute the file mode permission bits from the file masks in the acl.
+ */
+int
+nfs4acl_masks_to_mode(const struct nfs4acl *acl)
+{
+ return nfs4acl_mask_to_mode(acl->a_owner_mask) << 6 |
+ nfs4acl_mask_to_mode(acl->a_group_mask) << 3 |
+ nfs4acl_mask_to_mode(acl->a_other_mask);
+}
+EXPORT_SYMBOL(nfs4acl_masks_to_mode);
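+
+/*
+ * Worked example (hypothetical masks): an acl with
+ * a_owner_mask == ACE4_POSIX_MODE_READ | ACE4_POSIX_MODE_WRITE,
+ * a_group_mask == ACE4_POSIX_MODE_READ and a_other_mask == 0 yields
+ * (MAY_READ | MAY_WRITE) << 6 | MAY_READ << 3 == 0640.
+ */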
+
+static unsigned int
+nfs4acl_mode_to_mask(mode_t mode)
+{
+ unsigned int mask = ACE4_POSIX_ALWAYS_ALLOWED;
+
+ if (mode & MAY_READ)
+ mask |= ACE4_POSIX_MODE_READ;
+ if (mode & MAY_WRITE)
+ mask |= ACE4_POSIX_MODE_WRITE;
+ if (mode & MAY_EXEC)
+ mask |= ACE4_POSIX_MODE_EXEC;
+
+ return mask;
+}
+
+/**
+ * nfs4acl_chmod - update the file masks to reflect the new mode
+ * @mode: file mode permission bits to apply to the @acl
+ *
+ * Converts the mask flags corresponding to the owner, group, and other file
+ * permissions and computes the file masks. Returns @acl if it already has the
+ * appropriate file masks, or updates the flags in a copy of @acl. Takes over
+ * @acl.
+ */
+struct nfs4acl *
+nfs4acl_chmod(struct nfs4acl *acl, mode_t mode)
+{
+ unsigned int owner_mask, group_mask, other_mask;
+ struct nfs4acl *clone;
+
+ owner_mask = nfs4acl_mode_to_mask(mode >> 6);
+ group_mask = nfs4acl_mode_to_mask(mode >> 3);
+ other_mask = nfs4acl_mode_to_mask(mode);
+
+ if (acl->a_owner_mask == owner_mask &&
+ acl->a_group_mask == group_mask &&
+ acl->a_other_mask == other_mask &&
+ (!nfs4acl_is_auto_inherit(acl) || nfs4acl_is_protected(acl)))
+ return acl;
+
+ clone = nfs4acl_clone(acl);
+ nfs4acl_put(acl);
+ if (!clone)
+ return ERR_PTR(-ENOMEM);
+
+ clone->a_owner_mask = owner_mask;
+ clone->a_group_mask = group_mask;
+ clone->a_other_mask = other_mask;
+ if (nfs4acl_is_auto_inherit(clone))
+ clone->a_flags |= ACL4_PROTECTED;
+
+ if (nfs4acl_write_through(&clone)) {
+ nfs4acl_put(clone);
+ clone = ERR_PTR(-ENOMEM);
+ }
+ return clone;
+}
+EXPORT_SYMBOL(nfs4acl_chmod);
+
+/**
+ * nfs4acl_want_to_mask - convert permission want argument to a mask
+ * @want: @want argument of the permission inode operation
+ *
+ * When checking for append, @want is (MAY_WRITE | MAY_APPEND).
+ */
+unsigned int
+nfs4acl_want_to_mask(int want)
+{
+ unsigned int mask = 0;
+
+ if (want & MAY_READ)
+ mask |= ACE4_READ_DATA;
+ if (want & MAY_APPEND)
+ mask |= ACE4_APPEND_DATA;
+ else if (want & MAY_WRITE)
+ mask |= ACE4_WRITE_DATA;
+ if (want & MAY_EXEC)
+ mask |= ACE4_EXECUTE;
+
+ return mask;
+}
+EXPORT_SYMBOL(nfs4acl_want_to_mask);
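+
+/*
+ * For example, want == (MAY_WRITE | MAY_APPEND) maps to
+ * ACE4_APPEND_DATA only, while want == MAY_WRITE alone maps to
+ * ACE4_WRITE_DATA: an append-only request never needs ACE4_WRITE_DATA.
+ */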
+
+/**
+ * nfs4acl_capability_check - check for capabilities overriding read/write access
+ * @inode: inode to check
+ * @mask: requested access (ACE4_* bitmask)
+ *
+ * Capabilities other than CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH must be checked
+ * separately.
+ */
+static inline int nfs4acl_capability_check(struct inode *inode, unsigned int mask)
+{
+ /*
+ * Read/write DACs are always overridable.
+ * Executable DACs are overridable if at least one exec bit is set.
+ */
+ if (!(mask & (ACE4_WRITE_ACL | ACE4_WRITE_OWNER)) &&
+ (!(mask & ACE4_EXECUTE) ||
+ (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)))
+ if (capable(CAP_DAC_OVERRIDE))
+ return 0;
+
+ /*
+ * Searching includes executable on directories, else just read.
+ */
+ if (!(mask & ~(ACE4_READ_DATA | ACE4_EXECUTE)) &&
+ (S_ISDIR(inode->i_mode) || !(mask & ACE4_EXECUTE)))
+ if (capable(CAP_DAC_READ_SEARCH))
+ return 0;
+
+ return -EACCES;
+}
+
+/**
+ * nfs4acl_permission - permission check algorithm with masking
+ * @inode: inode to check
+ * @acl: nfs4 acl of the inode
+ * @mask: requested access (ACE4_* bitmask)
+ *
+ * Checks if the current process is granted @mask flags in @acl. With
+ * write-through, the OWNER@ is always granted the owner file mask, the
+ * GROUP@ is always granted the group file mask, and EVERYONE@ is always
+ * granted the other file mask. Otherwise, processes are only granted
+ * @mask flags which they are granted in the @acl as well as in their
+ * file mask.
+ */
+int nfs4acl_permission(struct inode *inode, const struct nfs4acl *acl,
+ unsigned int mask)
+{
+ const struct nfs4ace *ace;
+ unsigned int file_mask, requested = mask, denied = 0;
+ int in_owning_group = in_group_p(inode->i_gid);
+ int owner_or_group_class = in_owning_group;
+
+ /*
+ * A process is in the
+ * - owner file class if it owns the file, in the
+ * - group file class if it is in the file's owning group or
+ * it matches any of the user or group entries, and in the
+ * - other file class otherwise.
+ */
+
+ nfs4acl_for_each_entry(ace, acl) {
+ unsigned int ace_mask = ace->e_mask;
+
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_owner(ace)) {
+ if (current_fsuid() != inode->i_uid)
+ continue;
+ goto is_owner;
+ } else if (nfs4ace_is_group(ace)) {
+ if (!in_owning_group)
+ continue;
+ } else if (nfs4ace_is_unix_id(ace)) {
+ if (ace->e_flags & ACE4_IDENTIFIER_GROUP) {
+ if (!in_group_p(ace->u.e_id))
+ continue;
+ } else {
+ if (current_fsuid() != ace->u.e_id)
+ continue;
+ }
+ } else
+ goto is_everyone;
+
+ /*
+ * Apply the group file mask to entries other than OWNER@ and
+ * EVERYONE@. This is not required for correct access checking
+ * but ensures that we grant the same permissions as the acl
+ * computed by nfs4acl_apply_masks().
+ *
+ * For example, without this restriction, 'group@:rw::allow'
+ * with mode 0600 would grant rw access to owner processes
+ * which are also in the owning group. This cannot be expressed
+ * in an acl.
+ */
+ if (nfs4ace_is_allow(ace))
+ ace_mask &= acl->a_group_mask;
+
+ is_owner:
+ /* The process is in the owner or group file class. */
+ owner_or_group_class = 1;
+
+ is_everyone:
+ /* Check which mask flags the ACE allows or denies. */
+ if (nfs4ace_is_deny(ace))
+ denied |= ace_mask & mask;
+ mask &= ~ace_mask;
+
+ /* Keep going until we know which file class the process is in. */
+ if (!mask && owner_or_group_class)
+ break;
+ }
+ denied |= mask;
+
+ /*
+ * Figure out which file mask applies.
+ * Clear write-through if the process is in the file group class but
+ * not in the owning group, and so the denied permissions apply.
+ */
+ if (current_fsuid() == inode->i_uid)
+ file_mask = acl->a_owner_mask;
+ else if (in_owning_group || owner_or_group_class)
+ file_mask = acl->a_group_mask;
+ else
+ file_mask = acl->a_other_mask;
+
+ denied |= requested & ~file_mask;
+ if (!denied)
+ return 0;
+ return nfs4acl_capability_check(inode, requested);
+}
+EXPORT_SYMBOL(nfs4acl_permission);
+
+/**
+ * nfs4acl_generic_permission - permission check algorithm without explicit acl
+ * @inode: inode to check permissions for
+ * @mask: requested access (ACE4_* bitmask)
+ *
+ * The file mode of a file without ACL corresponds to an ACL with a single
+ * "EVERYONE:~0::ALLOW" entry, with file masks that correspond to the file mode
+ * permissions. Instead of constructing a temporary ACL and applying
+ * nfs4acl_permission() to it, compute the identical result directly from the file
+ * mode.
+ */
+int nfs4acl_generic_permission(struct inode *inode, unsigned int mask)
+{
+ int mode = inode->i_mode;
+
+ if (current_fsuid() == inode->i_uid)
+ mode >>= 6;
+ else if (in_group_p(inode->i_gid))
+ mode >>= 3;
+ if (!(mask & ~nfs4acl_mode_to_mask(mode)))
+ return 0;
+ return nfs4acl_capability_check(inode, mask);
+}
+EXPORT_SYMBOL(nfs4acl_generic_permission);
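+
+/*
+ * Worked example (illustrative mode): with i_mode == 0754, a process
+ * in the owning group checks against mode >> 3 == 05, so requesting
+ * ACE4_READ_DATA | ACE4_EXECUTE succeeds directly, while any write
+ * flag falls through to nfs4acl_capability_check().
+ */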
+
+/*
+ * nfs4ace_is_same_who - do both acl entries refer to the same identifier?
+ */
+int
+nfs4ace_is_same_who(const struct nfs4ace *a, const struct nfs4ace *b)
+{
+#define WHO_FLAGS (ACE4_SPECIAL_WHO | ACE4_IDENTIFIER_GROUP)
+ if ((a->e_flags & WHO_FLAGS) != (b->e_flags & WHO_FLAGS))
+ return 0;
+ if (a->e_flags & ACE4_SPECIAL_WHO)
+ return a->u.e_who == b->u.e_who;
+ else
+ return a->u.e_id == b->u.e_id;
+#undef WHO_FLAGS
+}
+
+/**
+ * nfs4ace_set_who - set a special who value
+ * @ace: acl entry
+ * @who: who value to use
+ */
+int
+nfs4ace_set_who(struct nfs4ace *ace, const char *who)
+{
+ if (!strcmp(who, nfs4ace_owner_who))
+ who = nfs4ace_owner_who;
+ else if (!strcmp(who, nfs4ace_group_who))
+ who = nfs4ace_group_who;
+ else if (!strcmp(who, nfs4ace_everyone_who))
+ who = nfs4ace_everyone_who;
+ else
+ return -EINVAL;
+
+ ace->u.e_who = who;
+ ace->e_flags |= ACE4_SPECIAL_WHO;
+ ace->e_flags &= ~ACE4_IDENTIFIER_GROUP;
+ return 0;
+}
+EXPORT_SYMBOL(nfs4ace_set_who);
+
+/**
+ * nfs4acl_allowed_to_who - mask flags allowed to a specific who value
+ *
+ * Computes the mask values allowed to a specific who value, taking
+ * EVERYONE@ entries into account.
+ */
+static unsigned int
+nfs4acl_allowed_to_who(struct nfs4acl *acl, struct nfs4ace *who)
+{
+ struct nfs4ace *ace;
+ unsigned int allowed = 0;
+
+ nfs4acl_for_each_entry_reverse(ace, acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_same_who(ace, who) ||
+ nfs4ace_is_everyone(ace)) {
+ if (nfs4ace_is_allow(ace))
+ allowed |= ace->e_mask;
+ else if (nfs4ace_is_deny(ace))
+ allowed &= ~ace->e_mask;
+ }
+ }
+ return allowed;
+}
+
+/**
+ * nfs4acl_compute_max_masks - compute upper bound masks
+ *
+ * Computes upper bound owner, group, and other masks so that none of
+ * the mask flags allowed by the acl are disabled (for any choice of the
+ * file owner or group membership).
+ */
+static void
+nfs4acl_compute_max_masks(struct nfs4acl *acl)
+{
+ struct nfs4ace *ace;
+
+ acl->a_owner_mask = 0;
+ acl->a_group_mask = 0;
+ acl->a_other_mask = 0;
+
+ nfs4acl_for_each_entry_reverse(ace, acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+
+ if (nfs4ace_is_owner(ace)) {
+ if (nfs4ace_is_allow(ace))
+ acl->a_owner_mask |= ace->e_mask;
+ else if (nfs4ace_is_deny(ace))
+ acl->a_owner_mask &= ~ace->e_mask;
+ } else if (nfs4ace_is_everyone(ace)) {
+ if (nfs4ace_is_allow(ace)) {
+ struct nfs4ace who = {
+ .e_flags = ACE4_SPECIAL_WHO,
+ .u.e_who = nfs4ace_group_who,
+ };
+
+ acl->a_other_mask |= ace->e_mask;
+ acl->a_group_mask |=
+ nfs4acl_allowed_to_who(acl, &who);
+ acl->a_owner_mask |= ace->e_mask;
+ } else if (nfs4ace_is_deny(ace)) {
+ acl->a_other_mask &= ~ace->e_mask;
+ acl->a_group_mask &= ~ace->e_mask;
+ acl->a_owner_mask &= ~ace->e_mask;
+ }
+ } else {
+ if (nfs4ace_is_allow(ace)) {
+ unsigned int mask =
+ nfs4acl_allowed_to_who(acl, ace);
+
+ acl->a_group_mask |= mask;
+ acl->a_owner_mask |= mask;
+ }
+ }
+ }
+}
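+
+/*
+ * Example (single-entry acl): for "everyone@:rw::allow", the loop above
+ * adds rw to a_other_mask and a_owner_mask directly, and to
+ * a_group_mask via nfs4acl_allowed_to_who(), so all three upper bounds
+ * become rw.
+ */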
+
+/**
+ * nfs4acl_inherit - compute the acl a new file will inherit
+ * @dir_acl:	acl of the containing directory
+ * @mode: file type and create mode of the new file
+ *
+ * Given the containing directory's acl, this function will compute the
+ * acl that new files in that directory will inherit, or %NULL if
+ * @dir_acl does not contain acl entries inheritable by this file.
+ *
+ * Without write-through, the file masks in the returned acl are set to
+ * the intersection of the create mode and the maximum permissions
+ * allowed to each file class. With write-through, the file masks are
+ * set to the create mode.
+ */
+struct nfs4acl *
+nfs4acl_inherit(const struct nfs4acl *dir_acl, mode_t mode)
+{
+ const struct nfs4ace *dir_ace;
+ struct nfs4acl *acl;
+ struct nfs4ace *ace;
+ int count = 0;
+
+ if (S_ISDIR(mode)) {
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!nfs4ace_is_inheritable(dir_ace))
+ continue;
+ count++;
+ }
+ if (!count)
+ return NULL;
+ acl = nfs4acl_alloc(count);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ ace = acl->a_entries;
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!nfs4ace_is_inheritable(dir_ace))
+ continue;
+ memcpy(ace, dir_ace, sizeof(struct nfs4ace));
+ if (dir_ace->e_flags & ACE4_NO_PROPAGATE_INHERIT_ACE)
+ nfs4ace_clear_inheritance_flags(ace);
+ if ((dir_ace->e_flags & ACE4_FILE_INHERIT_ACE) &&
+ !(dir_ace->e_flags & ACE4_DIRECTORY_INHERIT_ACE))
+ ace->e_flags |= ACE4_INHERIT_ONLY_ACE;
+ ace++;
+ }
+ } else {
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE))
+ continue;
+ count++;
+ }
+ if (!count)
+ return NULL;
+ acl = nfs4acl_alloc(count);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ ace = acl->a_entries;
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE))
+ continue;
+ memcpy(ace, dir_ace, sizeof(struct nfs4ace));
+ nfs4ace_clear_inheritance_flags(ace);
+ ace++;
+ }
+ }
+
+	/* The maximum mask flags that the owner, group, and other classes
+ are allowed. */
+ if (dir_acl->a_flags & ACL4_WRITE_THROUGH) {
+ acl->a_owner_mask = ACE4_VALID_MASK;
+ acl->a_group_mask = ACE4_VALID_MASK;
+ acl->a_other_mask = ACE4_VALID_MASK;
+
+ mode &= ~current->fs->umask;
+ } else
+ nfs4acl_compute_max_masks(acl);
+
+ /* Apply the create mode. */
+ acl->a_owner_mask &= nfs4acl_mode_to_mask(mode >> 6);
+ acl->a_group_mask &= nfs4acl_mode_to_mask(mode >> 3);
+ acl->a_other_mask &= nfs4acl_mode_to_mask(mode);
+
+ if (nfs4acl_write_through(&acl)) {
+ nfs4acl_put(acl);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ acl->a_flags = (dir_acl->a_flags & ~ACL4_PROTECTED);
+ if (nfs4acl_is_auto_inherit(acl)) {
+ nfs4acl_for_each_entry(ace, acl)
+ ace->e_flags |= ACE4_INHERITED_ACE;
+ acl->a_flags |= ACL4_PROTECTED;
+ }
+
+ return acl;
+}
+EXPORT_SYMBOL(nfs4acl_inherit);
--- /dev/null
+/*
+ * Copyright (C) 2006 Andreas Gruenbacher <a.gruenbacher@computer.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
++#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/nfs4acl.h>
+
+/**
+ * struct nfs4acl_alloc - remember how many entries are actually allocated
+ * @acl: acl with a_count <= @count
+ * @count: the actual number of entries allocated in @acl
+ *
+ * We pass around this structure while modifying an acl, so that we do
+ * not have to reallocate when we remove existing entries followed by
+ * adding new entries.
+ */
+struct nfs4acl_alloc {
+ struct nfs4acl *acl;
+ unsigned int count;
+};
+
+/**
+ * nfs4acl_delete_entry - delete an entry in an acl
+ * @x: acl and number of allocated entries
+ * @ace: an entry in @x->acl
+ *
+ * Updates @ace so that it points to the entry before the deleted entry
+ * on return. (When deleting the first entry, @ace will point to the
+ * (non-existent) entry before the first entry).  This behavior is the
+ * expected behavior when deleting entries while forward iterating over
+ * an acl.
+ */
+static void
+nfs4acl_delete_entry(struct nfs4acl_alloc *x, struct nfs4ace **ace)
+{
+ void *end = x->acl->a_entries + x->acl->a_count;
+
+ memmove(*ace, *ace + 1, end - (void *)(*ace + 1));
+ (*ace)--;
+ x->acl->a_count--;
+}
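+
+/*
+ * Usage sketch (should_drop() is hypothetical): because @ace is moved
+ * back to the preceding entry, deleting is safe while forward
+ * iterating:
+ *
+ *	nfs4acl_for_each_entry(ace, x->acl) {
+ *		if (should_drop(ace))
+ *			nfs4acl_delete_entry(x, &ace);
+ *	}
+ */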
+
+/**
+ * nfs4acl_insert_entry - insert an entry in an acl
+ * @x: acl and number of allocated entries
+ * @ace: entry before which the new entry shall be inserted
+ *
+ * Insert a new entry in @x->acl at position @ace, and zero-initialize
+ * it. This may require reallocating @x->acl.
+ */
+static int
+nfs4acl_insert_entry(struct nfs4acl_alloc *x, struct nfs4ace **ace)
+{
+ if (x->count == x->acl->a_count) {
+ int n = *ace - x->acl->a_entries;
+ struct nfs4acl *acl2;
+
+ acl2 = nfs4acl_alloc(x->acl->a_count + 1);
+ if (!acl2)
+ return -1;
+ acl2->a_flags = x->acl->a_flags;
+ acl2->a_owner_mask = x->acl->a_owner_mask;
+ acl2->a_group_mask = x->acl->a_group_mask;
+ acl2->a_other_mask = x->acl->a_other_mask;
+ memcpy(acl2->a_entries, x->acl->a_entries,
+ n * sizeof(struct nfs4ace));
+ memcpy(acl2->a_entries + n + 1, *ace,
+ (x->acl->a_count - n) * sizeof(struct nfs4ace));
+ kfree(x->acl);
+ x->acl = acl2;
+ x->count = acl2->a_count;
+ *ace = acl2->a_entries + n;
+ } else {
+ void *end = x->acl->a_entries + x->acl->a_count;
+
+ memmove(*ace + 1, *ace, end - (void *)*ace);
+ x->acl->a_count++;
+ }
+ memset(*ace, 0, sizeof(struct nfs4ace));
+ return 0;
+}
+
+/**
+ * nfs4ace_change_mask - change the mask in @ace to @mask
+ * @x: acl and number of allocated entries
+ * @ace: entry to modify
+ * @mask: new mask for @ace
+ *
+ * Set the effective mask of @ace to @mask. This will require splitting
+ * off a separate acl entry if @ace is inheritable. In that case, the
+ * effective- only acl entry is inserted after the inheritable acl
+ * entry, end the inheritable acl entry is set to inheritable-only. If
+ * @mode is 0, either set the original acl entry to inheritable-only if
+ * it was inheritable, or remove it otherwise. The returned @ace points
+ * to the modified or inserted effective-only acl entry if that entry
+ * exists, to the entry that has become inheritable-only, or else to the
+ * previous entry in the acl. This is the expected behavior when
+ * modifying masks while forward iterating over an acl.
+ */
+static int
+nfs4ace_change_mask(struct nfs4acl_alloc *x, struct nfs4ace **ace,
+ unsigned int mask)
+{
+ if (mask && (*ace)->e_mask == mask)
+ return 0;
+ if (mask & ~ACE4_POSIX_ALWAYS_ALLOWED) {
+ if (nfs4ace_is_inheritable(*ace)) {
+ if (nfs4acl_insert_entry(x, ace))
+ return -1;
+ memcpy(*ace, *ace + 1, sizeof(struct nfs4ace));
+ (*ace)->e_flags |= ACE4_INHERIT_ONLY_ACE;
+ (*ace)++;
+ nfs4ace_clear_inheritance_flags(*ace);
+ }
+ (*ace)->e_mask = mask;
+ } else {
+ if (nfs4ace_is_inheritable(*ace))
+ (*ace)->e_flags |= ACE4_INHERIT_ONLY_ACE;
+ else
+ nfs4acl_delete_entry(x, ace);
+ }
+ return 0;
+}
+
+/**
+ * nfs4acl_move_everyone_aces_down - move everyone@ acl entries to the end
+ * @x: acl and number of allocated entries
+ *
+ * Move all everyone acl entries to the bottom of the acl so that only a
+ * single everyone@ allow acl entry remains at the end, and update the
+ * mask fields of all acl entries on the way. If everyone@ is not
+ * granted any permissions, no empty everyone@ acl entry is inserted.
+ *
+ * This transformation does not modify the permissions that the acl
+ * grants, but we need it to simplify successive transformations.
+ */
+static int
+nfs4acl_move_everyone_aces_down(struct nfs4acl_alloc *x)
+{
+ struct nfs4ace *ace;
+ unsigned int allowed = 0, denied = 0;
+
+ nfs4acl_for_each_entry(ace, x->acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_everyone(ace)) {
+ if (nfs4ace_is_allow(ace))
+ allowed |= (ace->e_mask & ~denied);
+ else if (nfs4ace_is_deny(ace))
+ denied |= (ace->e_mask & ~allowed);
+ else
+ continue;
+ if (nfs4ace_change_mask(x, &ace, 0))
+ return -1;
+ } else {
+ if (nfs4ace_is_allow(ace)) {
+ if (nfs4ace_change_mask(x, &ace, allowed |
+ (ace->e_mask & ~denied)))
+ return -1;
+ } else if (nfs4ace_is_deny(ace)) {
+ if (nfs4ace_change_mask(x, &ace, denied |
+ (ace->e_mask & ~allowed)))
+ return -1;
+ }
+ }
+ }
+ if (allowed & ~ACE4_POSIX_ALWAYS_ALLOWED) {
+ struct nfs4ace *last_ace = ace - 1;
+
+ if (nfs4ace_is_everyone(last_ace) &&
+ nfs4ace_is_allow(last_ace) &&
+ nfs4ace_is_inherit_only(last_ace) &&
+ last_ace->e_mask == allowed)
+ last_ace->e_flags &= ~ACE4_INHERIT_ONLY_ACE;
+ else {
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
+ ace->e_flags = ACE4_SPECIAL_WHO;
+ ace->e_mask = allowed;
+ ace->u.e_who = nfs4ace_everyone_who;
+ }
+ }
+ return 0;
+}
+
+/**
+ * __nfs4acl_propagate_everyone - propagate everyone@ mask flags up for @who
+ * @x: acl and number of allocated entries
+ * @who: identifier to propagate mask flags for
+ * @allow: mask flags to propagate up
+ *
+ * Propagate mask flags from the trailing everyone@ allow acl entry up
+ * for the specified @who.
+ *
+ * The idea here is to precede the trailing EVERYONE@ ALLOW entry by an
+ * additional @who ALLOW entry, but with the following optimizations:
+ * (1) we don't bother setting any flags in the new @who ALLOW entry
+ * that have already been allowed or denied by a previous @who entry, (2)
+ * we merge the new @who entry with a previous @who entry if there is
+ * such a previous @who entry and there are no intervening DENY entries
+ * with mask flags that overlap the flags we care about.
+ */
+static int
+__nfs4acl_propagate_everyone(struct nfs4acl_alloc *x, struct nfs4ace *who,
+ unsigned int allow)
+{
+ struct nfs4ace *allow_last = NULL, *ace;
+
+ /* Remove the mask flags from allow that are already determined for
+ this who value, and figure out if there is an ALLOW entry for
+ this who value that is "reachable" from the trailing EVERYONE@
+ ALLOW ACE. */
+ nfs4acl_for_each_entry(ace, x->acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_allow(ace)) {
+ if (nfs4ace_is_same_who(ace, who)) {
+ allow &= ~ace->e_mask;
+ allow_last = ace;
+ }
+ } else if (nfs4ace_is_deny(ace)) {
+ if (nfs4ace_is_same_who(ace, who))
+ allow &= ~ace->e_mask;
+ if (allow & ace->e_mask)
+ allow_last = NULL;
+ }
+ }
+
+ if (allow) {
+ if (allow_last)
+ return nfs4ace_change_mask(x, &allow_last,
+ allow_last->e_mask | allow);
+ else {
+ struct nfs4ace who_copy;
+
+ ace = x->acl->a_entries + x->acl->a_count - 1;
+ memcpy(&who_copy, who, sizeof(struct nfs4ace));
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ memcpy(ace, &who_copy, sizeof(struct nfs4ace));
+ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
+ nfs4ace_clear_inheritance_flags(ace);
+ ace->e_mask = allow;
+ }
+ }
+ return 0;
+}
+
+/**
+ * nfs4acl_propagate_everyone - propagate everyone@ mask flags up the acl
+ * @x: acl and number of allocated entries
+ *
+ * Make sure for owner@, group@, and all other users, groups, and
+ * special identifiers that they are allowed or denied all permissions
+ * that are granted by the trailing everyone@ acl entry.  If they are
+ * not, try to add the missing permissions to existing allow acl entries
+ * for those users, or introduce additional acl entries if that is not
+ * possible.
+ *
+ * We do this so that no mask flags will get lost when finally applying
+ * the file masks to the acl entries: otherwise, with an other file mask
+ * that is more restrictive than the owner and/or group file mask, mask
+ * flags that were allowed to processes in the owner and group classes
+ * and that the other mask denies would be lost. For example, the
+ * following two acls show the problem when mode 0664 is applied to
+ * them:
+ *
+ * masking without propagation (wrong)
+ * ===========================================================
+ * joe:r::allow => joe:r::allow
+ * everyone@:rwx::allow => everyone@:r::allow
+ * -----------------------------------------------------------
+ * joe:w::deny => joe:w::deny
+ * everyone@:rwx::allow everyone@:r::allow
+ *
+ * Note that the permissions of joe end up being more restrictive than
+ * what the acl would allow when first computing the allowed flags and
+ * then applying the respective mask. With propagation of permissions,
+ * we get:
+ *
+ * masking after propagation (correct)
+ * ===========================================================
+ * joe:r::allow => joe:rw::allow
+ * owner@:rw::allow
+ * group@:rw::allow
+ * everyone@:rwx::allow everyone@:r::allow
+ * -----------------------------------------------------------
+ * joe:w::deny => owner@:x::deny
+ * joe:w::deny
+ * owner@:rw::allow
+ *                        group@:rw::allow
+ * joe:r::allow
+ * everyone@:rwx::allow everyone@:r::allow
+ *
+ * The examples show the acls that would result from propagation with no
+ * masking performed. In fact, we do apply the respective mask to the
+ * acl entries before computing the propagation because this will save
+ * us from adding acl entries that would end up with empty mask fields
+ * after applying the masks.
+ *
+ * It is ensured that no more than one entry will be inserted for each
+ * who value, no matter how many entries each who value has already.
+ */
+static int
+nfs4acl_propagate_everyone(struct nfs4acl_alloc *x)
+{
+ int write_through = (x->acl->a_flags & ACL4_WRITE_THROUGH);
+ struct nfs4ace who = { .e_flags = ACE4_SPECIAL_WHO };
+ struct nfs4ace *ace;
+ unsigned int owner_allow, group_allow;
+ int retval;
+
+ if (!((x->acl->a_owner_mask | x->acl->a_group_mask) &
+ ~x->acl->a_other_mask))
+ return 0;
+ if (!x->acl->a_count)
+ return 0;
+ ace = x->acl->a_entries + x->acl->a_count - 1;
+ if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_everyone(ace))
+ return 0;
+ if (!(ace->e_mask & ~x->acl->a_other_mask)) {
+ /* None of the allowed permissions will get masked. */
+ return 0;
+ }
+ owner_allow = ace->e_mask & x->acl->a_owner_mask;
+ group_allow = ace->e_mask & x->acl->a_group_mask;
+
+ /* Propagate everyone@ permissions through to owner@. */
+ if (owner_allow && !write_through &&
+ (x->acl->a_owner_mask & ~x->acl->a_other_mask)) {
+ who.u.e_who = nfs4ace_owner_who;
+ retval = __nfs4acl_propagate_everyone(x, &who, owner_allow);
+ if (retval)
+ return -1;
+ }
+
+ if (group_allow && (x->acl->a_group_mask & ~x->acl->a_other_mask)) {
+ int n;
+
+ if (!write_through) {
+ /* Propagate everyone@ permissions through to group@. */
+ who.u.e_who = nfs4ace_group_who;
+ retval = __nfs4acl_propagate_everyone(x, &who,
+ group_allow);
+ if (retval)
+ return -1;
+ }
+
+ /* Start from the entry before the trailing EVERYONE@ ALLOW
+ entry. We will not hit EVERYONE@ entries in the loop. */
+ for (n = x->acl->a_count - 2; n != -1; n--) {
+ ace = x->acl->a_entries + n;
+
+ if (nfs4ace_is_inherit_only(ace) ||
+ nfs4ace_is_owner(ace) ||
+ nfs4ace_is_group(ace))
+ continue;
+ if (nfs4ace_is_allow(ace) || nfs4ace_is_deny(ace)) {
+ /* Any inserted entry will end up below the
+ current entry. */
+ retval = __nfs4acl_propagate_everyone(x, ace,
+ group_allow);
+ if (retval)
+ return -1;
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * __nfs4acl_apply_masks - apply the masks to the acl entries
+ * @x: acl and number of allocated entries
+ *
+ * Apply the owner file mask to owner@ entries, the intersection of the
+ * group and other file masks to everyone@ entries, and the group file
+ * mask to all other entries.
+ */
+static int
+__nfs4acl_apply_masks(struct nfs4acl_alloc *x)
+{
+ struct nfs4ace *ace;
+
+ nfs4acl_for_each_entry(ace, x->acl) {
+ unsigned int mask;
+
+ if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_allow(ace))
+ continue;
+ if (nfs4ace_is_owner(ace))
+ mask = x->acl->a_owner_mask;
+ else if (nfs4ace_is_everyone(ace))
+ mask = x->acl->a_other_mask;
+ else
+ mask = x->acl->a_group_mask;
+ if (nfs4ace_change_mask(x, &ace, ace->e_mask & mask))
+ return -1;
+ }
+ return 0;
+}
+
+/**
+ * nfs4acl_max_allowed - maximum mask flags that anybody is allowed
+ */
+static unsigned int
+nfs4acl_max_allowed(struct nfs4acl *acl)
+{
+ struct nfs4ace *ace;
+ unsigned int allowed = 0;
+
+ nfs4acl_for_each_entry_reverse(ace, acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_allow(ace))
+ allowed |= ace->e_mask;
+ else if (nfs4ace_is_deny(ace)) {
+ if (nfs4ace_is_everyone(ace))
+ allowed &= ~ace->e_mask;
+ }
+ }
+ return allowed;
+}
+
+/**
+ * nfs4acl_isolate_owner_class - limit the owner class to the owner file mask
+ * @x: acl and number of allocated entries
+ *
+ * Make sure the owner class (owner@) is granted no more than the owner
+ * mask by first checking which permissions anyone is granted, and then
+ * denying owner@ all permissions beyond that.
+ */
+static int
+nfs4acl_isolate_owner_class(struct nfs4acl_alloc *x)
+{
+ struct nfs4ace *ace;
+ unsigned int allowed = 0;
+
+ allowed = nfs4acl_max_allowed(x->acl);
+ if (allowed & ~x->acl->a_owner_mask) {
+		/* Figure out if we can update an existing OWNER@ DENY entry. */
+ nfs4acl_for_each_entry(ace, x->acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_deny(ace)) {
+ if (nfs4ace_is_owner(ace))
+ break;
+ } else if (nfs4ace_is_allow(ace)) {
+ ace = x->acl->a_entries + x->acl->a_count;
+ break;
+ }
+ }
+ if (ace != x->acl->a_entries + x->acl->a_count) {
+ if (nfs4ace_change_mask(x, &ace, ace->e_mask |
+ (allowed & ~x->acl->a_owner_mask)))
+ return -1;
+ } else {
+ /* Insert an owner@ deny entry at the front. */
+ ace = x->acl->a_entries;
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE;
+ ace->e_flags = ACE4_SPECIAL_WHO;
+ ace->e_mask = allowed & ~x->acl->a_owner_mask;
+ ace->u.e_who = nfs4ace_owner_who;
+ }
+ }
+ return 0;
+}
+
+/**
+ * __nfs4acl_isolate_who - isolate entry from EVERYONE@ ALLOW entry
+ * @x: acl and number of allocated entries
+ * @who: identifier to isolate
+ * @deny: mask flags this identifier should not be allowed
+ *
+ * Make sure that @who is not allowed any mask flags in @deny by checking
+ * which mask flags this identifier is allowed, and adding excess allowed
+ * mask flags to an existing DENY entry before the trailing EVERYONE@ ALLOW
+ * entry, or inserting such an entry.
+ */
+static int
+__nfs4acl_isolate_who(struct nfs4acl_alloc *x, struct nfs4ace *who,
+ unsigned int deny)
+{
+ struct nfs4ace *ace;
+ unsigned int allowed = 0, n;
+
+ /* Compute the mask flags granted to this who value. */
+ nfs4acl_for_each_entry_reverse(ace, x->acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_same_who(ace, who)) {
+ if (nfs4ace_is_allow(ace))
+ allowed |= ace->e_mask;
+ else if (nfs4ace_is_deny(ace))
+ allowed &= ~ace->e_mask;
+ deny &= ~ace->e_mask;
+ }
+ }
+ if (!deny)
+ return 0;
+
+	/* Figure out if we can update an existing DENY entry.  Start
+ from the entry before the trailing EVERYONE@ ALLOW entry. We
+ will not hit EVERYONE@ entries in the loop. */
+ for (n = x->acl->a_count - 2; n != -1; n--) {
+ ace = x->acl->a_entries + n;
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_deny(ace)) {
+ if (nfs4ace_is_same_who(ace, who))
+ break;
+ } else if (nfs4ace_is_allow(ace) &&
+ (ace->e_mask & deny)) {
+ n = -1;
+ break;
+ }
+ }
+ if (n != -1) {
+ if (nfs4ace_change_mask(x, &ace, ace->e_mask | deny))
+ return -1;
+ } else {
+		/* Insert a deny entry before the trailing EVERYONE@ ALLOW
+		   entry. */
+ struct nfs4ace who_copy;
+
+ ace = x->acl->a_entries + x->acl->a_count - 1;
+ memcpy(&who_copy, who, sizeof(struct nfs4ace));
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ memcpy(ace, &who_copy, sizeof(struct nfs4ace));
+ ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE;
+ nfs4ace_clear_inheritance_flags(ace);
+ ace->e_mask = deny;
+ }
+ return 0;
+}
+
+/**
+ * nfs4acl_isolate_group_class - limit the group class to the group file mask
+ * @x: acl and number of allocated entries
+ *
+ * Make sure the group class (all entries except owner@ and everyone@) is
+ * granted no more than the group mask by inserting DENY entries for group
+ * class entries where necessary.
+ */
+static int
+nfs4acl_isolate_group_class(struct nfs4acl_alloc *x)
+{
+ struct nfs4ace who = {
+ .e_flags = ACE4_SPECIAL_WHO,
+ .u.e_who = nfs4ace_group_who,
+ };
+ struct nfs4ace *ace;
+ unsigned int deny;
+
+ if (!x->acl->a_count)
+ return 0;
+ ace = x->acl->a_entries + x->acl->a_count - 1;
+ if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_everyone(ace))
+ return 0;
+ deny = ace->e_mask & ~x->acl->a_group_mask;
+
+ if (deny) {
+ unsigned int n;
+
+ if (__nfs4acl_isolate_who(x, &who, deny))
+ return -1;
+
+ /* Start from the entry before the trailing EVERYONE@ ALLOW
+ entry. We will not hit EVERYONE@ entries in the loop. */
+ for (n = x->acl->a_count - 2; n != -1; n--) {
+ ace = x->acl->a_entries + n;
+
+ if (nfs4ace_is_inherit_only(ace) ||
+ nfs4ace_is_owner(ace) ||
+ nfs4ace_is_group(ace))
+ continue;
+ if (__nfs4acl_isolate_who(x, ace, deny))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/**
+ * __nfs4acl_write_through - grant the full masks to owner@, group@, everyone@
+ *
+ * Make sure that owner@, group@, and everyone@ are allowed the full mask
+ * permissions, and not only the permissions granted both by the acl and
+ * the masks.
+ */
+static int
+__nfs4acl_write_through(struct nfs4acl_alloc *x)
+{
+ struct nfs4ace *ace;
+ unsigned int allowed;
+
+ /* Remove all owner@ and group@ ACEs: we re-insert them at the
+ top. */
+ nfs4acl_for_each_entry(ace, x->acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if ((nfs4ace_is_owner(ace) || nfs4ace_is_group(ace)) &&
+ nfs4ace_change_mask(x, &ace, 0))
+ return -1;
+ }
+
+ /* Insert the everyone@ allow entry at the end, or update the
+ existing entry. */
+ allowed = x->acl->a_other_mask;
+ if (allowed & ~ACE4_POSIX_ALWAYS_ALLOWED) {
+ ace = x->acl->a_entries + x->acl->a_count - 1;
+ if (x->acl->a_count && nfs4ace_is_everyone(ace) &&
+ !nfs4ace_is_inherit_only(ace)) {
+ if (nfs4ace_change_mask(x, &ace, allowed))
+ return -1;
+ } else {
+ ace = x->acl->a_entries + x->acl->a_count;
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
+ ace->e_flags = ACE4_SPECIAL_WHO;
+ ace->e_mask = allowed;
+ ace->u.e_who = nfs4ace_everyone_who;
+ }
+ }
+
+ /* Compute the permissions that owner@ and group@ are already granted
+ though the everyone@ allow entry at the end. Note that the acl
+ contains no owner@ or group@ entries at this point. */
+ allowed = 0;
+ nfs4acl_for_each_entry_reverse(ace, x->acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_allow(ace)) {
+ if (nfs4ace_is_everyone(ace))
+ allowed |= ace->e_mask;
+ } else if (nfs4ace_is_deny(ace))
+ allowed &= ~ace->e_mask;
+ }
+
+ /* Insert the appropriate group@ allow entry at the front. */
+ if (x->acl->a_group_mask & ~allowed) {
+ ace = x->acl->a_entries;
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
+ ace->e_flags = ACE4_SPECIAL_WHO;
+ ace->e_mask = x->acl->a_group_mask /*& ~allowed*/;
+ ace->u.e_who = nfs4ace_group_who;
+ }
+
+ /* Insert the appropriate owner@ allow entry at the front. */
+ if (x->acl->a_owner_mask & ~allowed) {
+ ace = x->acl->a_entries;
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
+ ace->e_flags = ACE4_SPECIAL_WHO;
+ ace->e_mask = x->acl->a_owner_mask /*& ~allowed*/;
+ ace->u.e_who = nfs4ace_owner_who;
+ }
+
+ /* Insert the appropriate owner@ deny entry at the front. */
+ allowed = nfs4acl_max_allowed(x->acl);
+ if (allowed & ~x->acl->a_owner_mask) {
+ nfs4acl_for_each_entry(ace, x->acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_allow(ace)) {
+ ace = x->acl->a_entries + x->acl->a_count;
+ break;
+ }
+ if (nfs4ace_is_deny(ace) && nfs4ace_is_owner(ace))
+ break;
+ }
+ if (ace != x->acl->a_entries + x->acl->a_count) {
+ if (nfs4ace_change_mask(x, &ace, ace->e_mask |
+ (allowed & ~x->acl->a_owner_mask)))
+ return -1;
+ } else {
+ ace = x->acl->a_entries;
+ if (nfs4acl_insert_entry(x, &ace))
+ return -1;
+ ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE;
+ ace->e_flags = ACE4_SPECIAL_WHO;
+ ace->e_mask = allowed & ~x->acl->a_owner_mask;
+ ace->u.e_who = nfs4ace_owner_who;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * nfs4acl_apply_masks - apply the masks to the acl
+ *
+ * Apply the masks so that the acl allows no more flags than the
+ * intersection between the flags that the original acl allows and the
+ * mask matching the process.
+ *
+ * Note: this algorithm may push the number of entries in the acl above
+ * ACL4_XATTR_MAX_COUNT, so a read-modify-write cycle would fail.
+ */
+int
+nfs4acl_apply_masks(struct nfs4acl **acl)
+{
+ struct nfs4acl_alloc x = {
+ .acl = *acl,
+ .count = (*acl)->a_count,
+ };
+ int retval = 0;
+
+ if (nfs4acl_move_everyone_aces_down(&x) ||
+ nfs4acl_propagate_everyone(&x) ||
+ __nfs4acl_apply_masks(&x) ||
+ nfs4acl_isolate_owner_class(&x) ||
+ nfs4acl_isolate_group_class(&x))
+ retval = -ENOMEM;
+
+ *acl = x.acl;
+ return retval;
+}
+EXPORT_SYMBOL(nfs4acl_apply_masks);
+
+int nfs4acl_write_through(struct nfs4acl **acl)
+{
+ struct nfs4acl_alloc x = {
+ .acl = *acl,
+ .count = (*acl)->a_count,
+ };
+ int retval = 0;
+
+ if (!((*acl)->a_flags & ACL4_WRITE_THROUGH))
+ goto out;
+
+ if (nfs4acl_move_everyone_aces_down(&x) ||
+ nfs4acl_propagate_everyone(&x) ||
+ __nfs4acl_write_through(&x))
+ retval = -ENOMEM;
+
+ *acl = x.acl;
+out:
+ return retval;
+}
struct module *__module_text_address(unsigned long addr);
struct module *__module_address(unsigned long addr);
bool is_module_address(unsigned long addr);
+ bool is_module_percpu_address(unsigned long addr);
bool is_module_text_address(unsigned long addr);
+const char *supported_printable(int taint);
static inline int within_module_core(unsigned long addr, struct module *mod)
{
void kmem_cache_free(struct kmem_cache *, void *);
unsigned int kmem_cache_size(struct kmem_cache *);
const char *kmem_cache_name(struct kmem_cache *);
+ int kern_ptr_validate(const void *ptr, unsigned long size);
int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
+unsigned kmem_alloc_estimate(struct kmem_cache *cachep,
+ gfp_t flags, int objects);
/*
* Please use this macro to create slab caches. Simply specify the
#include <linux/skbuff.h> /* struct sk_buff */
#include <linux/mm.h>
#include <linux/security.h>
+ #include <linux/slab.h>
+#include <linux/reserve.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <linux/init.h>
+ #include <linux/slab.h>
+#include <linux/err.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/xenbus.h>
--- /dev/null
+/*
+ *
+ * Most of this code is borrowed and adapted from the lkcd command "lcrash"
+ * and its supporting library.
+ *
+ * This provides kdb commands for casting memory structures:
+ *	"print", "px", "pd"
+ *
+ * Be careful when porting the klib KL_XXX functions (they call through a
+ * jump table that we don't use here).
+ *
+ * The kernel type information is added by insmod'ing the kdb debuginfo
+ * module. It loads symbolic debugging info (provided from lcrash -o);
+ * this information originally comes from the lcrash "kerntypes" file.
+ *
+ */
+
+#define VMALLOC_START_IA64 0xa000000200000000
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/fs.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/fcntl.h>
+#include <linux/vmalloc.h>
+#include <linux/ctype.h>
+#include <linux/file.h>
+#include <linux/err.h>
++#include <linux/slab.h>
+#include "modules/lcrash/klib.h"
+#include "modules/lcrash/kl_stringtab.h"
+#include "modules/lcrash/kl_btnode.h"
+#include "modules/lcrash/lc_eval.h"
+
+#undef next_node /* collision with nodemask.h */
+int have_debug_file = 0;
+dbg_sym_t *types_tree_head;
+dbg_sym_t *typedefs_tree_head;
+kltype_t *kltype_array;
+dbg_sym_t *dsym_types_array;
+
+
+EXPORT_SYMBOL(types_tree_head);
+EXPORT_SYMBOL(typedefs_tree_head);
+EXPORT_SYMBOL(kltype_array);
+EXPORT_SYMBOL(dsym_types_array);
+
+#define C_HEX 0x0002
+#define C_WHATIS 0x0004
+#define C_NOVARS 0x0008
+#define C_SIZEOF 0x0010
+#define C_SHOWOFFSET 0x0020
+#define C_LISTHEAD 0x0040
+#define C_LISTHEAD_N 0x0080 /* walk using list_head.next */
+#define C_LISTHEAD_P 0x0100 /* walk using list_head.prev */
+#define C_BINARY 0x0200
+#define MAX_LONG_LONG 0xffffffffffffffffULL
+klib_t kdb_klib;
+klib_t *KLP = &kdb_klib;
+k_error_t klib_error = 0;
+dbg_sym_t *type_tree = (dbg_sym_t *)NULL;
+dbg_sym_t *typedef_tree = (dbg_sym_t *)NULL;
+dbg_sym_t *func_tree = (dbg_sym_t *)NULL;
+dbg_sym_t *srcfile_tree = (dbg_sym_t *)NULL;
+dbg_sym_t *var_tree = (dbg_sym_t *)NULL;
+dbg_sym_t *xtype_tree = (dbg_sym_t *)NULL;
+dbg_hashrec_t *dbg_hash[TYPE_NUM_SLOTS];
+int all_count, deall_count;
+void single_type(char *str);
+void sizeof_type(char *str);
+typedef struct chunk_s {
+ struct chunk_s *next; /* Must be first */
+ struct chunk_s *prev; /* Must be second */
+ void *addr;
+ struct bucket_s *bucketp;
+ uint32_t chunksz; /* size of memory chunk (via malloc()) */
+ uint32_t blksz; /* Not including header */
+ short blkcount; /* Number of blksz blocks in chunk */
+} chunk_t;
+
+typedef struct blkhdr_s {
+ struct blkhdr_s *next;
+ union {
+ struct blkhdr_s *prev;
+ chunk_t *chunkp;
+ } b_un;
+ int flg;
+ int size;
+} blkhdr_t;
+
+int ptrsz64 = ((int)sizeof(void *) == 8);
+alloc_functions_t alloc_functions;
+
+/*
+ * return 1 if addr is invalid
+ */
+static int
+invalid_address(kaddr_t addr, int count)
+{
+	unsigned char c;
+
+	/* FIXME: use kdb_verify_area */
+	while (count--) {
+		if (kdb_getarea(c, addr))
+			return 1;
+		addr++;
+	}
+ return 0;
+}
+
+/*
+ * wrappers for calls to kernel-style allocation/deallocation
+ */
+static void *
+kl_alloc_block(int size)
+{
+ void *vp;
+
+	/* important: the lcrash code sometimes assumes that the
+	 * allocation is zeroed out, so use kzalloc
+	 */
+	vp = kzalloc(size, GFP_KERNEL);
+	if (!vp) {
+		kdb_printf("kmalloc of %d bytes failed\n", size);
+		return NULL;
+	}
+ all_count++;
+ return vp;
+}
+static void
+kl_free_block(void *vp)
+{
+ kfree(vp);
+ deall_count++;
+ return;
+}
+
+int
+get_value(char *s, uint64_t *value)
+{
+ return kl_get_value(s, NULL, 0, value);
+}
+
+/*
+ * kl_get_block()
+ *
+ * Read a size block from virtual address addr in the system memory image.
+ */
+k_error_t
+kl_get_block(kaddr_t addr, unsigned size, void *bp, void *mmap)
+{
+ if (!bp) {
+ return(KLE_NULL_BUFF);
+ } else if (!size) {
+ return(KLE_ZERO_SIZE);
+ }
+
+ memcpy(bp, (void *)addr, size);
+
+ return(0);
+}
+
+/*
+ * print_value()
+ */
+void
+print_value(char *ldstr, uint64_t value, int width)
+{
+ int w = 0;
+ char fmtstr[12], f, s[2]="\000\000";
+
+ if (ldstr) {
+ kdb_printf("%s", ldstr);
+ }
+ s[0] = '#';
+ f = 'x';
+ if (width) {
+ if (ptrsz64) {
+ w = 18; /* due to leading "0x" */
+ } else {
+ w = 10; /* due to leading "0x" */
+ }
+ }
+ if (w) {
+ sprintf(fmtstr, "%%%s%d"FMT64"%c", s, w, f);
+ } else {
+ sprintf(fmtstr, "%%%s"FMT64"%c", s, f);
+ }
+ kdb_printf(fmtstr, value);
+}
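+
+/*
+ * Example (illustrative value): print_value("ADDR: ", 0xa0000001ULL, 8)
+ * prints "ADDR: " followed by 0xa0000001, right justified in an 18
+ * character field on 64-bit kernels (10 characters on 32-bit ones).
+ */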
+
+/*
+ * print_list_head()
+ */
+void
+print_list_head(kaddr_t saddr)
+{
+ print_value("STRUCT ADDR: ", (uint64_t)saddr, 8);
+ kdb_printf("\n");
+}
+
+/*
+ * check_prev_ptr()
+ */
+void
+check_prev_ptr(kaddr_t ptr, kaddr_t prev)
+{
+ if(ptr != prev) {
+ kdb_printf("\nWARNING: Pointer broken. %#"FMTPTR"x,"
+ " SHOULD BE: %#"FMTPTR"x\n", prev, ptr);
+ }
+}
+
+/*
+ * kl_kaddr() -- Return a kernel virtual address stored in a structure
+ *
+ * Pointer 'p' points to a kernel structure
+ * of type 's.' Get the kernel address located in member 'm.'
+ */
+kaddr_t
+kl_kaddr(void *p, char *s, char *m)
+{
+ uint64_t *u64p;
+ int offset;
+
+ offset = kl_member_offset(s, m);
+ u64p = (uint64_t *)(p + offset);
+ return((kaddr_t)*u64p);
+}
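+
+/*
+ * Example (illustrative): given p pointing at a local copy of a
+ * task_struct, kl_kaddr(p, "task_struct", "mm") returns the kernel
+ * virtual address stored in the mm member.
+ */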
+
+/*
+ * walk_structs() -- walk linked lists of kernel data structures
+ */
+int
+walk_structs(char *s, char *f, char *member, kaddr_t addr, int flags)
+{
+ int size, offset, mem_offset=0;
+ kaddr_t last = 0, next;
+ kltype_t *klt = (kltype_t *)NULL, *memklt=(kltype_t *)NULL;
+ unsigned long long iter_threshold = 10000;
+
+ int counter = 0;
+ kaddr_t head=0, head_next=0, head_prev=0, entry=0;
+ kaddr_t entry_next=0, entry_prev;
+
+ /* field name of link pointer, determine its offset in the struct. */
+ if ((offset = kl_member_offset(s, f)) == -1) {
+ kdb_printf("Could not determine offset for member %s of %s.\n",
+ f, s);
+ return 0;
+ }
+
+ /* Get the type of the enclosing structure */
+ if (!(klt = kl_find_type(s, (KLT_STRUCT|KLT_UNION)))) {
+ kdb_printf("Could not find the type of %s\n", s);
+ return(1);
+ }
+
+ /* Get the struct size */
+ if ((size = kl_struct_len(s)) == 0) {
+ kdb_printf ("could not get the length of %s\n", s);
+ return(1);
+ }
+
+ /* test for a named member of the structure that should be displayed */
+ if (member) {
+ memklt = kl_get_member(klt, member);
+ if (!memklt) {
+ kdb_printf ("%s has no member %s\n", s, member);
+ return 1;
+ }
+ mem_offset = kl_get_member_offset(klt, member);
+ }
+
+ if ((next = addr)) {
+ /* get head of list (anchor) when struct list_head is used */
+ if (flags & C_LISTHEAD) {
+ head = next;
+ if (invalid_address(head, sizeof(head))) {
+ kdb_printf ("invalid address %#lx\n",
+ head);
+ return 1;
+ }
+ /* get contents of addr struct member */
+ head_next = kl_kaddr((void *)head, "list_head", "next");
+			if (invalid_address(head_next, sizeof(head_next))) {
+ kdb_printf ("invalid address %#lx\n",
+ head_next);
+ return 1;
+ }
+ /* get prev field of anchor */
+ head_prev = kl_kaddr((void *)head, "list_head", "prev");
+			if (invalid_address(head_prev, sizeof(head_prev))) {
+ kdb_printf ("invalid address %#lx\n",
+ head_prev);
+ return 1;
+ }
+ entry = 0;
+ }
+ }
+
+	while (next) {
+		if (++counter > iter_threshold) {
+			kdb_printf("\nWARNING: Iteration threshold reached.\n");
+			kdb_printf("Current threshold: %lld\n", iter_threshold);
+			break;
+		}
+ if(flags & C_LISTHEAD) {
+ if(!(entry)){
+ if(flags & C_LISTHEAD_N){
+ entry = head_next;
+ } else {
+ entry = head_prev;
+ }
+ last = head;
+ }
+
+ if(head == entry) {
+ if(flags & C_LISTHEAD_N){
+ check_prev_ptr(last, head_prev);
+ } else {
+ check_prev_ptr(last, head_next);
+ }
+ break;
+ }
+
+ next = entry - offset; /* next structure */
+ /* check that the whole structure can be addressed */
+ if (invalid_address(next, size)) {
+ kdb_printf(
+ "invalid struct address %#lx\n", next);
+ return 1;
+ }
+ /* and validate that it points to valid addresses */
+ entry_next = kl_kaddr((void *)entry,"list_head","next");
+ if (invalid_address(entry_next, sizeof(entry_next))) {
+ kdb_printf("invalid address %#lx\n",
+ entry_next);
+ return 1;
+ }
+ entry_prev = kl_kaddr((void *)entry,"list_head","prev");
+ if (invalid_address(entry_prev, sizeof(entry_prev))) {
+ kdb_printf("invalid address %#lx\n",
+ entry_prev);
+ return 1;
+ }
+ if(flags & C_LISTHEAD_N){
+ check_prev_ptr(last, entry_prev);
+ } else {
+ check_prev_ptr(last, entry_next);
+ }
+ print_list_head(next);
+ last = entry;
+ if(flags & C_LISTHEAD_N){
+ entry = entry_next; /* next list_head */
+ } else {
+ entry = entry_prev; /* next list_head */
+ }
+ }
+
+ if (memklt) {
+ /* print named sub-structure in C-like struct format. */
+ kl_print_member(
+ (void *)((unsigned long)next+mem_offset),
+ memklt, 0, C_HEX);
+ } else {
+ /* print entire structure in C-like struct format. */
+ kl_print_type((void *)next, klt, 0, C_HEX);
+ }
+
+ if(!(flags & C_LISTHEAD)) {
+ last = next;
+ next = (kaddr_t) (*(uint64_t*)(next + offset));
+ }
+ }
+
+ return(0);
+}
+
+/*
+ * Implement the lcrash walk -s command
+ * see lcrash cmd_walk.c
+ */
+int
+kdb_walk(int argc, const char **argv)
+{
+ int i, nonoptc=0, optc=0, flags=0, init_len=0;
+ char *cmd, *arg, *structp=NULL, *forwp=NULL, *memberp=NULL;
+ char *addrp=NULL;
+ uint64_t value;
+ kaddr_t start_addr;
+
+ all_count=0;
+ deall_count=0;
+ if (!have_debug_file) {
+ kdb_printf("no debuginfo file\n");
+ return 0;
+ }
+ /* If there is nothing to evaluate, just return */
+ if (argc == 0) {
+ return 0;
+ }
+ cmd = (char *)*argv; /* s/b "walk" */
+ if (strcmp(cmd,"walk")) {
+ kdb_printf("got %s, not \"walk\"\n", cmd);
+ return 0;
+ }
+
+ for (i=1; i<=argc; i++) {
+ arg = (char *)*(argv+i);
+ if (*arg == '-') {
+ optc++;
+ if (optc > 2) {
+ kdb_printf("too many options\n");
+ kdb_printf("see 'walkhelp'\n");
+ return 0;
+ }
+ if (*(arg+1) == 's') {
+ continue; /* ignore -s */
+ } else if (*(arg+1) == 'h') {
+ if ((init_len=kl_struct_len("list_head"))
+ == 0) {
+ kdb_printf(
+ "could not find list_head\n");
+ return 0;
+ }
+ if (*(arg+2) == 'p') {
+ flags = C_LISTHEAD;
+ flags |= C_LISTHEAD_P;
+ } else if (*(arg+2) == 'n') {
+ flags = C_LISTHEAD;
+ flags |= C_LISTHEAD_N;
+ } else {
+ kdb_printf("invalid -h option <%s>\n",
+ arg);
+ kdb_printf("see 'walkhelp'\n");
+ return 0;
+ }
+ } else {
+ kdb_printf("invalid option <%s>\n", arg);
+ kdb_printf("see 'walkhelp'\n");
+ return 0;
+ }
+ } else {
+ nonoptc++;
+ if (nonoptc > 4) {
+ kdb_printf("too many arguments\n");
+ kdb_printf("see 'walkhelp'\n");
+ return 0;
+ }
+ if (nonoptc == 1) {
+ structp = arg;
+ } else if (nonoptc == 2) {
+ forwp = arg;
+ } else if (nonoptc == 3) {
+ addrp = arg;
+ } else if (nonoptc == 4) {
+ /* the member is optional; if we get a
+ fourth, the previous one was the member */
+ memberp = addrp;
+ addrp = arg;
+ }
+ }
+ }
+ if (nonoptc < 3) {
+ kdb_printf("too few arguments\n");
+ kdb_printf("see 'walkhelp'\n");
+ return 0;
+ }
+ if (!(flags & C_LISTHEAD)) {
+ if ((init_len=kl_struct_len(structp)) == 0) {
+ kdb_printf("could not find %s\n", structp);
+ return 0;
+ }
+ }
+
+ /* Get the start address of the structure */
+ if (get_value(addrp, &value)) {
+ kdb_printf ("address %s invalid\n", addrp);
+ return 0;
+ }
+ start_addr = (kaddr_t)value;
+ if (invalid_address(start_addr, init_len)) {
+ kdb_printf ("address %#lx invalid\n", start_addr);
+ return 0;
+ }
+
+
+ if (walk_structs(structp, forwp, memberp, start_addr, flags)) {
+ kdb_printf ("walk_structs failed\n");
+ return 0;
+ }
+ /* kdb_printf("ptc allocated:%d deallocated:%d\n",
+ all_count, deall_count); */
+ return 0;
+}
+
+/*
+ * Implement the lcrash px (print, pd) command
+ * see lcrash cmd_print.c
+ *
+ * px <expression>
+ * e.g. px *(task_struct *) <address>
+ */
+int
+kdb_debuginfo_print(int argc, const char **argv)
+{
+ /* argc does not count the command itself, which is argv[0] */
+ char *cmd, *next, *end, *exp, *cp;
+ char *buf;
+ int i, j, iflags;
+ node_t *np;
+ uint64_t flags = 0;
+
+ /* If there is nothing to evaluate, just return */
+ if (argc == 0) {
+ return 0;
+ }
+ all_count=0;
+ deall_count=0;
+
+ cmd = (char *)*argv;
+
+ /* Set up the flags value. If this command was invoked via
+ * "pd" or "px", then make sure the appropriate flag is set.
+ */
+ flags = 0;
+ if (!strcmp(cmd, "pd") || !strcmp(cmd, "print")) {
+ flags = 0;
+ } else if (!strcmp(cmd, "px")) {
+ flags |= C_HEX;
+ } else if (!strcmp(cmd, "whatis")) {
+ if (argc != 1) {
+ kdb_printf("usage: whatis <symbol | type>\n");
+ return 0;
+ }
+ cp = (char *)*(argv+1);
+ single_type(cp);
+ /* kdb_printf("allocated:%d deallocated:%d\n",
+ all_count, deall_count); */
+ return 0;
+ } else if (!strcmp(cmd, "sizeof")) {
+ if (!have_debug_file) {
+ kdb_printf("no debuginfo file\n");
+ return 0;
+ }
+ if (argc != 1) {
+ kdb_printf("usage: sizeof type\n");
+ return 0;
+ }
+ cp = (char *)*(argv+1);
+ sizeof_type(cp);
+ return 0;
+ } else {
+ kdb_printf("command error: %s\n", cmd);
+ return 0;
+ }
+
+ /*
+ * Count the number of bytes necessary to hold the entire expression
+ * string.
+ */
+ for (i=1, j=0; i <= argc; i++) {
+ j += (strlen(*(argv+i)) + 1);
+ }
+
+ /*
+ * Allocate space for the expression string and copy the individual
+ * arguments into it.
+ */
+ buf = kl_alloc_block(j);
+ if (!buf) {
+ return 0;
+ }
+ *buf = '\0'; /* ensure strcat() below starts from an empty string */
+
+ for (i=1; i <= argc; i++) {
+ strcat(buf, *(argv+i));
+ /* put spaces between arguments */
+ if (i < argc) {
+ strcat(buf, " ");
+ }
+ }
+
+ /* Walk through the expression string, expression by expression.
+ * Note that a comma (',') is the delimiting character between
+ * expressions.
+ */
+ next = buf;
+ while (next) {
+ if ((end = strchr(next, ','))) {
+ *end = (char)0;
+ }
+
+ /* Copy the next expression to a separate expression string.
+ * A separate expression string is necessary because it is
+ * likely to get freed up in eval() when variables get expanded.
+ */
+ i = strlen(next)+1;
+ exp = (char *)kl_alloc_block(i);
+ if (!exp) {
+ return 0;
+ }
+ strcpy(exp, next);
+
+ /* Evaluate the expression */
+ np = eval(&exp, 0);
+ if (!np || eval_error) {
+ print_eval_error(cmd, exp,
+ (error_token ? error_token : (char*)NULL),
+ eval_error, CMD_NAME_FLG);
+ if (np) {
+ free_nodes(np);
+ }
+ kl_free_block(buf);
+ kl_free_block(exp);
+ free_eval_memory();
+ return 0;
+ }
+ iflags = flags;
+ if (print_eval_results(np, iflags)) {
+ free_nodes(np);
+ kl_free_block(buf);
+ free_eval_memory();
+ return 0;
+ }
+ kl_free_block(exp);
+
+ if (end) {
+ next = end + 1;
+ kdb_printf(" ");
+ } else {
+ next = (char*)NULL;
+ kdb_printf("\n");
+ }
+ free_nodes(np);
+ }
+ free_eval_memory();
+ kl_free_block(buf);
+ /* kdb_printf("allocated:%d deallocated:%d\n",
+ all_count, deall_count); */
+ return 0;
+}
+
+/*
+ * Display help for the px command
+ */
+int
+kdb_pxhelp(int argc, const char **argv)
+{
+ if (have_debug_file) {
+ kdb_printf ("Some examples of using the px command:\n");
+ kdb_printf (" the whole structure:\n");
+ kdb_printf (" px *(task_struct *)0xe0000...\n");
+ kdb_printf (" one member:\n");
+ kdb_printf (" px (*(task_struct *)0xe0000...)->comm\n");
+ kdb_printf (" the address of a member\n");
+ kdb_printf (" px &((task_struct *)0xe0000...)->children\n");
+ kdb_printf (" a structure pointed to by a member:\n");
+ kdb_printf (" px ((*(class_device *)0xe0000...)->class)->name\n");
+ kdb_printf (" array element:\n");
+ kdb_printf (" px (cache_sizes *)0xa0000...[0]\n");
+ kdb_printf (" px (task_struct *)(0xe0000...)->cpus_allowed.bits[0]\n");
+ } else {
+ kdb_printf ("There is no debug info file.\n");
+ kdb_printf ("The px/pd/print commands can only evaluate ");
+ kdb_printf ("arithmetic expressions.\n");
+ }
+ return 0;
+}
+
+/*
+ * Display help for the walk command
+ */
+int
+kdb_walkhelp(int argc, const char **argv)
+{
+ if (!have_debug_file) {
+ kdb_printf("no debuginfo file\n");
+ return 0;
+ }
+ kdb_printf ("Using the walk command:\n");
+ kdb_printf (" (only the -s (symbolic) form is supported, so -s is ignored)\n");
+ kdb_printf ("\n");
+ kdb_printf (" If the list is not linked with list_head structures:\n");
+ kdb_printf (" walk [-s] struct name-of-forward-pointer address\n");
+ kdb_printf (" example: walk xyz_struct next 0xe00....\n");
+ kdb_printf ("\n");
+ kdb_printf (" If the list is linked with list_head structures, use -hn\n");
+ kdb_printf (" to walk the 'next' list, -hp for the 'prev' list\n");
+ kdb_printf (" walk -h[n|p] struct name-of-forward-pointer [member-to-show] address-of-list-head\n");
+ kdb_printf (" example, to show the entire task_struct:\n");
+ kdb_printf (" walk -hn task_struct tasks 0xe000....\n");
+ kdb_printf (" example, to show the task_struct member comm:\n");
+ kdb_printf (" walk -hn task_struct tasks comm 0xe000....\n");
+ kdb_printf (" (address is not the address of first member's list_head, ");
+ kdb_printf ("but of the anchoring list_head\n");
+ return 0;
+}
+
+/*
+ * dup_block()
+ */
+void *
+dup_block(void *b, int len)
+{
+ void *b2;
+
+ if ((b2 = kl_alloc_block(len))) {
+ memcpy(b2, b, len); /* dst, src, sz */
+ }
+ return(b2);
+}
+
+/*
+ * kl_reset_error()
+ */
+void
+kl_reset_error(void)
+{
+ klib_error = 0;
+}
+
+/*
+ * given a symbol name, look up its address
+ *
+ * in lcrash, this would return a pointer to the syment_t in
+ * a binary tree of them
+ *
+ * In this one, look up the symbol in the standard kdb way,
+ * which fills in the kdb_symtab_t, and copy the address into
+ * a freshly allocated syment_t.
+ *
+ * kl_lkup_symname returns the address of the syment_t if the
+ * symbol is found, else NULL.
+ *
+ * Note: we allocate a syment_t; the caller should free it
+ * with kl_free_block()
+ */
+syment_t *
+kl_lkup_symname (char *cp)
+{
+ syment_t *sp;
+ kdb_symtab_t kdb_symtab;
+
+ if (kdbgetsymval(cp, &kdb_symtab)) {
+ sp = (syment_t *)kl_alloc_block(sizeof(syment_t));
+ sp->s_addr = (kaddr_t)kdb_symtab.sym_start;
+ KL_ERROR = 0;
+ return (sp);
+ } else {
+ /* returns 0 if the symbol is not found */
+ KL_ERROR = KLE_INVALID_VALUE;
+ return ((syment_t *)0);
+ }
+}
+
+/*
+ * kl_get_ra()
+ *
+ * This function returns its own return address.
+ * Useful when trying to capture where we came from.
+ */
+void*
+kl_get_ra(void)
+{
+ return (__builtin_return_address(0));
+}
+
+/* start kl_util.c */
+/*
+ * Definitions for the do_math() routine.
+ */
+#define M_ADD '+'
+#define M_SUBTRACT '-'
+#define M_MULTIPLY '*'
+#define M_DIVIDE '/'
+
+/*
+ * do_math() -- Calculate some math values based on a string argument
+ * passed into the function. For example, if you pass:
+ *
+ * 0xffffc000*2+6/5-3*19-8
+ *
+ * you will get the value 0xffff7fc0 back. This could
+ * probably be optimized a bit more, but right now it
+ * works, which is good enough.
+ */
+static uint64_t
+do_math(char *str)
+{
+ int i = 0;
+ char *buf, *loc;
+ uint64_t value1, value2;
+ syment_t *sp;
+
+ buf = (char *)kl_alloc_block((strlen(str) + 1));
+ sprintf(buf, "%s", str);
+ for (i = strlen(str); i >= 0; i--) {
+ if ((str[i] == M_ADD) || (str[i] == M_SUBTRACT)) {
+ buf[i] = '\0';
+ value1 = do_math(buf);
+ value2 = do_math(&str[i+1]);
+ kl_free_block((void *)buf);
+ if (str[i] == M_SUBTRACT) {
+ return value1 - value2;
+ } else {
+ return value1 + value2;
+ }
+ }
+ }
+
+ for (i = strlen(str); i >= 0; i--) {
+ if ((str[i] == M_MULTIPLY) || (str[i] == M_DIVIDE)) {
+ buf[i] = '\0';
+ value1 = do_math(buf);
+ value2 = do_math(&str[i+1]);
+ kl_free_block((void *)buf);
+ if (str[i] == M_MULTIPLY) {
+ return (value1 * value2);
+ } else {
+ if (value2 == 0) {
+ /* handle divide by zero */
+ /* XXX -- set proper error code */
+ klib_error = 1;
+ return (0);
+ } else {
+ return (value1 / value2);
+ }
+ }
+ }
+ }
+
+ /*
+ * Otherwise, just process the value, and return it.
+ */
+ sp = kl_lkup_symname(buf);
+ if (KL_ERROR) {
+ KL_ERROR = 0;
+ value2 = kl_strtoull(buf, &loc, 10);
+ if (((!value2) && (buf[0] != '0')) || loc ||
+ (!strncmp(buf, "0x", 2)) || (!strncmp(buf, "0X", 2))) {
+ value1 = (kaddr_t)kl_strtoull(buf, (char**)NULL, 16);
+ } else {
+ value1 = (unsigned)kl_strtoull(buf, (char**)NULL, 10);
+ }
+ } else {
+ value1 = (kaddr_t)sp->s_addr;
+ kl_free_block((void *)sp);
+ }
+ kl_free_block((void *)buf);
+ return (value1);
+}
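+
+/* Illustrative sketch, not built (this file already uses "#if 0" for
+ * disabled code): how do_math() evaluates an expression. Scanning
+ * right-to-left for '+'/'-' before '*'/'/' makes multiplication bind
+ * tighter than addition.
+ */
+#if 0
+static void do_math_example(void)
+{
+ /* "2+3*4" splits at the '+' first, into
+ * do_math("2") + do_math("3*4") == 14
+ */
+ uint64_t v = do_math("2+3*4");
+
+ /* division by zero sets klib_error and yields 0 */
+ v = do_math("8/0");
+}
+#endif
+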
+/*
+ * kl_get_value() -- Translate numeric input strings
+ *
+ * A generic routine for translating an input string (param) in a
+ * number of different ways. If the input string is an equation
+ * (contains the characters '+', '-', '/', and '*'), then perform
+ * the math evaluation and return one of the following modes (if
+ * mode is passed):
+ *
+ * 0 -- if the resulting value is <= elements, if elements (number
+ * of elements in a table) is passed.
+ *
+ * 1 -- if the first character in param is a pound sign ('#').
+ *
+ * 3 -- the numeric result of an equation.
+ *
+ * If the input string is NOT an equation, mode (if passed) will be
+ * set in one of the following ways (depending on the contents of
+ * param and elements).
+ *
+ * o When the first character of param is a pound sign ('#'), mode
+ * is set equal to one and the trailing numeric value (assumed to
+ * be decimal) is returned.
+ *
+ * o When the first two characters in param are "0x" or "0X," or
+ * when param contains one of the characters "abcdef," or when
+ * the length of the input value is eight characters, mode is set
+ * equal to two and the numeric value contained in param is
+ * translated as hexadecimal and returned.
+ *
+ * o The value contained in param is translated as decimal and mode
+ * is set equal to zero. The resulting value is then tested to see
+ * if it exceeds elements (if passed). If it does, then value is
+ * translated as hexadecimal and mode is set equal to two.
+ *
+ * Note that mode is only set when a pointer is passed in the mode
+ * parameter. Also note that when elements is set equal to zero, any
+ * non-hex (as determined above) value not starting with a pound sign
+ * will be translated as hexadecimal (mode will be set equal to two) --
+ * IF the string is exactly 16 characters long (the width of a
+ * kaddr_t printed in hex).
+ *
+ */
+int
+kl_get_value(char *param, int *mode, int elements, uint64_t *value)
+{
+ char *loc;
+ uint64_t v;
+
+ kl_reset_error();
+
+ /* Check to see if we are going to need to do any math
+ */
+ if (strpbrk(param, "+-/*")) {
+ if (!strncmp(param, "#", 1)) {
+ v = do_math(&param[1]);
+ if (mode) {
+ *mode = 1;
+ }
+ } else {
+ v = do_math(param);
+ if (mode) {
+ if (elements && (v <= elements)) {
+ *mode = 0;
+ } else {
+ *mode = 3;
+ }
+ }
+ }
+ } else {
+ if (!strncmp(param, "#", 1)) {
+ if (!strncmp(param, "0x", 2)
+ || !strncmp(param, "0X", 2)
+ || strpbrk(param, "abcdef")) {
+ v = kl_strtoull(¶m[1], &loc, 16);
+ } else {
+ v = kl_strtoull(¶m[1], &loc, 10);
+ }
+ if (loc) {
+ KL_ERROR = KLE_INVALID_VALUE;
+ return (1);
+ }
+ if (mode) {
+ *mode = 1;
+ }
+ } else if (!strncmp(param, "0x", 2) || !strncmp(param, "0X", 2)
+ || strpbrk(param, "abcdef")) {
+ v = kl_strtoull(param, &loc, 16);
+ if (loc) {
+ KL_ERROR = KLE_INVALID_VALUE;
+ return (1);
+ }
+ if (mode) {
+ *mode = 2; /* HEX VALUE */
+ }
+ } else if (elements || (strlen(param) != 16)) {
+ v = kl_strtoull(param, &loc, 10);
+ if (loc) {
+ KL_ERROR = KLE_INVALID_VALUE;
+ return (1);
+ }
+ if (elements && (v >= elements)) {
+ v = (kaddr_t)kl_strtoull(param,
+ (char**)NULL, 16);
+ if (mode) {
+ *mode = 2; /* HEX VALUE */
+ }
+ } else if (mode) {
+ *mode = 0;
+ }
+ } else {
+ v = kl_strtoull(param, &loc, 16);
+ if (loc) {
+ KL_ERROR = KLE_INVALID_VALUE;
+ return (1);
+ }
+ if (mode) {
+ *mode = 2; /* ASSUME HEX VALUE */
+ }
+ }
+ }
+ *value = v;
+ return (0);
+}
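+
+/* Illustrative sketch, not built: expected kl_get_value() results for
+ * a few inputs, per the comment above (elements == 0 in each case).
+ */
+#if 0
+static void kl_get_value_example(void)
+{
+ int mode;
+ uint64_t v;
+
+ kl_get_value("#10", &mode, 0, &v); /* v == 10, mode == 1 */
+ kl_get_value("0x10", &mode, 0, &v); /* v == 16, mode == 2 */
+ kl_get_value("10", &mode, 0, &v); /* v == 10, mode == 0 */
+}
+#endif
+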
+/* end kl_util.c */
+
+/* start kl_libutil.c */
+static int
+valid_digit(char c, int base)
+{
+ switch(base) {
+ case 2:
+ if ((c >= '0') && (c <= '1')) {
+ return(1);
+ } else {
+ return(0);
+ }
+ case 8:
+ if ((c >= '0') && (c <= '7')) {
+ return(1);
+ } else {
+ return(0);
+ }
+ case 10:
+ if ((c >= '0') && (c <= '9')) {
+ return(1);
+ } else {
+ return(0);
+ }
+ case 16:
+ if (((c >= '0') && (c <= '9'))
+ || ((c >= 'a') && (c <= 'f'))
+ || ((c >= 'A') && (c <= 'F'))) {
+ return(1);
+ } else {
+ return(0);
+ }
+ }
+ return(0);
+}
+
+static int
+digit_value(char c, int base, int *val)
+{
+ if (!valid_digit(c, base)) {
+ return(1);
+ }
+ switch (base) {
+ case 2:
+ case 8:
+ case 10:
+ *val = c - '0';
+ break;
+ case 16:
+ if ((c >= 'a') && (c <= 'f')) {
+ *val = c - 'a' + 10;
+ } else if ((c >= 'A') && (c <= 'F')) {
+ *val = c - 'A' + 10;
+ } else {
+ *val = c - '0';
+ }
+ }
+ return(0);
+}
+
+uint64_t
+kl_strtoull(char *str, char **loc, int base)
+{
+ int dval;
+ uint64_t i = 1, v, value = 0;
+ char *c, *cp = str;
+
+ /* loc may legitimately be NULL (several callers pass NULL) */
+ if (loc) {
+ *loc = (char *)NULL;
+ }
+ if (base == 0) {
+ if (!strncmp(cp, "0x", 2) || !strncmp(cp, "0X", 2)) {
+ base = 16;
+ } else if (cp[0] == '0') {
+ if (cp[1] == 'b') {
+ base = 2;
+ } else {
+ base = 8;
+ }
+ } else if (strpbrk(cp, "abcdefABCDEF")) {
+ base = 16;
+ } else {
+ base = 10;
+ }
+ }
+ if ((base == 8) && (*cp == '0')) {
+ cp += 1;
+ } else if ((base == 2) && !strncmp(cp, "0b", 2)) {
+ cp += 2;
+ } else if ((base == 16) &&
+ (!strncmp(cp, "0x", 2) || !strncmp(cp, "0X", 2))) {
+ cp += 2;
+ }
+ c = &cp[strlen(cp) - 1];
+ while (c >= cp) {
+
+ if (digit_value(*c, base, &dval)) {
+ if (loc) {
+ *loc = c;
+ }
+ return(value);
+ }
+ v = dval * i;
+ if ((MAX_LONG_LONG - value) < v) {
+ return(MAX_LONG_LONG);
+ }
+ value += v;
+ i *= (uint64_t)base;
+ c--;
+ }
+ return(value);
+}
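+
+/* Illustrative sketch, not built: kl_strtoull() base auto-detection
+ * when base == 0, per the code above.
+ */
+#if 0
+static void kl_strtoull_example(void)
+{
+ char *loc;
+
+ kl_strtoull("0x1f", &loc, 0); /* 31: hex via "0x" prefix */
+ kl_strtoull("0b101", &loc, 0); /* 5: binary via "0b" prefix */
+ kl_strtoull("017", &loc, 0); /* 15: octal via leading '0' */
+ kl_strtoull("beef", &loc, 0); /* 48879: hex, contains [a-f] */
+}
+#endif
+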
+/* end kl_libutil.c */
+
+/*
+ * dbg_hash_sym()
+ */
+void
+dbg_hash_sym(uint64_t typenum, dbg_sym_t *stp)
+{
+ dbg_hashrec_t *shp, *hshp;
+
+ if ((typenum == 0) || (!stp)) {
+ return;
+ }
+ shp = (dbg_hashrec_t *)kl_alloc_block(sizeof(dbg_hashrec_t));
+ shp->h_typenum = typenum;
+ shp->h_ptr = stp;
+ shp->h_next = (dbg_hashrec_t *)NULL;
+ if ((hshp = dbg_hash[TYPE_NUM_HASH(typenum)])) {
+ while (hshp->h_next) {
+ hshp = hshp->h_next;
+ }
+ hshp->h_next = shp;
+ } else {
+ dbg_hash[TYPE_NUM_HASH(typenum)] = shp;
+ }
+}
+
+/*
+ * dbg_find_sym()
+ */
+dbg_sym_t *
+dbg_find_sym(char *name, int type, uint64_t typenum)
+{
+ dbg_sym_t *stp = (dbg_sym_t *)NULL;
+
+ if (name && strlen(name)) {
+ /* Cycle through the type flags and see if any records are
+ * present. Note that if multiple type flags or DBG_ALL is
+ * passed in, only the first occurrence of 'name' will be
+ * found and returned. If name exists in multiple trees,
+ * then multiple searches are necessary to find them.
+ */
+ if (type & DBG_TYPE) {
+ if ((stp = (dbg_sym_t *)kl_find_btnode((btnode_t *)
+ type_tree, name, (int *)NULL))) {
+ goto found_sym;
+ }
+ }
+ if (type & DBG_TYPEDEF) {
+ if ((stp = (dbg_sym_t *)kl_find_btnode((btnode_t *)
+ typedef_tree, name, (int *)NULL))) {
+ goto found_sym;
+ }
+ }
+ if (!stp) {
+ return((dbg_sym_t*)NULL);
+ }
+ }
+found_sym:
+ if (typenum) {
+ dbg_hashrec_t *hshp;
+
+ if (stp) {
+ if (stp->sym_typenum == typenum) {
+ return(stp);
+ }
+ } else if ((hshp = dbg_hash[TYPE_NUM_HASH(typenum)])) {
+ while (hshp) {
+ if (hshp->h_typenum == typenum) {
+ return(hshp->h_ptr);
+ }
+ hshp = hshp->h_next;
+ }
+ }
+ }
+ return(stp);
+}
+
+/*
+ * kl_find_type() -- find a KLT type by name.
+ */
+kltype_t *
+kl_find_type(char *name, int tnum)
+{
+ dbg_sym_t *stp;
+ kltype_t *kltp = (kltype_t *)NULL;
+
+ if (!have_debug_file) {
+ kdb_printf("no debuginfo file\n");
+ return kltp;
+ }
+
+ if (!tnum || IS_TYPE(tnum)) {
+ if ((stp = dbg_find_sym(name, DBG_TYPE, 0))) {
+ kltp = (kltype_t *)stp->sym_kltype;
+ if (tnum && !(kltp->kl_type & tnum)) {
+ /* We have found a type by this name
+ * but it does not have the right
+ * type number (e.g., we're looking
+ * for a struct and we don't find
+ * a KLT_STRUCT type by this name).
+ */
+ return((kltype_t *)NULL);
+ }
+ }
+ }
+ if (!tnum || IS_TYPEDEF(tnum)) {
+ if ((stp = dbg_find_sym(name, DBG_TYPEDEF, 0))) {
+ kltp = (kltype_t *)stp->sym_kltype;
+ }
+ }
+ return(kltp);
+}
+
+/*
+ * kl_first_btnode() -- non-recursive implementation.
+ */
+btnode_t *
+kl_first_btnode(btnode_t *np)
+{
+ if (!np) {
+ return((btnode_t *)NULL);
+ }
+
+ /* Walk down the left side 'til the end...
+ */
+ while (np->bt_left) {
+ np = np->bt_left;
+ }
+ return(np);
+}
+
+/*
+ * kl_next_btnode() -- non-recursive implementation.
+ */
+btnode_t *
+kl_next_btnode(btnode_t *node)
+{
+ btnode_t *np = node, *parent;
+
+ if (np) {
+ if (np->bt_right) {
+ return(kl_first_btnode(np->bt_right));
+ } else {
+ parent = np->bt_parent;
+next:
+ if (parent) {
+ if (parent->bt_left == np) {
+ return(parent);
+ }
+ np = parent;
+ parent = parent->bt_parent;
+ goto next;
+ }
+ }
+ }
+ return((btnode_t *)NULL);
+}
+
+/*
+ * dbg_next_sym()
+ */
+dbg_sym_t *
+dbg_next_sym(dbg_sym_t *stp)
+{
+ dbg_sym_t *next_stp;
+
+ next_stp = (dbg_sym_t *)kl_next_btnode((btnode_t *)stp);
+ return(next_stp);
+}
+
+/*
+ * kl_prev_btnode() -- non-recursive implementation.
+ */
+btnode_t *
+kl_prev_btnode(btnode_t *node)
+{
+ btnode_t *np = node, *parent;
+
+ if (np) {
+ if (np->bt_left) {
+ np = np->bt_left;
+ while (np->bt_right) {
+ np = np->bt_right;
+ }
+ return(np);
+ }
+ parent = np->bt_parent;
+next:
+ if (parent) {
+ if (parent->bt_right == np) {
+ return(parent);
+ }
+ np = parent;
+ parent = parent->bt_parent;
+ goto next;
+ }
+ }
+ return((btnode_t *)NULL);
+}
+
+/*
+ * dbg_prev_sym()
+ */
+dbg_sym_t *
+dbg_prev_sym(dbg_sym_t *stp)
+{
+ dbg_sym_t *prev_stp;
+
+ prev_stp = (dbg_sym_t *)kl_prev_btnode((btnode_t *)stp);
+ return(prev_stp);
+}
+
+/*
+ * kl_find_next_type() -- find next KLT type
+ */
+kltype_t *
+kl_find_next_type(kltype_t *kltp, int type)
+{
+ kltype_t *nkltp = NULL;
+ dbg_sym_t *nstp;
+
+ if (kltp && kltp->kl_ptr) {
+ nstp = (dbg_sym_t *)kltp->kl_ptr;
+ nkltp = (kltype_t *)nstp->sym_kltype;
+ if (type) {
+ while(nkltp && !(nkltp->kl_type & type)) {
+ if ((nstp = dbg_next_sym(nstp))) {
+ nkltp = (kltype_t *)nstp->sym_kltype;
+ } else {
+ nkltp = (kltype_t *)NULL;
+ }
+ }
+ }
+ }
+ return(nkltp);
+}
+
+/*
+ * dbg_first_sym()
+ */
+dbg_sym_t *
+dbg_first_sym(int type)
+{
+ dbg_sym_t *stp = (dbg_sym_t *)NULL;
+
+ switch(type) {
+ case DBG_TYPE:
+ stp = (dbg_sym_t *)
+ kl_first_btnode((btnode_t *)type_tree);
+ break;
+ case DBG_TYPEDEF:
+ stp = (dbg_sym_t *)
+ kl_first_btnode((btnode_t *)typedef_tree);
+ break;
+ }
+ return(stp);
+}
+
+/*
+ * kl_first_type()
+ */
+kltype_t *
+kl_first_type(int tnum)
+{
+ kltype_t *kltp = NULL;
+ dbg_sym_t *stp;
+
+ if (IS_TYPE(tnum)) {
+ /* If (tnum == KLT_TYPE), then return the first type
+ * record, regardless of the type. Otherwise, search
+ * for the first type that maps into tnum.
+ */
+ if ((stp = dbg_first_sym(DBG_TYPE))) {
+ kltp = (kltype_t *)stp->sym_kltype;
+ if (tnum != KLT_TYPE) {
+ while (kltp && !(kltp->kl_type & tnum)) {
+ if ((stp = dbg_next_sym(stp))) {
+ kltp = (kltype_t *)stp->sym_kltype;
+ } else {
+ kltp = (kltype_t *)NULL;
+ }
+ }
+ }
+ }
+ } else if (IS_TYPEDEF(tnum)) {
+ if ((stp = dbg_first_sym(DBG_TYPEDEF))) {
+ kltp = (kltype_t *)stp->sym_kltype;
+ }
+ }
+ return(kltp);
+}
+
+/*
+ * kl_next_type()
+ */
+kltype_t *
+kl_next_type(kltype_t *kltp)
+{
+ dbg_sym_t *stp, *nstp;
+ kltype_t *nkltp = (kltype_t *)NULL;
+
+ if (!kltp) {
+ return((kltype_t *)NULL);
+ }
+ stp = (dbg_sym_t *)kltp->kl_ptr;
+ if ((nstp = dbg_next_sym(stp))) {
+ nkltp = (kltype_t *)nstp->sym_kltype;
+ }
+ return(nkltp);
+}
+
+/*
+ * kl_prev_type()
+ */
+kltype_t *
+kl_prev_type(kltype_t *kltp)
+{
+ dbg_sym_t *stp, *pstp;
+ kltype_t *pkltp = (kltype_t *)NULL;
+
+ if (!kltp) {
+ return((kltype_t *)NULL);
+ }
+ stp = (dbg_sym_t *)kltp->kl_ptr;
+ if ((pstp = dbg_prev_sym(stp))) {
+ pkltp = (kltype_t *)pstp->sym_kltype;
+ }
+ return(pkltp);
+}
+
+/*
+ * kl_realtype()
+ */
+kltype_t *
+kl_realtype(kltype_t *kltp, int tnum)
+{
+ kltype_t *rkltp = kltp;
+
+ while (rkltp) {
+ if (tnum && (rkltp->kl_type == tnum)) {
+ break;
+ }
+ if (!rkltp->kl_realtype) {
+ break;
+ }
+ if (rkltp->kl_realtype == rkltp) {
+ break;
+ }
+ rkltp = rkltp->kl_realtype;
+ if (rkltp == kltp) {
+ break;
+ }
+ }
+ return(rkltp);
+}
+
+/*
+ * dbg_find_typenum()
+ */
+dbg_type_t *
+dbg_find_typenum(uint64_t typenum)
+{
+ dbg_sym_t *stp;
+ dbg_type_t *sp = (dbg_type_t *)NULL;
+
+ if ((stp = dbg_find_sym(0, DBG_TYPE, typenum))) {
+ sp = (dbg_type_t *)stp->sym_kltype;
+ }
+ return(sp);
+}
+
+/*
+ * find type by typenum
+ */
+kltype_t *
+kl_find_typenum(uint64_t typenum)
+{
+ kltype_t *kltp;
+
+ kltp = (kltype_t *)dbg_find_typenum(typenum);
+ return(kltp);
+}
+
+/*
+ * _kl_find_btnode() -- non-recursive implementation.
+ *
+ * When len is non-zero, only the first len characters of key are
+ * compared.
+ */
+btnode_t *
+_kl_find_btnode(btnode_t *np, char *key, int *max_depth, size_t len)
+{
+ int ret;
+ btnode_t *next, *prev;
+
+ if (np) {
+ if (max_depth) {
+ (*max_depth)++;
+ }
+ next = np;
+again:
+ if (len) {
+ ret = strncmp(key, next->bt_key, len);
+ } else {
+ ret = strcmp(key, next->bt_key);
+ }
+ if (ret == 0) {
+ if ((prev = kl_prev_btnode(next))) {
+ if (len) {
+ ret = strncmp(key, prev->bt_key, len);
+ } else {
+ ret = strcmp(key, prev->bt_key);
+ }
+ if (ret == 0) {
+ next = prev;
+ goto again;
+ }
+ }
+ return(next);
+ } else if (ret < 0) {
+ if ((next = next->bt_left)) {
+ goto again;
+ }
+ } else {
+ if ((next = next->bt_right)) {
+ goto again;
+ }
+ }
+ }
+ return((btnode_t *)NULL);
+}
+
+/*
+ * kl_type_size()
+ */
+int
+kl_type_size(kltype_t *kltp)
+{
+ kltype_t *rkltp;
+
+ if (!kltp) {
+ return(0);
+ }
+ if (!(rkltp = kl_realtype(kltp, 0))) {
+ return(0);
+ }
+ return(rkltp->kl_size);
+}
+
+/*
+ * kl_struct_len()
+ */
+int
+kl_struct_len(char *s)
+{
+ kltype_t *kltp;
+
+ if ((kltp = kl_find_type(s, (KLT_TYPES)))) {
+ return kl_type_size(kltp);
+ }
+ return(0);
+}
+
+/*
+ * kl_get_member()
+ */
+kltype_t *
+kl_get_member(kltype_t *kltp, char *f)
+{
+ kltype_t *mp;
+
+ if ((mp = kltp->kl_member)) {
+ while (mp) {
+ if (mp->kl_flags & TYP_ANONYMOUS_FLG) {
+ kltype_t *amp;
+
+ if ((amp = kl_get_member(mp->kl_realtype, f))) {
+ return(amp);
+ }
+ } else if (!strcmp(mp->kl_name, f)) {
+ break;
+ }
+ mp = mp->kl_member;
+ }
+ }
+ return(mp);
+}
+
+/*
+ * kl_member()
+ */
+kltype_t *
+kl_member(char *s, char *f)
+{
+ kltype_t *kltp, *mp = NULL;
+
+ if (!(kltp = kl_find_type(s, (KLT_STRUCT|KLT_UNION)))) {
+ if ((kltp = kl_find_type(s, KLT_TYPEDEF))) {
+ kltp = kl_realtype(kltp, 0);
+ }
+ }
+ if (kltp) {
+ mp = kl_get_member(kltp, f);
+ }
+ return(mp);
+}
+
+
+/*
+ * kl_get_member_offset()
+ */
+int
+kl_get_member_offset(kltype_t *kltp, char *f)
+{
+ kltype_t *mp;
+
+ if ((mp = kltp->kl_member)) {
+ while (mp) {
+ if (mp->kl_flags & TYP_ANONYMOUS_FLG) {
+ int off;
+
+ /* Drill down to see if the member we are looking for is in
+ * an anonymous union or struct. Since this call is recursive,
+ * the drill down may actually be multi-layer.
+ */
+ off = kl_get_member_offset(mp->kl_realtype, f);
+ if (off >= 0) {
+ return(mp->kl_offset + off);
+ }
+ } else if (!strcmp(mp->kl_name, f)) {
+ return(mp->kl_offset);
+ }
+ mp = mp->kl_member;
+ }
+ }
+ return(-1);
+}
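+
+/* Illustrative example with a hypothetical layout: given
+ *
+ * struct s { long a; union { int b; char c; }; };
+ *
+ * looking up "b" recurses into the anonymous union and returns the
+ * union's offset plus b's offset within it (8 + 0 on a 64-bit kernel).
+ */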
+
+/*
+ * kl_member_offset()
+ */
+int
+kl_member_offset(char *s, char *f)
+{
+ int off = -1;
+ kltype_t *kltp;
+
+ if (!(kltp = kl_find_type(s, (KLT_STRUCT|KLT_UNION)))) {
+ if ((kltp = kl_find_type(s, KLT_TYPEDEF))) {
+ kltp = kl_realtype(kltp, 0);
+ }
+ }
+ if (kltp) {
+ off = kl_get_member_offset(kltp, f);
+ }
+ return(off);
+}
+
+/*
+ * kl_is_member()
+ */
+int
+kl_is_member(char *s, char *f)
+{
+ kltype_t *mp;
+
+ if ((mp = kl_member(s, f))) {
+ return(1);
+ }
+ return(0);
+}
+
+/*
+ * kl_member_size()
+ */
+int
+kl_member_size(char *s, char *f)
+{
+ kltype_t *mp;
+
+ if ((mp = kl_member(s, f))) {
+ return(mp->kl_size);
+ }
+ return(0);
+}
+
+#define TAB_SPACES 8
+#define LEVEL_INDENT(level, flags) {\
+ int i, j; \
+ if (!(flags & NO_INDENT)) { \
+ for (i = 0; i < level; i++) { \
+ for (j = 0; j < TAB_SPACES; j++) { \
+ kdb_printf(" "); \
+ } \
+ }\
+ } \
+}
+#define PRINT_NL(flags) \
+ if (!(flags & SUPPRESS_NL)) { \
+ kdb_printf("\n"); \
+ }
+#define PRINT_SEMI_COLON(level, flags) \
+ if (level && (!(flags & SUPPRESS_SEMI_COLON))) { \
+ kdb_printf(";"); \
+ }
+
+/*
+ * print_realtype()
+ */
+static void
+print_realtype(kltype_t *kltp)
+{
+ kltype_t *rkltp;
+
+ if ((rkltp = kltp->kl_realtype)) {
+ while (rkltp && rkltp->kl_realtype) {
+ rkltp = rkltp->kl_realtype;
+ }
+ if (rkltp->kl_type == KLT_BASE) {
+ kdb_printf(" (%s)", rkltp->kl_name);
+ }
+ }
+}
+
+int align_chk = 0;
+/*
+ * kl_print_uint16()
+ *
+ */
+void
+kl_print_uint16(void *ptr, int flags)
+{
+ unsigned long long a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 16) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(unsigned long long *) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("%#llx", a);
+ } else if (flags & C_BINARY) {
+ kdb_printf("0b");
+ kl_binary_print(a);
+ } else {
+ kdb_printf("%llu", a);
+ }
+}
+
+#if 0
+/*
+ * kl_print_float16()
+ *
+ */
+void
+kl_print_float16(void *ptr, int flags)
+{
+ double a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 16) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(double*) ptr;
+ kdb_printf("%f", a);
+}
+#endif
+
+/*
+ * kl_print_int16()
+ *
+ */
+void
+kl_print_int16(void *ptr, int flags)
+{
+ long long a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 16) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(long long *) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("%#llx", a);
+ } else if (flags & C_BINARY) {
+ kdb_printf("0b");
+ kl_binary_print(a);
+ } else {
+ kdb_printf("%lld", a);
+ }
+}
+
+/*
+ * kl_print_int8()
+ */
+void
+kl_print_int8(void *ptr, int flags)
+{
+ long long a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 8) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(long long *) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("%#llx", a);
+ } else if (flags & C_BINARY) {
+ kdb_printf("0b");
+ kl_binary_print(a);
+ } else {
+ kdb_printf("%lld", a);
+ }
+}
+
+#if 0
+/*
+ * kl_print_float8()
+ */
+void
+kl_print_float8(void *ptr, int flags)
+{
+ double a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 8) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(double*) ptr;
+ kdb_printf("%f", a);
+}
+#endif
+
+/*
+ * kl_print_uint8()
+ */
+void
+kl_print_uint8(void *ptr, int flags)
+{
+ unsigned long long a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 8) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(unsigned long long *) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("%#llx", a);
+ } else if (flags & C_BINARY) {
+ kdb_printf("0b");
+ kl_binary_print(a);
+ } else {
+ kdb_printf("%llu", a);
+ }
+}
+
+/*
+ * kl_print_int4()
+ */
+void
+kl_print_int4(void *ptr, int flags)
+{
+ int32_t a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 4) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(int32_t*) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("0x%x", a);
+ } else if (flags & C_BINARY) {
+ uint64_t value = a & 0xffffffff;
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ kdb_printf("%d", a);
+ }
+}
+
+#if 0
+/*
+ * kl_print_float4()
+ */
+void
+kl_print_float4(void *ptr, int flags)
+{
+ float a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 4) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(float*) ptr;
+ kdb_printf("%f", a);
+}
+#endif
+
+/*
+ * kl_print_uint4()
+ */
+void
+kl_print_uint4(void *ptr, int flags)
+{
+ uint32_t a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 4) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(uint32_t*) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("0x%x", a);
+ } else if (flags & C_BINARY) {
+ uint64_t value = a & 0xffffffff;
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ kdb_printf("%u", a);
+ }
+}
+
+/*
+ * kl_print_int2()
+ */
+void
+kl_print_int2(void *ptr, int flags)
+{
+ int16_t a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 2) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(int16_t*) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("0x%hx", a);
+ } else if (flags & C_BINARY) {
+ uint64_t value = a & 0xffff;
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ kdb_printf("%hd", a);
+ }
+}
+
+/*
+ * kl_print_uint2()
+ */
+void
+kl_print_uint2(void *ptr, int flags)
+{
+ uint16_t a;
+
+ /* Make sure the pointer is properly aligned (or we will
+ * dump core)
+ */
+ if (align_chk && (uaddr_t)ptr % 2) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ a = *(uint16_t*) ptr;
+ if (flags & C_HEX) {
+ kdb_printf("0x%hx", a);
+ } else if (flags & C_BINARY) {
+ uint64_t value = a & 0xffff;
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ kdb_printf("%hu", a);
+ }
+}
+
+/*
+ * kl_print_char()
+ */
+void
+kl_print_char(void *ptr, int flags)
+{
+ char c;
+
+ if (flags & C_HEX) {
+ kdb_printf("0x%x", (*(char *)ptr) & 0xff);
+ } else if (flags & C_BINARY) {
+ uint64_t value = (*(char *)ptr) & 0xff;
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ c = *(char *)ptr;
+
+ kdb_printf("\'\\%03o\'", (unsigned char)c);
+ switch (c) {
+ case '\a' :
+ kdb_printf(" = \'\\a\'");
+ break;
+ case '\b' :
+ kdb_printf(" = \'\\b\'");
+ break;
+ case '\t' :
+ kdb_printf(" = \'\\t\'");
+ break;
+ case '\n' :
+ kdb_printf(" = \'\\n\'");
+ break;
+ case '\f' :
+ kdb_printf(" = \'\\f\'");
+ break;
+ case '\r' :
+ kdb_printf(" = \'\\r\'");
+ break;
+ case '\e' :
+ kdb_printf(" = \'\\e\'");
+ break;
+ default :
+ if( !iscntrl((unsigned char) c) ) {
+ kdb_printf(" = \'%c\'", c);
+ }
+ break;
+ }
+ }
+}
+
+/*
+ * kl_print_uchar()
+ */
+void
+kl_print_uchar(void *ptr, int flags)
+{
+ if (flags & C_HEX) {
+ kdb_printf("0x%x", *(unsigned char *)ptr);
+ } else if (flags & C_BINARY) {
+ uint64_t value = (*(unsigned char *)ptr) & 0xff;
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ kdb_printf("%u", *(unsigned char *)ptr);
+ }
+}
+
+/*
+ * kl_print_base()
+ */
+void
+kl_print_base(void *ptr, int size, int encoding, int flags)
+{
+ /* FIXME: untested */
+ if (invalid_address((kaddr_t)ptr, size)) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
+ return;
+ }
+ switch (size) {
+
+ case 1:
+ if (encoding == ENC_UNSIGNED) {
+ kl_print_uchar(ptr, flags);
+ } else {
+ kl_print_char(ptr, flags);
+ }
+ break;
+
+ case 2:
+ if (encoding == ENC_UNSIGNED) {
+ kl_print_uint2(ptr, flags);
+ } else {
+ kl_print_int2(ptr, flags);
+ }
+ break;
+
+ case 4:
+ if (encoding == ENC_UNSIGNED) {
+ kl_print_uint4(ptr, flags);
+ } else if (encoding == ENC_FLOAT) {
+ printk("error: print of 4-byte float\n");
+ /* kl_print_float4(ptr, flags); */
+ } else {
+ kl_print_int4(ptr, flags);
+ }
+ break;
+
+ case 8:
+ if (encoding == ENC_UNSIGNED) {
+ kl_print_uint8(ptr, flags);
+ } else if (encoding == ENC_FLOAT) {
+ printk("error: print of 8-byte float\n");
+ /* kl_print_float8(ptr, flags); */
+ } else {
+ kl_print_int8(ptr, flags);
+ }
+ break;
+
+ case 16:
+ if (encoding == ENC_UNSIGNED) {
+ /* Ex: unsigned long long */
+ kl_print_uint16(ptr, flags);
+ } else if (encoding == ENC_FLOAT) {
+ printk("error: print of 16-byte float\n");
+ /* Ex: long double */
+ /* kl_print_float16(ptr, flags); */
+ } else {
+ /* Ex: long long */
+ kl_print_int16(ptr, flags);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * kl_print_base_value()
+ */
+void
+kl_print_base_value(void *ptr, kltype_t *kltp, int flags)
+{
+ kltype_t *rkltp=NULL;
+
+ if (kltp->kl_type != KLT_BASE) {
+ if (!(rkltp = kltp->kl_realtype)) {
+ return;
+ }
+ if (rkltp->kl_type != KLT_BASE) {
+ return;
+ }
+ } else {
+ rkltp = kltp;
+ }
+ kl_print_base(ptr, rkltp->kl_size, rkltp->kl_encoding, flags);
+}
+
+/*
+ * kl_print_typedef_type()
+ */
+void
+kl_print_typedef_type(
+ void *ptr,
+ kltype_t *kltp,
+ int level,
+ int flags)
+{
+ char *name;
+ kltype_t *rkltp;
+
+ if (ptr) {
+ rkltp = kltp->kl_realtype;
+ /* Walk down typedef chains; stop if a typedef has no
+ * realtype so a broken chain cannot loop forever.
+ */
+ while ((rkltp->kl_type == KLT_TYPEDEF) && rkltp->kl_realtype) {
+ rkltp = rkltp->kl_realtype;
+ }
+ if (rkltp->kl_type == KLT_POINTER) {
+ kl_print_pointer_type(ptr, kltp, level, flags);
+ return;
+ }
+ switch (rkltp->kl_type) {
+ case KLT_BASE:
+ kl_print_base_type(ptr, kltp,
+ level, flags);
+ break;
+
+ case KLT_UNION:
+ case KLT_STRUCT:
+ kl_print_struct_type(ptr, kltp,
+ level, flags);
+ break;
+
+ case KLT_ARRAY:
+ kl_print_array_type(ptr, kltp,
+ level, flags);
+ break;
+
+ case KLT_ENUMERATION:
+ kl_print_enumeration_type(ptr,
+ kltp, level, flags);
+ break;
+
+ default:
+ kl_print_base_type(ptr, kltp,
+ level, flags);
+ break;
+ }
+ } else {
+ LEVEL_INDENT(level, flags);
+ if (flags & NO_REALTYPE) {
+ rkltp = kltp;
+ } else {
+ rkltp = kltp->kl_realtype;
+ while (rkltp && rkltp->kl_type == KLT_POINTER) {
+ rkltp = rkltp->kl_realtype;
+ }
+ }
+ if (!rkltp) {
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf("<UNKNOWN>");
+ } else {
+ kdb_printf( "typedef <UNKNOWN>%s;",
+ kltp->kl_name);
+ }
+ return;
+ }
+ if (rkltp->kl_type == KLT_FUNCTION) {
+ if (kltp->kl_realtype->kl_type == KLT_POINTER) {
+ kdb_printf("typedef %s(*%s)();",
+ kltp->kl_typestr, kltp->kl_name);
+ } else {
+ kdb_printf( "typedef %s(%s)();",
+ kltp->kl_typestr, kltp->kl_name);
+ }
+ } else if (rkltp->kl_type == KLT_ARRAY) {
+ kl_print_array_type(ptr, rkltp, level, flags);
+ } else if (rkltp->kl_type == KLT_TYPEDEF) {
+ if (!(name = rkltp->kl_name)) {
+ name = rkltp->kl_typestr;
+ }
+
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf("%s", name);
+ } else {
+ kdb_printf("typedef %s%s;",
+ name, kltp->kl_name);
+ }
+ print_realtype(rkltp);
+ } else {
+ kl_print_type(ptr, rkltp, level, flags);
+ }
+ PRINT_NL(flags);
+ }
+}
+
+/*
+ * kl_print_pointer_type()
+ */
+void
+kl_print_pointer_type(
+ void *ptr,
+ kltype_t *kltp,
+ int level,
+ int flags)
+{
+ kltype_t *itp;
+
+ if (kltp->kl_type == KLT_MEMBER) {
+ itp = kltp->kl_realtype;
+ } else {
+ itp = kltp;
+ }
+
+ /* See if this is a pointer to a function. If it is, then it
+ * has to be handled differently...
+ */
+ while (itp->kl_type == KLT_POINTER) {
+ if ((itp = itp->kl_realtype)) {
+ if (itp->kl_type == KLT_FUNCTION) {
+ kl_print_function_type(ptr,
+ kltp, level, flags);
+ return;
+ }
+ } else {
+ LEVEL_INDENT(level, flags);
+ kdb_printf("%s%s;\n",
+ kltp->kl_typestr, kltp->kl_name);
+ return;
+ }
+ }
+
+ LEVEL_INDENT(level, flags);
+ if (ptr) {
+ kaddr_t tmp;
+ tmp = *(kaddr_t *)ptr;
+ flags |= SUPPRESS_SEMI_COLON;
+ if(kltp->kl_name){
+ if (*(kaddr_t *)ptr) {
+ kdb_printf("%s = 0x%"FMTPTR"x",
+ kltp->kl_name, tmp);
+ } else {
+ kdb_printf("%s = (nil)", kltp->kl_name);
+ }
+ } else {
+ if (tmp != 0) {
+ kdb_printf("0x%"FMTPTR"x", tmp);
+ } else {
+ kdb_printf( "(nil)");
+ }
+ }
+ } else {
+ if (kltp->kl_typestr) {
+ if (kltp->kl_name && !(flags & SUPPRESS_NAME)) {
+ kdb_printf("%s%s",
+ kltp->kl_typestr, kltp->kl_name);
+ } else {
+ kdb_printf("%s", kltp->kl_typestr);
+ }
+ } else {
+ kdb_printf("<UNKNOWN>");
+ }
+ }
+ PRINT_SEMI_COLON(level, flags);
+ PRINT_NL(flags);
+}
+
+/*
+ * kl_print_function_type()
+ */
+void
+kl_print_function_type(
+ void *ptr,
+ kltype_t *kltp,
+ int level,
+ int flags)
+{
+ LEVEL_INDENT(level, flags);
+ if (ptr) {
+ kaddr_t a;
+
+ a = *(kaddr_t *)ptr;
+ kdb_printf("%s = 0x%"FMTPTR"x", kltp->kl_name, a);
+ } else {
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf("%s(*)()", kltp->kl_typestr);
+ } else {
+ kdb_printf("%s(*%s)();",
+ kltp->kl_typestr, kltp->kl_name);
+ }
+ }
+ PRINT_NL(flags);
+}
+
+/*
+ * kl_print_array_type()
+ */
+void
+kl_print_array_type(void *ptr, kltype_t *kltp, int level, int flags)
+{
+ int i, count = 0, anon = 0, size, low, high, multi = 0;
+ char typestr[128], *name, *p;
+ kltype_t *rkltp, *etp, *retp;
+
+ if (kltp->kl_type != KLT_ARRAY) {
+ if ((rkltp = kltp->kl_realtype)) {
+ while (rkltp->kl_type != KLT_ARRAY) {
+ if (!(rkltp = rkltp->kl_realtype)) {
+ break;
+ }
+ }
+ }
+ if (!rkltp) {
+ LEVEL_INDENT(level, flags);
+ kdb_printf("<ARRAY_TYPE>");
+ PRINT_SEMI_COLON(level, flags);
+ PRINT_NL(flags);
+ return;
+ }
+ } else {
+ rkltp = kltp;
+ }
+
+ etp = rkltp->kl_elementtype;
+ if (!etp) {
+ LEVEL_INDENT(level, flags);
+ kdb_printf("<BAD_ELEMENT_TYPE> %s", rkltp->kl_name);
+ PRINT_SEMI_COLON(level, flags);
+ PRINT_NL(flags);
+ return;
+ }
+
+ /* Set retp to point to the actual element type. This is necessary
+ * for multi-dimensional arrays, which link using the kl_elementtype
+ * member.
+ */
+ retp = etp;
+ while (retp->kl_type == KLT_ARRAY) {
+ retp = retp->kl_elementtype;
+ }
+ low = rkltp->kl_low_bounds + 1;
+ high = rkltp->kl_high_bounds;
+
+ if (ptr) {
+
+ p = ptr;
+
+ if ((retp->kl_size == 1) && (retp->kl_encoding == ENC_CHAR)) {
+ if (kltp->kl_type == KLT_MEMBER) {
+ LEVEL_INDENT(level, flags);
+ }
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf("\"");
+ flags &= ~SUPPRESS_NAME;
+ } else {
+ kdb_printf("%s = \"", kltp->kl_name);
+ }
+ for (i = 0; i < high; i++) {
+ if (*(char*)p == 0) {
+ break;
+ }
+ kdb_printf("%c", *(char *)p);
+ p++;
+ }
+ kdb_printf("\"");
+ PRINT_NL(flags);
+ } else {
+ if (kltp->kl_type == KLT_MEMBER) {
+ LEVEL_INDENT(level, flags);
+ }
+
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf("{\n");
+ flags &= ~SUPPRESS_NAME;
+ } else {
+ kdb_printf("%s = {\n", kltp->kl_name);
+ }
+
+ if (retp->kl_type == KLT_POINTER) {
+ size = sizeof(void *);
+ } else {
+ while (retp->kl_realtype) {
+ retp = retp->kl_realtype;
+ }
+ size = retp->kl_size;
+ }
+ if ((retp->kl_type != KLT_STRUCT) &&
+ (retp->kl_type != KLT_UNION)) {
+ /* Turn off the printing of names for all
+ * but structs and unions.
+ */
+ flags |= SUPPRESS_NAME;
+ }
+ for (i = low; i <= high; i++) {
+
+ LEVEL_INDENT(level + 1, flags);
+ kdb_printf("[%d] ", i);
+
+ switch (retp->kl_type) {
+ case KLT_POINTER :
+ kl_print_pointer_type(
+ p, retp, level,
+ flags|NO_INDENT);
+ break;
+
+ case KLT_TYPEDEF:
+ kl_print_typedef_type(
+ p, retp, level,
+ flags|NO_INDENT);
+ break;
+
+ case KLT_BASE:
+ kl_print_base_value(p,
+ retp, flags|NO_INDENT);
+ kdb_printf("\n");
+ break;
+
+ case KLT_ARRAY:
+ kl_print_array_type(p, retp,
+ level + 1,
+ flags|SUPPRESS_NAME);
+ break;
+
+ case KLT_STRUCT:
+ case KLT_UNION:
+ kl_print_struct_type(p,
+ retp, level + 1,
+ flags|NO_INDENT);
+ break;
+
+ default:
+ kl_print_base_value(
+ p, retp,
+ flags|NO_INDENT);
+ kdb_printf("\n");
+ break;
+ }
+ p = (void *)((uaddr_t)p + size);
+ }
+ LEVEL_INDENT(level, flags);
+ kdb_printf("}");
+ PRINT_SEMI_COLON(level, flags);
+ PRINT_NL(flags);
+ }
+ } else {
+ if (rkltp) {
+ count = (rkltp->kl_high_bounds -
+ rkltp->kl_low_bounds) + 1;
+ } else {
+ count = 1;
+ }
+
+ if (!strcmp(retp->kl_typestr, "struct ") ||
+ !strcmp(retp->kl_typestr, "union ")) {
+ anon = 1;
+ }
+next_dimension:
+ switch (retp->kl_type) {
+
+ case KLT_UNION:
+ case KLT_STRUCT:
+ if (anon) {
+ if (multi) {
+ kdb_printf("[%d]", count);
+ break;
+ }
+ kl_print_struct_type(ptr, retp, level,
+ flags|
+ SUPPRESS_NL|
+ SUPPRESS_SEMI_COLON);
+ if (kltp->kl_type == KLT_MEMBER) {
+ kdb_printf(" %s[%d]",
+ kltp->kl_name, count);
+ } else {
+ kdb_printf(" [%d]", count);
+ }
+ break;
+ }
+ /* else drop through */
+
+ default:
+ LEVEL_INDENT(level, flags);
+ if (multi) {
+ kdb_printf("[%d]", count);
+ break;
+ }
+ name = kltp->kl_name;
+ if (retp->kl_type == KLT_TYPEDEF) {
+ strcpy(typestr, retp->kl_name);
+ strcat(typestr, " ");
+ } else {
+ strcpy(typestr, retp->kl_typestr);
+ }
+ if (!name || (flags & SUPPRESS_NAME)) {
+ kdb_printf("%s[%d]", typestr, count);
+ } else {
+ kdb_printf("%s%s[%d]",
+ typestr, name, count);
+ }
+ }
+ if (etp->kl_type == KLT_ARRAY) {
+ count = etp->kl_high_bounds - etp->kl_low_bounds + 1;
+ etp = etp->kl_elementtype;
+ multi++;
+ goto next_dimension;
+ }
+ PRINT_SEMI_COLON(level, flags);
+ PRINT_NL(flags);
+ }
+}
+
+/*
+ * kl_print_enumeration_type()
+ */
+void
+kl_print_enumeration_type(
+ void *ptr,
+ kltype_t *kltp,
+ int level,
+ int flags)
+{
+ unsigned long long val = 0;
+ kltype_t *mp, *rkltp;
+
+ rkltp = kl_realtype(kltp, KLT_ENUMERATION);
+ if (ptr) {
+ switch (kltp->kl_size) {
+ case 1:
+ val = *(unsigned long long *)ptr;
+ break;
+
+ case 2:
+ val = *(uint16_t *)ptr;
+ break;
+
+ case 4:
+ val = *(uint32_t *)ptr;
+ break;
+
+ case 8:
+ val = *(uint64_t *)ptr;
+ break;
+ }
+ mp = rkltp->kl_member;
+ while (mp) {
+ if (mp->kl_value == val) {
+ break;
+ }
+ mp = mp->kl_member;
+ }
+ LEVEL_INDENT(level, flags);
+ if (mp) {
+ kdb_printf("%s = (%s=%lld)",
+ kltp->kl_name, mp->kl_name, val);
+ } else {
+ kdb_printf("%s = %lld", kltp->kl_name, val);
+ }
+ PRINT_NL(flags);
+ } else {
+ LEVEL_INDENT(level, flags);
+ kdb_printf ("%s {", kltp->kl_typestr);
+ mp = rkltp->kl_member;
+ while (mp) {
+ kdb_printf("%s = %d", mp->kl_name, mp->kl_value);
+ if ((mp = mp->kl_member)) {
+ kdb_printf(", ");
+ }
+ }
+ mp = kltp;
+ if (level) {
+ kdb_printf("} %s;", mp->kl_name);
+ } else {
+ kdb_printf("};");
+ }
+ PRINT_NL(flags);
+ }
+}
+
+/*
+ * kl_binary_print()
+ */
+void
+kl_binary_print(uint64_t num)
+{
+ int i, pre = 1;
+
+ for (i = 63; i >= 0; i--) {
+ if (num & ((uint64_t)1 << i)) {
+ kdb_printf("1");
+ if (pre) {
+ pre = 0;
+ }
+ } else {
+ if (!pre) {
+ kdb_printf("0");
+ }
+ }
+ }
+ if (pre) {
+ kdb_printf("0");
+ }
+}
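+
+/* Illustrative examples: kl_binary_print(10) emits "1010" and
+ * kl_binary_print(0) emits a single "0"; leading zeroes are
+ * suppressed via the "pre" flag above.
+ */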
+
+/*
+ * kl_get_bit_value()
+ *
+ * x = byte_size, y = bit_size, z = bit_offset
+ */
+uint64_t
+kl_get_bit_value(void *ptr, unsigned int x, unsigned int y, unsigned int z)
+{
+ uint64_t value=0, mask;
+
+ /* handle x bytes of buffer -- doing just memcpy won't work
+ * on big endian architectures
+ */
+ switch (x) {
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ x = 8;
+ value = *(uint64_t*) ptr;
+ break;
+ case 3:
+ case 4:
+ x = 4;
+ value = *(uint32_t*) ptr;
+ break;
+ case 2:
+ value = *(uint16_t*) ptr;
+ break;
+ case 1:
+ value = *(uint8_t *)ptr;
+ break;
+ default:
+ /* FIXME: set KL_ERROR */
+ return(0);
+ }
+ /*
+ * FIXME: correct handling of overlapping fields
+ */
+
+ /* goto bit offset */
+ value = value >> z;
+
+ /* mask bit size bits */
+ mask = (((uint64_t)1 << y) - 1);
+ return (value & mask);
+}
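+
+/* Illustrative sketch, not built: extracting a 3-bit field that starts
+ * at bit offset 4 of a 4-byte word.
+ */
+#if 0
+static void kl_get_bit_value_example(void)
+{
+ uint32_t word = 0x70; /* bits 4..6 set */
+
+ /* (0x70 >> 4) & ((1 << 3) - 1) == 0x7 */
+ uint64_t v = kl_get_bit_value(&word, 4, 3, 4);
+}
+#endif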
+
+/*
+ * kl_print_bit_value()
+ *
+ * x = byte_size, y = bit_size, z = bit_offset
+ */
+void
+kl_print_bit_value(void *ptr, int x, int y, int z, int flags)
+{
+ unsigned long long value;
+
+ value = kl_get_bit_value(ptr, x, y, z);
+ if (flags & C_HEX) {
+ kdb_printf("%#llx", value);
+ } else if (flags & C_BINARY) {
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ kdb_printf("%lld", value);
+ }
+}
+
+/*
+ * kl_print_base_type()
+ */
+void
+kl_print_base_type(void *ptr, kltype_t *kltp, int level, int flags)
+{
+ LEVEL_INDENT(level, flags);
+ if (ptr) {
+ if (!(flags & SUPPRESS_NAME)) {
+ kdb_printf ("%s = ", kltp->kl_name);
+ }
+ }
+ if (kltp->kl_type == KLT_MEMBER) {
+ if (kltp->kl_bit_size < (kltp->kl_size * 8)) {
+ if (ptr) {
+ kl_print_bit_value(ptr, kltp->kl_size,
+ kltp->kl_bit_size,
+ kltp->kl_bit_offset, flags);
+ } else {
+ if (kltp->kl_name) {
+ kdb_printf ("%s%s :%d;",
+ kltp->kl_typestr,
+ kltp->kl_name,
+ kltp->kl_bit_size);
+ } else {
+ kdb_printf ("%s :%d;",
+ kltp->kl_typestr,
+ kltp->kl_bit_size);
+ }
+ }
+ PRINT_NL(flags);
+ return;
+ }
+ }
+ if (ptr) {
+ kltype_t *rkltp;
+
+ rkltp = kl_realtype(kltp, 0);
+ if (rkltp->kl_encoding == ENC_UNDEFINED) {
+ /* This is a void value
+ */
+ kdb_printf("<VOID>");
+ } else {
+ kl_print_base(ptr, kltp->kl_size,
+ rkltp->kl_encoding, flags);
+ }
+ } else {
+ if (kltp->kl_type == KLT_MEMBER) {
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf ("%s", kltp->kl_typestr);
+ } else {
+ if (kltp->kl_name) {
+ kdb_printf("%s%s;", kltp->kl_typestr,
+ kltp->kl_name);
+ } else {
+ kdb_printf ("%s :%d;",
+ kltp->kl_typestr,
+ kltp->kl_bit_size);
+ }
+ }
+ } else {
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf("%s", kltp->kl_name);
+ } else {
+ kdb_printf("%s;", kltp->kl_name);
+ }
+ }
+ }
+ PRINT_NL(flags);
+}
+
+/*
+ * kl_print_member()
+ */
+void
+kl_print_member(void *ptr, kltype_t *mp, int level, int flags)
+{
+ int kl_type = 0;
+ kltype_t *rkltp;
+
+ if (flags & C_SHOWOFFSET) {
+ kdb_printf("%#x ", mp->kl_offset);
+ }
+
+ if ((rkltp = mp->kl_realtype)) {
+ kl_type = rkltp->kl_type;
+ } else
+ kl_type = mp->kl_type;
+ switch (kl_type) {
+ case KLT_STRUCT:
+ case KLT_UNION:
+ kl_print_struct_type(ptr, mp, level, flags);
+ break;
+ case KLT_ARRAY:
+ kl_print_array_type(ptr, mp, level, flags);
+ break;
+ case KLT_POINTER:
+ kl_print_pointer_type(ptr, mp, level, flags);
+ break;
+ case KLT_FUNCTION:
+ kl_print_function_type(ptr, mp, level, flags);
+ break;
+ case KLT_BASE:
+ kl_print_base_type(ptr, mp, level, flags);
+ break;
+ case KLT_ENUMERATION:
+ kl_print_enumeration_type(ptr, mp, level, flags);
+ break;
+ case KLT_TYPEDEF:
+ while (rkltp && rkltp->kl_realtype) {
+ if (rkltp->kl_realtype == rkltp) {
+ break;
+ }
+ rkltp = rkltp->kl_realtype;
+ }
+ if (ptr) {
+ kl_print_typedef_type(ptr, mp,
+ level, flags);
+ break;
+ }
+ LEVEL_INDENT(level, flags);
+ if (flags & SUPPRESS_NAME) {
+ if (rkltp && (mp->kl_bit_size <
+ (rkltp->kl_size * 8))) {
+ kdb_printf ("%s :%d",
+ mp->kl_typestr,
+ mp->kl_bit_size);
+ } else {
+ kdb_printf("%s",
+ mp->kl_realtype->kl_name);
+ }
+ print_realtype(mp->kl_realtype);
+ } else {
+ if (rkltp && (mp->kl_bit_size <
+ (rkltp->kl_size * 8))) {
+ if (mp->kl_name) {
+ kdb_printf ("%s%s :%d;",
+ mp->kl_typestr,
+ mp->kl_name,
+ mp->kl_bit_size);
+ } else {
+ kdb_printf ("%s :%d;",
+ mp->kl_typestr,
+ mp->kl_bit_size);
+ }
+ } else {
+ kdb_printf("%s %s;",
+ mp->kl_realtype->kl_name,
+ mp->kl_name);
+ }
+ }
+ PRINT_NL(flags);
+ break;
+
+ default:
+ LEVEL_INDENT(level, flags);
+ if (mp->kl_typestr) {
+ kdb_printf("%s%s;",
+ mp->kl_typestr, mp->kl_name);
+ } else {
+ kdb_printf("<\?\?\? kl_type:%d> %s;",
+ kl_type, mp->kl_name);
+ }
+ PRINT_NL(flags);
+ break;
+ }
+}
+
+/*
+ * kl_print_struct_type()
+ */
+void
+kl_print_struct_type(void *buf, kltype_t *kltp, int level, int flags)
+{
+ int orig_flags = flags;
+ void *ptr = NULL;
+ kltype_t *mp, *rkltp;
+
+ /* If we are printing out an actual struct, then don't print any
+ * semicolons.
+ */
+ if (buf) {
+ flags |= SUPPRESS_SEMI_COLON;
+ }
+
+ LEVEL_INDENT(level, flags);
+ if ((level == 0) || (flags & NO_INDENT)) {
+ kdb_printf("%s{\n", kltp->kl_typestr);
+ } else {
+ if (buf) {
+ if (level && !(kltp->kl_flags & TYP_ANONYMOUS_FLG)) {
+ kdb_printf("%s = %s{\n",
+ kltp->kl_name, kltp->kl_typestr);
+ } else {
+ kdb_printf("%s{\n", kltp->kl_typestr);
+ }
+ flags &= (~SUPPRESS_NL);
+ } else {
+ if (kltp->kl_typestr) {
+ kdb_printf("%s{\n", kltp->kl_typestr);
+ } else {
+ kdb_printf("<UNKNOWN> {\n");
+ }
+ }
+ }
+
+ /* If the SUPPRESS_NL, SUPPRESS_SEMI_COLON, and SUPPRESS_NAME flags
+ * are set and buf is NULL, then turn them off as they only apply
+ * at the end of the struct. We save the original flags for that
+ * purpose.
+ */
+ if (!buf) {
+ flags &= ~(SUPPRESS_NL|SUPPRESS_SEMI_COLON|SUPPRESS_NAME);
+ }
+
+ /* If the NO_INDENT is set, we need to turn it off at this
+ * point -- just in case we come across a member of this struct
+ * that is also a struct.
+ */
+ if (flags & NO_INDENT) {
+ flags &= ~(NO_INDENT);
+ }
+
+ if (kltp->kl_type == KLT_MEMBER) {
+ rkltp = kl_realtype(kltp, 0);
+ } else {
+ rkltp = kltp;
+ }
+ level++;
+ if ((mp = rkltp->kl_member)) {
+ while (mp) {
+ if (buf) {
+ ptr = buf + mp->kl_offset;
+ }
+ kl_print_member(ptr, mp, level, flags);
+ mp = mp->kl_member;
+ }
+ } else {
+ if (kltp->kl_flags & TYP_INCOMPLETE_FLG) {
+ LEVEL_INDENT(level, flags);
+ kdb_printf("<INCOMPLETE TYPE>\n");
+ }
+ }
+ level--;
+ LEVEL_INDENT(level, flags);
+
+ /* kl_size = 0 for empty structs */
+ if (ptr || ((kltp->kl_size == 0) && buf)) {
+ kdb_printf("}");
+ } else if ((kltp->kl_type == KLT_MEMBER) &&
+ !(orig_flags & SUPPRESS_NAME) &&
+ !(kltp->kl_flags & TYP_ANONYMOUS_FLG)) {
+ kdb_printf("} %s", kltp->kl_name);
+ } else {
+ kdb_printf("}");
+ }
+ PRINT_SEMI_COLON(level, orig_flags);
+ PRINT_NL(orig_flags);
+}
+
+/*
+ * kl_print_type()
+ */
+void
+kl_print_type(void *buf, kltype_t *kltp, int level, int flags)
+{
+ void *ptr;
+
+ if (buf) {
+ if (kltp->kl_offset) {
+ ptr = (void *)((uaddr_t)buf + kltp->kl_offset);
+ } else {
+ ptr = buf;
+ }
+ } else {
+ ptr = 0;
+ }
+
+ /* Only allow binary printing for base types
+ */
+ if (kltp->kl_type != KLT_BASE) {
+ flags &= (~C_BINARY);
+ }
+ switch (kltp->kl_type) {
+
+ case KLT_TYPEDEF:
+ kl_print_typedef_type(ptr, kltp, level, flags);
+ break;
+
+ case KLT_STRUCT:
+ case KLT_UNION:
+ kl_print_struct_type(ptr, kltp, level, flags);
+ break;
+
+ case KLT_MEMBER:
+ kl_print_member(ptr, kltp, level, flags);
+ break;
+
+ case KLT_POINTER:
+ kl_print_pointer_type(ptr, kltp, level, flags);
+ break;
+
+ case KLT_FUNCTION:
+ LEVEL_INDENT(level, flags);
+ kl_print_function_type(ptr, kltp, level, flags);
+ break;
+
+ case KLT_ARRAY:
+ kl_print_array_type(ptr, kltp, level, flags);
+ break;
+
+ case KLT_ENUMERATION:
+ kl_print_enumeration_type(ptr,
+ kltp, level, flags);
+ break;
+
+ case KLT_BASE:
+ kl_print_base_type(ptr, kltp, level, flags);
+ break;
+
+ default:
+ LEVEL_INDENT(level, flags);
+ if (flags & SUPPRESS_NAME) {
+ kdb_printf ("%s", kltp->kl_name);
+ } else {
+ kdb_printf ("%s %s;",
+ kltp->kl_name, kltp->kl_name);
+ }
+ PRINT_NL(flags);
+ }
+}
+
+/*
+ * eval is from lcrash eval.c
+ */
+
+/* Forward declarations */
+static void free_node(node_t *);
+static node_t *make_node(token_t *, int);
+static node_t *get_node_list(token_t *, int);
+static node_t *do_eval(int);
+static int is_unary(int);
+static int is_binary(int);
+static int precedence(int);
+static node_t *get_sizeof(void);
+static int replace_cast(node_t *, int);
+static int replace_unary(node_t *, int);
+static node_t *replace(node_t *, int);
+static void array_to_element(node_t*, node_t*);
+static int type_to_number(node_t *);
+kltype_t *number_to_type(node_t *);
+static type_t *eval_type(node_t *);
+static type_t *get_type(char *, int);
+static int add_rchild(node_t *, node_t *);
+static void free_nodelist(node_t *);
+
+/* Global variables
+ */
+static int logical_flag;
+static node_t *node_list = (node_t *)NULL;
+uint64_t eval_error;
+char *error_token;
+
+/*
+ * set_eval_error()
+ */
+static void
+set_eval_error(uint64_t ecode)
+{
+ eval_error = ecode;
+}
+
+/*
+ * is_typestr()
+ *
+ * We check for "struct", "union", etc. separately because they
+ * would not be an actual part of the type name. We also assume
+ * that the string passed in
+ *
+ * - does not have any leading blanks or tabs
+ * - is NULL terminated
+ * - contains only one type name to check
+ * - does not contain any '*' characters
+ */
+static int
+is_typestr(char *str)
+{
+ int len;
+
+ len = strlen(str);
+ if ((len >= 6) && !strncmp(str, "struct", 6)) {
+ return(1);
+ } else if ((len >= 5) &&!strncmp(str, "union", 5)) {
+ return(1);
+ } else if ((len >= 5) &&!strncmp(str, "short", 5)) {
+ return(1);
+ } else if ((len >= 8) &&!strncmp(str, "unsigned", 8)) {
+ return(1);
+ } else if ((len >= 6) &&!strncmp(str, "signed", 6)) {
+ return(1);
+ } else if ((len >= 4) &&!strncmp(str, "long", 4)) {
+ return(1);
+ }
+ /* Strip off any trailing blanks
+ */
+ while(*str && ((str[strlen(str) - 1] == ' ')
+ || (str[strlen(str) - 1] == '\t'))) {
+ str[strlen(str) - 1] = 0;
+ }
+ if (kl_find_type(str, KLT_TYPES)) {
+ return (1);
+ }
+ return(0);
+}
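+
+/* Illustrative examples: is_typestr("struct foo") and
+ * is_typestr("unsigned long") return 1 via the keyword checks above;
+ * any other name returns 1 only if kl_find_type() knows it.
+ */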
+
+/*
+ * free_tokens()
+ */
+static void
+free_tokens(token_t *tp)
+{
+ token_t *t, *tnext;
+
+ t = tp;
+ while (t) {
+ tnext = t->next;
+ if (t->string) {
+ kl_free_block((void *)t->string);
+ }
+ kl_free_block((void *)t);
+ t = tnext;
+ }
+}
+
+/*
+ * process_text()
+ */
+static int
+process_text(char **str, token_t *tok)
+{
+ char *cp = *str;
+ char *s = NULL;
+ int len = 0;
+
+ /* Check and see if this token is a STRING or CHARACTER
+ * type (beginning with a single or double quote).
+ */
+ if (*cp == '\'') {
+ /* make sure that only a single character is between
+ * the single quotes (it can be an escaped character
+ * too).
+ */
+ s = strpbrk((cp + 1), "\'");
+ if (!s) {
+ set_eval_error(E_SINGLE_QUOTE);
+ error_token = tok->ptr;
+ return(1);
+ }
+ len = (uaddr_t)s - (uaddr_t)cp;
+ if ((*(cp+1) == '\\')) {
+ if (*(cp+2) == '0') {
+ long int val;
+ unsigned long uval;
+ char *ep;
+
+ uval = kl_strtoull((char*)(cp+2),
+ (char **)&ep, 8);
+ val = uval;
+ if ((val > 255) || (*ep != '\'')) {
+ set_eval_error(E_BAD_CHAR);
+ error_token = tok->ptr;
+ return(1);
+ }
+ } else if (*(cp+3) != '\'') {
+ set_eval_error(E_BAD_CHAR);
+ error_token = tok->ptr;
+ return(1);
+ }
+ tok->type = CHARACTER;
+ } else if (len == 2) {
+ tok->type = CHARACTER;
+ } else {
+
+ /* Treat as a single token entry. It's possible
+ * that what's between the single quotes is a
+ * type name. That will be determined later on.
+ */
+ tok->type = STRING;
+ }
+ *str = cp + len;
+ } else if (*cp == '\"') {
+ s = strpbrk((cp + 1), "\"");
+ if (!s) {
+ set_eval_error(E_BAD_STRING);
+ error_token = tok->ptr;
+ return(1);
+ }
+ len = (uaddr_t)s - (uaddr_t)cp;
+ tok->type = TEXT;
+ *str = cp + len;
+ }
+ if ((tok->type == STRING) || (tok->type == TEXT)) {
+
+ if ((tok->type == TEXT) && (strlen(cp) > (len + 1))) {
+
+ /* Check to see if there is a comma or semi-colon
+ * directly following the string. If there is,
+ * then the string is OK (the following characters
+ * are part of the next expression). Also, it's OK
+ * to have trailing blanks as long as that's all
+ * there is.
+ */
+ char *c;
+
+ c = s + 1;
+ while (*c) {
+ if ((*c == ',') || (*c == ';')) {
+ break;
+ } else if (*c != ' ') {
+ set_eval_error(E_END_EXPECTED);
+ tok->ptr = c;
+ error_token = tok->ptr;
+ return(1);
+ }
+ c++;
+ }
+ /* Truncate the trailing blanks (they are not
+ * part of the string).
+ */
+ if (c != (s + 1)) {
+ *(s + 1) = 0;
+ }
+ }
+ tok->string = (char *)kl_alloc_block(len);
+ memcpy(tok->string, (cp + 1), len - 1);
+ tok->string[len - 1] = 0;
+ }
+ return(0);
+}
+
+/*
+ * get_token_list()
+ */
+static token_t *
+get_token_list(char *str)
+{
+ int paren_count = 0;
+ char *cp;
+ token_t *tok = (token_t*)NULL, *tok_head = (token_t*)NULL;
+ token_t *tok_last = (token_t*)NULL;
+
+ cp = str;
+ eval_error = 0;
+
+ while (*cp) {
+
+ /* Skip past any "white space" (spaces and tabs).
+ */
+ switch (*cp) {
+ case ' ' :
+ case '\t' :
+ case '`' :
+ cp++;
+ continue;
+ default :
+ break;
+ }
+
+ /* Allocate space for the next token */
+ tok = (token_t *)kl_alloc_block(sizeof(token_t));
+ tok->ptr = cp;
+
+ switch(*cp) {
+
+ /* Check for operators
+ */
+ case '+' :
+ if (*((char*)cp + 1) == '+') {
+
+ /* We aren't doing assignment here,
+ * so the ++ operator is not
+ * considered valid.
+ */
+ set_eval_error(E_BAD_OPERATOR);
+ error_token = tok->ptr; /* tok_last may be NULL here */
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ } else if (!tok_last ||
+ (tok_last->operator &&
+ (tok_last->operator != CLOSE_PAREN))) {
+ tok->operator = UNARY_PLUS;
+ } else {
+ tok->operator = ADD;
+ }
+ break;
+
+ case '-' :
+ if (*((char*)cp + 1) == '-') {
+
+ /* We aren't doing assignment here, so
+ * the -- operator is not considered
+ * valid.
+ */
+ set_eval_error(E_BAD_OPERATOR);
+ error_token = tok->ptr; /* tok_last may be NULL here */
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ } else if (*((char*)cp + 1) == '>') {
+ tok->operator = RIGHT_ARROW;
+ cp++;
+ } else if (!tok_last || (tok_last->operator &&
+ (tok_last->operator != CLOSE_PAREN))) {
+ tok->operator = UNARY_MINUS;
+ } else {
+ tok->operator = SUBTRACT;
+ }
+ break;
+
+ case '.' :
+ /* XXX - need to check to see if this is a
+ * decimal point in the middle of a floating
+ * point value.
+ */
+ tok->operator = DOT;
+ break;
+
+ case '*' :
+ /* XXX - need a better way to tell if this is
+ * an INDIRECTION. Perhaps check the next
+ * token?
+ */
+ if (!tok_last || (tok_last->operator &&
+ ((tok_last->operator != CLOSE_PAREN) &&
+ (tok_last->operator != CAST)))) {
+ tok->operator = INDIRECTION;
+ } else {
+ tok->operator = MULTIPLY;
+ }
+ break;
+
+ case '/' :
+ tok->operator = DIVIDE;
+ break;
+
+ case '%' :
+ tok->operator = MODULUS;
+ break;
+
+ case '(' : {
+ char *s, *s1, *s2;
+ int len;
+
+ /* Make sure the previous token is an operator
+ */
+ if (tok_last && !tok_last->operator) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = tok_last->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ }
+
+ if (tok_last &&
+ ((tok_last->operator == RIGHT_ARROW) ||
+ (tok_last->operator == DOT))) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = tok_last->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ }
+
+ /* Check here to see if following tokens
+ * constitute a cast.
+ */
+
+ /* Skip past any "white space" (spaces
+ * and tabs)
+ */
+ while ((*(cp+1) == ' ') || (*(cp+1) == '\t')) {
+ cp++;
+ }
+ if ((*(cp+1) == '(') || isdigit(*(cp+1)) ||
+ (*(cp+1) == '+') || (*(cp+1) == '-') ||
+ (*(cp+1) == '*') || (*(cp+1) == '&') ||
+ (*(cp+1) == ')')){
+ tok->operator = OPEN_PAREN;
+ paren_count++;
+ break;
+ }
+
+ /* Make sure we have a CLOSE_PAREN.
+ */
+ if (!(s1 = strchr(cp+1, ')'))) {
+ set_eval_error(E_OPEN_PAREN);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ }
+ /* Check to see if this is NOT a simple
+ * typecast.
+ */
+ if (!(s2 = strchr(cp+1, '.'))) {
+ s2 = strstr(cp+1, "->");
+ }
+ if (s2 && (s2 < s1)) {
+ tok->operator = OPEN_PAREN;
+ paren_count++;
+ break;
+ }
+
+ if ((s = strpbrk(cp+1, "*)"))) {
+ char str[128];
+
+ len = (uaddr_t)s - (uaddr_t)(cp+1);
+ strncpy(str, cp+1, len);
+ str[len] = 0;
+ if (!is_typestr(str)) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ }
+ if (!(s = strpbrk((cp+1), ")"))) {
+ set_eval_error(E_OPEN_PAREN);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ }
+ len = (uaddr_t)s - (uaddr_t)(cp+1);
+ tok->string = (char *)
+ kl_alloc_block(len + 1);
+ memcpy(tok->string, (cp+1), len);
+ tok->string[len] = 0;
+ tok->operator = CAST;
+ cp = (char *)((uaddr_t)(cp+1) + len);
+ break;
+ }
+ tok->operator = OPEN_PAREN;
+ paren_count++;
+ break;
+ }
+
+ case ')' :
+ if (tok_last && ((tok_last->operator ==
+ RIGHT_ARROW) ||
+ (tok_last->operator == DOT))) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = tok_last->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return ((token_t*)NULL);
+ }
+ tok->operator = CLOSE_PAREN;
+ paren_count--;
+ break;
+
+ case '&' :
+ if (*((char*)cp + 1) == '&') {
+ tok->operator = LOGICAL_AND;
+ cp++;
+ } else if (!tok_last || (tok_last->operator &&
+ (tok_last->operator !=
+ CLOSE_PAREN))) {
+ tok->operator = ADDRESS;
+ } else {
+ tok->operator = BITWISE_AND;
+ }
+ break;
+
+ case '|' :
+ if (*((char*)cp + 1) == '|') {
+ tok->operator = LOGICAL_OR;
+ cp++;
+ } else {
+ tok->operator = BITWISE_OR;
+ }
+ break;
+
+ case '=' :
+ if (*((char*)cp + 1) == '=') {
+ tok->operator = EQUAL;
+ cp++;
+ } else {
+ /* ASSIGNMENT -- NOT IMPLEMENTED
+ */
+ tok->operator = NOT_YET;
+ }
+ break;
+
+ case '<' :
+ if (*((char*)cp + 1) == '<') {
+ tok->operator = LEFT_SHIFT;
+ cp++;
+ } else if (*((char*)cp + 1) == '=') {
+ tok->operator = LESS_THAN_OR_EQUAL;
+ cp++;
+ } else {
+ tok->operator = LESS_THAN;
+ }
+ break;
+
+ case '>' :
+ if (*((char*)(cp + 1)) == '>') {
+ tok->operator = RIGHT_SHIFT;
+ cp++;
+ } else if (*((char*)cp + 1) == '=') {
+ tok->operator = GREATER_THAN_OR_EQUAL;
+ cp++;
+ } else {
+ tok->operator = GREATER_THAN;
+ }
+ break;
+
+ case '!' :
+ if (*((char*)cp + 1) == '=') {
+ tok->operator = NOT_EQUAL;
+ cp++;
+ } else {
+ tok->operator = LOGICAL_NEGATION;
+ }
+ break;
+
+ case '$' :
+ set_eval_error(E_NOT_IMPLEMENTED);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return((token_t*)NULL);
+ case '~' :
+ tok->operator = ONES_COMPLEMENT;
+ break;
+
+ case '^' :
+ tok->operator = BITWISE_EXCLUSIVE_OR;
+ break;
+
+ case '?' :
+ set_eval_error(E_NOT_IMPLEMENTED);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return((token_t*)NULL);
+ case ':' :
+ set_eval_error(E_NOT_IMPLEMENTED);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return((token_t*)NULL);
+ case '[' :
+ tok->operator = OPEN_SQUARE_BRACKET;
+ break;
+
+ case ']' :
+ tok->operator = CLOSE_SQUARE_BRACKET;
+ break;
+
+ default: {
+
+ char *s;
+ int len;
+
+ /* See if the last token is a RIGHT_ARROW
+ * or a DOT. If it is, then this token must
+ * be the name of a struct/union member.
+ */
+ if (tok_last &&
+ ((tok_last->operator == RIGHT_ARROW) ||
+ (tok_last->operator == DOT))) {
+ tok->type = MEMBER;
+ } else if (process_text(&cp, tok)) {
+ free_tokens(tok_head);
+ free_tokens(tok);
+ return((token_t*)NULL);
+ }
+ if (tok->type == TEXT) {
+ return(tok);
+ } else if (tok->type == STRING) {
+ if (is_typestr(tok->string)) {
+ tok->type = TYPE_DEF;
+ } else {
+ tok->operator = TEXT;
+ return(tok);
+ }
+ break;
+ } else if (tok->type == CHARACTER) {
+ break;
+ }
+
+ /* Check and see if the entire string is
+ * a typename (valid only for whatis case).
+ */
+ s = strpbrk(cp,
+ ".\t+-*/()[]|~!$&%^<>?:&=^\"\'");
+ if (!s && !tok->type && is_typestr(cp)) {
+ tok->type = TYPE_DEF;
+ len = strlen(cp) + 1;
+ tok->string = (char *)
+ kl_alloc_block(len);
+ memcpy(tok->string, cp, len - 1);
+ tok->string[len - 1] = 0;
+ cp = (char *)((uaddr_t)cp + len - 2);
+ break;
+ }
+
+ /* Now check for everything else
+ */
+ if ((s = strpbrk(cp,
+ " .\t+-*/()[]|~!$&%^<>?:&=^\"\'"))) {
+ len = (uaddr_t)s - (uaddr_t)cp + 1;
+ } else {
+ len = strlen(cp) + 1;
+ }
+
+ tok->string =
+ (char *)kl_alloc_block(len);
+ memcpy(tok->string, cp, len - 1);
+ tok->string[len - 1] = 0;
+
+ cp = (char *)((uaddr_t)cp + len - 2);
+
+ /* Check to see if this is the keyword
+ * "sizeof". If not, then check to see if
+ * the string is a member name.
+ */
+ if (!strcmp(tok->string, "sizeof")) {
+ tok->operator = SIZEOF;
+ kl_free_block((void *)tok->string);
+ tok->string = 0;
+ } else if (tok_last &&
+ ((tok_last->operator == RIGHT_ARROW) ||
+ (tok_last->operator == DOT))) {
+ tok->type = MEMBER;
+ } else {
+ tok->type = STRING;
+ }
+ break;
+ }
+ }
+ if (!(tok->type)) {
+ tok->type = OPERATOR;
+ }
+ if (!tok_head) {
+ tok_head = tok_last = tok;
+ } else {
+ tok_last->next = tok;
+ tok_last = tok;
+ }
+ cp++;
+ }
+ if (paren_count < 0) {
+ set_eval_error(E_CLOSE_PAREN);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ return((token_t*)NULL);
+ } else if (paren_count > 0) {
+ set_eval_error(E_OPEN_PAREN);
+ error_token = tok->ptr;
+ free_tokens(tok_head);
+ return((token_t*)NULL);
+ }
+ return(tok_head);
+}
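+
+/* Illustrative sketch of the tokenizer output: for the input "tp->next"
+ * get_token_list() produces three tokens, roughly
+ *
+ *     STRING("tp") -> OPERATOR(RIGHT_ARROW) -> MEMBER("next")
+ *
+ * where the MEMBER type is assigned because the token directly follows
+ * a RIGHT_ARROW (or DOT) operator.
+ */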
+
+/*
+ * valid_binary_args()
+ */
+int
+valid_binary_args(node_t *np, node_t *left, node_t *right)
+{
+ int op = np->operator;
+
+ if ((op == RIGHT_ARROW) || (op == DOT)) {
+ if (!left) {
+ set_eval_error(E_MISSING_STRUCTURE);
+ error_token = np->tok_ptr;
+ return(0);
+ } else if (!(left->node_type == TYPE_DEF) &&
+ !(left->node_type == MEMBER) &&
+ !(left->operator == CLOSE_PAREN) &&
+ !(left->operator == CLOSE_SQUARE_BRACKET)) {
+ set_eval_error(E_BAD_STRUCTURE);
+ error_token = left->tok_ptr;
+ return(0);
+ }
+ if (!right || (!(right->node_type == MEMBER))) {
+ set_eval_error(E_BAD_MEMBER);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ return(1);
+ }
+ if (!left || !right) {
+ set_eval_error(E_MISSING_OPERAND);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ switch (left->operator) {
+ case CLOSE_PAREN:
+ case CLOSE_SQUARE_BRACKET:
+ break;
+ default:
+ switch(left->node_type) {
+ case NUMBER:
+ case STRING:
+ case TEXT:
+ case CHARACTER:
+ case EVAL_VAR:
+ case MEMBER:
+ break;
+ default:
+ set_eval_error(E_BAD_OPERAND);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ }
+ switch (right->operator) {
+ case OPEN_PAREN:
+ break;
+ default:
+ switch(right->node_type) {
+ case NUMBER:
+ case STRING:
+ case TEXT:
+ case CHARACTER:
+ case EVAL_VAR:
+ case MEMBER:
+ break;
+ default:
+ set_eval_error(E_BAD_OPERAND);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ }
+ return(1);
+}
+
+/*
+ * get_node_list()
+ */
+static node_t *
+get_node_list(token_t *tp, int flags)
+{
+ node_t *root = (node_t *)NULL;
+ node_t *np = (node_t *)NULL;
+ node_t *last = (node_t *)NULL;
+
+ /* Loop through the tokens and convert them to nodes.
+ */
+ while (tp) {
+ np = make_node(tp, flags);
+ if (eval_error) {
+ return((node_t *)NULL);
+ }
+ if (root) {
+ last->next = np;
+ last = np;
+ } else {
+ root = last = np;
+ }
+ tp = tp->next;
+ }
+ last->next = (node_t *)NULL; /* cpw patch */
+ last = (node_t *)NULL;
+ for (np = root; np; np = np->next) {
+ if (is_binary(np->operator)) {
+ if (!valid_binary_args(np, last, np->next)) {
+ free_nodelist(root);
+ return((node_t *)NULL);
+ }
+ }
+ last = np;
+ }
+ return(root);
+}
+
+/*
+ * next_node()
+ */
+static node_t *
+next_node(void)
+{
+ node_t *np;
+ if ((np = node_list)) {
+ node_list = node_list->next;
+ np->next = (node_t*)NULL;
+ }
+ return(np);
+}
+
+/*
+ * eval_unary()
+ */
+static node_t *
+eval_unary(node_t *curnp, int flags)
+{
+ node_t *n0, *n1;
+
+ n0 = curnp;
+
+ /* Peek ahead and make sure there is a next node.
+ * Also check to see if the next node requires
+ * a recursive call to do_eval(). If it does, we'll
+ * let the do_eval() call take care of pulling it
+ * off the list.
+ */
+ if (!node_list) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = n0->tok_ptr;
+ free_nodes(n0);
+ return((node_t*)NULL);
+ }
+ if (n0->operator == CAST) {
+ if (node_list->operator == CLOSE_PAREN) {
+
+ /* Free the CLOSE_PAREN and return
+ */
+ free_node(next_node());
+ return(n0);
+ }
+ if (!(node_list->node_type == NUMBER) &&
+ !(node_list->node_type == VADDR) &&
+ !((node_list->operator == ADDRESS) ||
+ (node_list->operator == CAST) ||
+ (node_list->operator == UNARY_MINUS) ||
+ (node_list->operator == UNARY_PLUS) ||
+ (node_list->operator == INDIRECTION) ||
+ (node_list->operator == OPEN_PAREN))) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = node_list->tok_ptr;
+ free_nodes(n0);
+ return((node_t*)NULL);
+ }
+ }
+ if ((n0->operator == INDIRECTION) ||
+ (n0->operator == ADDRESS) ||
+ (n0->operator == OPEN_PAREN) ||
+ is_unary(node_list->operator)) {
+ n1 = do_eval(flags);
+ if (eval_error) {
+ free_nodes(n0);
+ free_nodes(n1);
+ return((node_t*)NULL);
+ }
+ } else {
+ n1 = next_node();
+ }
+
+ if (n1 && (n1->operator == OPEN_PAREN)) {
+ /* Get the value contained within the parentheses.
+ * If there was an error, just return.
+ */
+ free_node(n1);
+ n1 = do_eval(flags);
+ if (eval_error) {
+ free_nodes(n1);
+ free_nodes(n0);
+ return((node_t*)NULL);
+ }
+ }
+
+ n0->right = n1;
+ if (replace_unary(n0, flags) == -1) {
+ if (!eval_error) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = n0->tok_ptr;
+ }
+ free_nodes(n0);
+ return((node_t*)NULL);
+ }
+ return(n0);
+}
+
+/*
+ * do_eval() -- Reduces an equation to a single value.
+ *
+ * Any parentheses (and nested parentheses) within the equation will
+ * be solved first via recursive calls to do_eval().
+ */
+static node_t *
+do_eval(int flags)
+{
+ node_t *root = (node_t*)NULL, *curnp, *n0, *n1;
+
+ /* Loop through the list of nodes until we run out of nodes
+ * or we hit a CLOSE_PAREN. If we hit an OPEN_PAREN, make a
+ * recursive call to do_eval().
+ */
+ curnp = next_node();
+ while (curnp) {
+ n0 = n1 = (node_t *)NULL;
+
+ if (curnp->operator == OPEN_PAREN) {
+ /* Get the value contained within the parentheses.
+ * If there was an error, just return.
+ */
+ free_node(curnp);
+ n0 = do_eval(flags);
+ if (eval_error) {
+ free_nodes(n0);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+
+ } else if (curnp->operator == SIZEOF) {
+ /* Free the SIZEOF node and then make a call
+ * to the get_sizeof() function (which will
+ * get the next node off the list).
+ */
+ n0 = get_sizeof();
+ if (eval_error) {
+ if (!error_token) {
+ error_token = curnp->tok_ptr;
+ }
+ free_node(curnp);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ free_node(curnp);
+ curnp = (node_t *)NULL;
+ } else if (is_unary(curnp->operator)) {
+ n0 = eval_unary(curnp, flags);
+ } else {
+ n0 = curnp;
+ curnp = (node_t *)NULL;
+ }
+ if (eval_error) {
+ free_nodes(n0);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+
+ /* n0 should now contain a non-operator node. Check to see if
+ * there is a next token. If there isn't, just add the last
+ * rchild and return.
+ */
+ if (!node_list) {
+ if (root) {
+ add_rchild(root, n0);
+ } else {
+ root = n0;
+ }
+ replace(root, flags);
+ if (eval_error) {
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ return(root);
+ }
+
+ /* Make sure the next token is an operator.
+ */
+ if (!node_list->operator) {
+ free_nodes(root);
+ free_node(n0);
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = node_list->tok_ptr;
+ return((node_t *)NULL);
+ } else if ((node_list->operator == CLOSE_PAREN) ||
+ (node_list->operator == CLOSE_SQUARE_BRACKET)) {
+
+ if (root) {
+ add_rchild(root, n0);
+ } else {
+ root = n0;
+ }
+
+ /* Reduce the resulting tree to a single value
+ */
+ replace(root, flags);
+ if (eval_error) {
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+
+ /* Step over the CLOSE_PAREN or CLOSE_SQUARE_BRACKET
+ * and then return.
+ */
+ free_node(next_node());
+ return(root);
+ } else if (node_list->operator == OPEN_SQUARE_BRACKET) {
+next_dimension1:
+ /* skip over the OPEN_SQUARE_BRACKET token
+ */
+ free_node(next_node());
+
+ /* Get the value contained within the brackets. This
+ * value must represent an array index (value or
+ * equation).
+ */
+ n1 = do_eval(0);
+ if (eval_error) {
+ free_nodes(root);
+ free_node(n0);
+ free_node(n1);
+ return((node_t *)NULL);
+ }
+
+ /* Convert the array (or pointer type) to an
+ * element type using the index value obtained
+ * above. Make sure that n0 contains some sort
+ * of type definition first, however.
+ */
+ if (n0->node_type != TYPE_DEF) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n0->tok_ptr;
+ free_nodes(n0);
+ free_nodes(n1);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ array_to_element(n0, n1);
+ free_node(n1);
+ if (eval_error) {
+ free_nodes(root);
+ free_nodes(n0);
+ return((node_t *)NULL);
+ }
+
+ /* If there aren't any more nodes, just
+ * return.
+ */
+ if (!node_list) {
+ return(n0);
+ }
+ if (node_list->operator == OPEN_SQUARE_BRACKET) {
+ goto next_dimension1;
+ }
+ } else if (!is_binary(node_list->operator)) {
+ set_eval_error(E_BAD_OPERATOR);
+ error_token = node_list->tok_ptr;
+ free_nodes(root);
+ free_nodes(n0);
+ return((node_t *)NULL);
+ }
+
+ /* Now get the operator node
+ */
+ if (!(n1 = next_node())) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = n0->tok_ptr;
+ free_nodes(n0);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+
+ /* Check to see if this binary operator is RIGHT_ARROW or DOT.
+ * If it is, we need to reduce it to a single value node now.
+ */
+ while ((n1->operator == RIGHT_ARROW) || (n1->operator == DOT)) {
+
+ /* The next node must contain the name of the
+ * struct|union member.
+ */
+ if (!node_list || (node_list->node_type != MEMBER)) {
+ set_eval_error(E_BAD_MEMBER);
+ error_token = n1->tok_ptr;
+ free_nodes(n0);
+ free_nodes(n1);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ n1->left = n0;
+
+ /* Now get the next node and link it as the
+ * right child.
+ */
+ if (!(n0 = next_node())) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = n1->tok_ptr;
+ free_nodes(n1);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ n1->right = n0;
+ if (!(n0 = replace(n1, flags))) {
+ if (!(eval_error)) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = n1->tok_ptr;
+ }
+ free_nodes(n1);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ n1 = (node_t *)NULL;
+
+ /* Check to see if there is a next node. If there
+ * is, check to see if it is a CLOSE_PAREN or a
+ * CLOSE_SQUARE_BRACKET. If it is, reduce the tree,
+ * skip over the closing token and return.
+ */
+ if (node_list && ((node_list->operator == CLOSE_PAREN)
+ || (node_list->operator ==
+ CLOSE_SQUARE_BRACKET))) {
+ if (root) {
+ add_rchild(root, n0);
+ } else {
+ root = n0;
+ }
+
+ /* Reduce the resulting tree to a single
+ * value
+ */
+ replace(root, flags);
+ if (eval_error) {
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+
+ /* Advance the token pointer past the
+ * CLOSE_PAREN and then return.
+ */
+ free_node(next_node());
+ return(root);
+ }
+
+ /* Check to see if the next node is an
+ * OPEN_SQUARE_BRACKET. If it is, then we have to
+ * reduce the contents of the square brackets to
+ * an array index.
+ */
+ if (node_list && (node_list->operator
+ == OPEN_SQUARE_BRACKET)) {
+
+ /* Advance the token pointer and call
+ * do_eval() again.
+ */
+ free_node(next_node());
+next_dimension2:
+ n1 = do_eval(0);
+ if (eval_error) {
+ free_node(n0);
+ free_node(n1);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+
+ /* Convert the array (or pointer type) to
+ * an element type using the index value
+ * obtained above. Make sure that n0
+ * contains some sort of type definition
+ * first, however.
+ */
+ if (n0->node_type != TYPE_DEF) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n0->tok_ptr;
+ free_node(n0);
+ free_node(n1);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ array_to_element(n0, n1);
+ free_node(n1);
+ if (eval_error) {
+ free_node(n0);
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ }
+
+ /* Now get the next operator node (if there is one).
+ */
+ if (!node_list) {
+ if (root) {
+ add_rchild(root, n0);
+ } else {
+ root = n0;
+ }
+ return(root);
+ }
+ n1 = next_node();
+ if (n1->operator == OPEN_SQUARE_BRACKET) {
+ goto next_dimension2;
+ }
+ }
+
+ if (n1 && ((n1->operator == CLOSE_PAREN) ||
+ (n1->operator == CLOSE_SQUARE_BRACKET))) {
+ free_node(n1);
+ if (root) {
+ add_rchild(root, n0);
+ } else {
+ root = n0;
+ }
+ replace(root, flags);
+ if (eval_error) {
+ free_nodes(root);
+ return((node_t *)NULL);
+ }
+ return(root);
+ }
+
+ if (!root) {
+ root = n1;
+ n1->left = n0;
+ } else if (precedence(root->operator)
+ >= precedence(n1->operator)) {
+ add_rchild(root, n0);
+ n1->left = root;
+ root = n1;
+ } else {
+ if (!root->right) {
+ n1->left = n0;
+ root->right = n1;
+ } else {
+ add_rchild(root, n0);
+ n1->left = root->right;
+ root->right = n1;
+ }
+ }
+ curnp = next_node();
+ } /* while(curnp) */
+ return(root);
+}
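+
+/* Worked example of the recursion above: for "(1 + 2) * 3" the leading
+ * OPEN_PAREN triggers a recursive do_eval() call that reduces "1 + 2"
+ * to a single NUMBER node and consumes the CLOSE_PAREN; the outer call
+ * then applies MULTIPLY to the result, yielding 9.
+ */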
+
+/*
+ * is_unary()
+ */
+static int
+is_unary(int op)
+{
+ switch (op) {
+ case LOGICAL_NEGATION :
+ case ADDRESS :
+ case INDIRECTION :
+ case UNARY_MINUS :
+ case UNARY_PLUS :
+ case ONES_COMPLEMENT :
+ case CAST :
+ return(1);
+
+ default :
+ return(0);
+ }
+}
+
+
+/*
+ * is_binary()
+ */
+static int
+is_binary(int op)
+{
+ switch (op) {
+
+ case BITWISE_OR :
+ case BITWISE_EXCLUSIVE_OR :
+ case BITWISE_AND :
+ case RIGHT_SHIFT :
+ case LEFT_SHIFT :
+ case ADD :
+ case SUBTRACT :
+ case MULTIPLY :
+ case DIVIDE :
+ case MODULUS :
+ case LOGICAL_OR :
+ case LOGICAL_AND :
+ case EQUAL :
+ case NOT_EQUAL :
+ case LESS_THAN :
+ case GREATER_THAN :
+ case LESS_THAN_OR_EQUAL :
+ case GREATER_THAN_OR_EQUAL :
+ case RIGHT_ARROW :
+ case DOT :
+ return(1);
+
+ default :
+ return(0);
+ }
+}
+
+/*
+ * precedence()
+ */
+static int
+precedence(int a)
+{
+ if ((a >= CONDITIONAL) && (a <= CONDITIONAL_ELSE)) {
+ return(1);
+ } else if (a == LOGICAL_OR) {
+ return(2);
+ } else if (a == LOGICAL_AND) {
+ return(3);
+ } else if (a == BITWISE_OR) {
+ return(4);
+ } else if (a == BITWISE_EXCLUSIVE_OR) {
+ return(5);
+ } else if (a == BITWISE_AND) {
+ return(6);
+ } else if ((a >= EQUAL) && (a <= NOT_EQUAL)) {
+ return(7);
+ } else if ((a >= LESS_THAN) && (a <= GREATER_THAN_OR_EQUAL)) {
+ return(8);
+ } else if ((a >= RIGHT_SHIFT) && (a <= LEFT_SHIFT)) {
+ return(9);
+ } else if ((a >= ADD) && (a <= SUBTRACT)) {
+ return(10);
+ } else if ((a >= MULTIPLY) && (a <= MODULUS)) {
+ return(11);
+ } else if ((a >= LOGICAL_NEGATION) && (a <= SIZEOF)) {
+ return(12);
+ } else if ((a >= RIGHT_ARROW) && (a <= DOT)) {
+ return(13);
+ } else {
+ return(0);
+ }
+}
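+
+/* Example of how these precedence values shape the tree: in "1 + 2 * 3",
+ * ADD has precedence 10 and MULTIPLY 11, so do_eval() hangs the MULTIPLY
+ * node below the ADD node's right child and the equation reduces to
+ * 1 + (2 * 3) = 7, matching C operator precedence.
+ */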
+
+/*
+ * esc_char()
+ */
+char
+esc_char(char *str)
+{
+ long int val;
+ unsigned long uval;
+ char ch;
+
+ if (strlen(str) > 1) {
+ uval = kl_strtoull(str, (char **)NULL, 8);
+ val = uval;
+ ch = (char)val;
+ } else {
+ ch = str[0];
+ }
+ switch (ch) {
+ case 'a' :
+ return((char)7);
+ case 'b' :
+ return((char)8);
+ case 't' :
+ return((char)9);
+ case 'n' :
+ return((char)10);
+ case 'f' :
+ return((char)12);
+ case 'r' :
+ return((char)13);
+ case 'e' :
+ return((char)27);
+ default:
+ return(ch);
+ }
+}
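+
+/* Examples: esc_char("n") returns a newline (10) and esc_char("t") a
+ * tab (9), while a multi-character octal string such as "101" is
+ * converted numerically and returns 'A' (0101 == 65).
+ */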
+
+/*
+ * make_node()
+ */
+static node_t *
+make_node(token_t *t, int flags)
+{
+ node_t *np;
+
+ set_eval_error(0);
+ np = (node_t*)kl_alloc_block(sizeof(*np));
+
+ if (t->type == OPERATOR) {
+
+ /* Check to see if this token represents a typecast
+ */
+ if (t->operator == CAST) {
+ type_t *tp;
+
+ if (!(np->type = get_type(t->string, flags))) {
+ set_eval_error(E_BAD_CAST);
+ error_token = t->ptr;
+ free_nodes(np);
+ return((node_t*)NULL);
+ }
+
+ /* Determine if this is a pointer to a type
+ */
+ tp = np->type;
+ if (tp->flag == POINTER_FLAG) {
+ np->flags = POINTER_FLAG;
+ tp = tp->t_next;
+ while (tp->flag == POINTER_FLAG) {
+ tp = tp->t_next;
+ }
+ }
+ switch(tp->flag) {
+ case KLTYPE_FLAG:
+ np->flags |= KLTYPE_FLAG;
+ break;
+
+ default:
+ free_nodes(np);
+ set_eval_error(E_BAD_CAST);
+ error_token = t->ptr;
+ return((node_t*)NULL);
+ }
+ if (!t->next) {
+ if (flags & C_WHATIS) {
+ np->node_type = TYPE_DEF;
+ } else {
+ set_eval_error(E_BAD_CAST);
+ error_token = t->ptr;
+ free_nodes(np);
+ return((node_t*)NULL);
+ }
+ } else {
+ np->node_type = OPERATOR;
+ np->operator = CAST;
+ }
+ } else {
+ np->node_type = OPERATOR;
+ np->operator = t->operator;
+ }
+ } else if (t->type == MEMBER) {
+ np->name = (char *)dup_block((void *)t->string, strlen(t->string)+1);
+ np->node_type = MEMBER;
+ } else if ((t->type == STRING) || (t->type == TYPE_DEF)) {
+ syment_t *sp;
+ dbg_sym_t *stp;
+ dbg_type_t *sttp;
+
+ if ((sp = kl_lkup_symname(t->string))) {
+ if (!(flags & C_NOVARS)) {
+ int has_type = 0;
+
+ /* The string is a symbol name. We'll treat it as
+ * a global kernel variable and, at least, gather in
+ * the address of the symbol and the value it points
+ * to.
+ */
+ np->address = sp->s_addr;
+ np->flags |= ADDRESS_FLAG;
+ np->name = t->string;
+ t->string = (char*)NULL;
+
+ /* Need to see if there is type information available
+ * for this variable. Since this mapping is not
+ * available yet, we will just attach a type struct
+ * for either uint32_t or uint64_t (depending on the
+ * size of a kernel pointer). That will at least let
+ * us do something and will prevent the scenario where
+ * we have a type node without a pointer to a type
+ * struct!
+ */
+ np->node_type = TYPE_DEF;
+ np->flags |= KLTYPE_FLAG;
+ np->value = *((kaddr_t *)np->address);
+ /* try to get the actual type info for the variable */
+ if(((stp = dbg_find_sym(sp->s_name, DBG_VAR,
+ (uint64_t)0)) != NULL)){
+ if((sttp = (dbg_type_t *)
+ kl_find_typenum(stp->sym_typenum))
+ != NULL){
+ /* kl_get_typestring(sttp); */
+ has_type = 1;
+ if(sttp->st_klt.kl_type == KLT_POINTER){
+ np->flags ^= KLTYPE_FLAG;
+ np->flags |= POINTER_FLAG;
+ np->type =
+ get_type(sttp->st_typestr,
+ flags);
+ } else {
+ np->type =
+ kl_alloc_block(sizeof(type_t));
+ np->type->un.kltp =
+ &sttp->st_klt;
+ }
+ }
+ }
+ /* no type info for the variable found */
+ if(!has_type){
+ if (ptrsz64) {
+ np->type = get_type("uint64_t", flags);
+ } else {
+ np->type = get_type("uint32_t", flags);
+ }
+ }
+ }
+ kl_free_block((void *)sp);
+ } else if (flags & (C_WHATIS|C_SIZEOF)) {
+
+ kltype_t *kltp;
+
+ if ((kltp = kl_find_type(t->string, KLT_TYPES))) {
+
+ np->node_type = TYPE_DEF;
+ np->flags = KLTYPE_FLAG;
+ np->type = (type_t*)
+ kl_alloc_block(sizeof(type_t));
+ np->type->flag = KLTYPE_FLAG;
+ np->type->t_kltp = kltp;
+ } else {
+ if (get_value(t->string,
+ (uint64_t *)&np->value)) {
+ set_eval_error(E_BAD_VALUE);
+ error_token = t->ptr;
+ free_nodes(np);
+ return((node_t*)NULL);
+ }
+ if (!strncmp(t->string, "0x", 2) ||
+ !strncmp(t->string, "0X", 2)) {
+ np->flags |= UNSIGNED_FLAG;
+ }
+ np->node_type = NUMBER;
+ }
+ np->tok_ptr = t->ptr;
+ return(np);
+ } else {
+ if (get_value(t->string, (uint64_t *)&np->value)) {
+ set_eval_error(E_BAD_VALUE);
+ error_token = t->ptr;
+ free_nodes(np);
+ return((node_t*)NULL);
+ }
+ if (np->value > 0xffffffff) {
+ np->byte_size = 8;
+ } else {
+ np->byte_size = 4;
+ }
+ if (!strncmp(t->string, "0x", 2) ||
+ !strncmp(t->string, "0X", 2)) {
+ np->flags |= UNSIGNED_FLAG;
+ }
+ np->node_type = NUMBER;
+ }
+ } else if (t->type == CHARACTER) {
+ char *cp;
+
+ /* Step over the single quote
+ */
+ cp = (t->ptr + 1);
+ if (*cp == '\\') {
+ int i = 0;
+ char str[16];
+
+ /* Step over the back slash
+ */
+ cp++;
+ while ((*cp != '\'') && (i < (int)sizeof(str) - 1)) {
+ str[i++] = *cp++;
+ }
+ str[i] = 0;
+ np->value = esc_char(str);
+ } else {
+ np->value = *cp;
+ }
+ np->type = get_type("char", flags);
+ np->node_type = TYPE_DEF;
+ np->flags |= KLTYPE_FLAG;
+ } else if (t->type == TEXT) {
+ np->node_type = TEXT;
+ np->name = t->string;
+ /* So the block doesn't get freed twice */
+ t->string = (char*)NULL;
+ } else {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = t->ptr;
+ return((node_t*)NULL);
+ }
+ np->tok_ptr = t->ptr;
+ return(np);
+}
+
+/*
+ * add_node()
+ */
+static int
+add_node(node_t *root, node_t *new_node)
+{
+ node_t *n = root;
+
+ /* Find the bottom right-most node
+ */
+ while (n->right) {
+ n = n->right;
+ }
+
+ /* If the node we found is a leaf node, return an error (we will
+ * have to insert the node instead).
+ */
+ if (n->node_type == NUMBER) {
+ return(-1);
+ } else {
+ n->right = new_node;
+ }
+ return(0);
+}
+
+/*
+ * add_rchild()
+ */
+static int
+add_rchild(node_t *root, node_t *new_node)
+{
+ if (add_node(root, new_node) == -1) {
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * free_type()
+ */
+static void
+free_type(type_t *head)
+{
+ type_t *t0, *t1;
+
+ t0 = head;
+ while(t0) {
+ if (t0->flag == POINTER_FLAG) {
+ t1 = t0->t_next;
+ kl_free_block((void *)t0);
+ t0 = t1;
+ } else {
+ if (t0->flag != KLTYPE_FLAG) {
+ kl_free_block((void *)t0->t_kltp);
+ }
+ kl_free_block((void *)t0);
+ t0 = (type_t *)NULL;
+ }
+ }
+ return;
+}
+
+/*
+ * get_type() -- Convert a typecast string into a type.
+ *
+ * Returns a pointer to a struct containing type information.
+ * The type of struct returned is indicated by the contents
+ * of type. If the typecast contains an asterisk, set ptr_type
+ * equal to one, otherwise set it equal to zero.
+ */
+static type_t *
+get_type(char *s, int flags)
+{
+ int len, type = 0;
+ char *cp, typename[128];
+ type_t *t, *head, *last;
+ kltype_t *kltp;
+
+ head = last = (type_t *)NULL;
+
+ /* Get the type string
+ */
+ if (!strncmp(s, "struct", 6)) {
+ if ((cp = strpbrk(s + 7, " \t*"))) {
+ len = cp - (s + 7);
+ } else {
+ len = strlen(s + 7);
+ }
+ memcpy(typename, s + 7, len);
+ } else if (!strncmp(s, "union", 5)) {
+ if ((cp = strpbrk(s + 6, " \t*"))) {
+ len = cp - (s + 6);
+ } else {
+ len = strlen(s + 6);
+ }
+ memcpy(typename, s + 6, len);
+ } else {
+ if ((cp = strpbrk(s, "*)"))) {
+ len = cp - s;
+ } else {
+ len = strlen(s);
+ }
+ memcpy(typename, s, len);
+ }
+
+ /* Strip off any trailing spaces
+ */
+ while (len && ((typename[len - 1] == ' ') ||
+ (typename[len - 1] == '\t'))) {
+ len--;
+ }
+ typename[len] = 0;
+
+ if (!(kltp = kl_find_type(typename, KLT_TYPES))) {
+ return ((type_t *)NULL);
+ }
+ type = KLTYPE_FLAG;
+
+ /* check to see if this cast is a pointer to a type, a pointer
+ * to a pointer to a type, etc.
+ */
+ cp = s;
+ while ((cp = strpbrk(cp, "*"))) {
+ t = (type_t *)kl_alloc_block(sizeof(type_t));
+ t->flag = POINTER_FLAG;
+ if (last) {
+ last->t_next = t;
+ last = t;
+ } else {
+ head = last = t;
+ }
+ cp++;
+ }
+
+ /* Allocate a type block that will point to the type specific
+ * record.
+ */
+ t = (type_t *)kl_alloc_block(sizeof(type_t));
+ t->flag = type;
+
+ switch (t->flag) {
+
+ case KLTYPE_FLAG:
+ t->t_kltp = kltp;
+ break;
+
+ default:
+ free_type(head);
+ return((type_t*)NULL);
+ }
+ if (last) {
+ last->t_next = t;
+ } else {
+ head = t;
+ }
+ return(head);
+}
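+
+/* Example (assuming type information for the struct is available):
+ * get_type("struct task_struct **", flags) returns a chain of two
+ * POINTER_FLAG type_t records followed by one KLTYPE_FLAG record whose
+ * t_kltp points at the kltype_t for struct task_struct.
+ */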
+
+/*
+ * free_node()
+ */
+static void
+free_node(node_t *np)
+{
+ /* If there is nothing to free, just return.
+ */
+ if (!np) {
+ return;
+ }
+ if (np->name) {
+ kl_free_block((void *)np->name);
+ }
+ free_type(np->type);
+ kl_free_block((void *)np);
+}
+
+/*
+ * free_nodes()
+ */
+void
+free_nodes(node_t *np)
+{
+ node_t *q;
+
+ /* If there is nothing to free, just return.
+ */
+ if (!np) {
+ return;
+ }
+ if ((q = np->left)) {
+ free_nodes(q);
+ }
+ if ((q = np->right)) {
+ free_nodes(q);
+ }
+ if (np->name) {
+ kl_free_block((void *)np->name);
+ }
+ free_type(np->type);
+ kl_free_block((void *)np);
+}
+
+/*
+ * free_nodelist()
+ */
+static void
+free_nodelist(node_t *np)
+{
+ node_t *nnp;
+
+ while(np) {
+ nnp = np->next;
+ free_node(np);
+ np = nnp;
+ }
+}
+
+extern int alloc_debug;
+
+/*
+ * free_eval_memory()
+ */
+void
+free_eval_memory(void)
+{
+ free_nodelist(node_list);
+ node_list = (node_t*)NULL;
+}
+
+/*
+ * get_sizeof()
+ */
+static node_t *
+get_sizeof()
+{
+ node_t *curnp, *n0 = NULL;
+
+ if (!(curnp = next_node())) {
+ set_eval_error(E_SYNTAX_ERROR);
+ return((node_t*)NULL);
+ }
+
+ /* The next token should be a CAST or an open paren.
+ * If it's something else, then return an error.
+ */
+ if (curnp->operator == OPEN_PAREN) {
+ free_nodes(curnp);
+ n0 = do_eval(C_SIZEOF);
+ if (eval_error) {
+ if (n0) {
+ error_token = n0->tok_ptr;
+ free_nodes(n0);
+ }
+ return((node_t*)NULL);
+ }
+ } else if (curnp->operator == CAST) {
+ n0 = curnp;
+ } else {
+ /* curnp is the offending node; n0 is still NULL here */
+ set_eval_error(E_BAD_TYPE);
+ error_token = curnp->tok_ptr;
+ free_nodes(curnp);
+ return((node_t*)NULL);
+ }
+
+ if (!n0 || !n0->type) {
+ set_eval_error(E_NOTYPE);
+ if (n0) {
+ error_token = n0->tok_ptr;
+ free_nodes(n0);
+ }
+ return((node_t*)NULL);
+ }
+
+ if (n0->type->flag & POINTER_FLAG) {
+ n0->value = sizeof(void *);
+ } else if (n0->type->flag & KLTYPE_FLAG) {
+ kltype_t *kltp;
+
+ kltp = kl_realtype(n0->type->t_kltp, 0);
+
+ if (kltp->kl_bit_size) {
+ n0->value = kltp->kl_bit_size / 8;
+ if (kltp->kl_bit_size % 8) {
+ n0->value += 1;
+ }
+ } else {
+ n0->value = kltp->kl_size;
+ }
+ } else {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n0->tok_ptr;
+ free_nodes(n0);
+ return((node_t*)NULL);
+ }
+ n0->node_type = NUMBER;
+ n0->flags = 0;
+ n0->operator = 0;
+ n0->byte_size = 0;
+ n0->address = 0;
+ if (n0->type) {
+ free_type(n0->type);
+ n0->type = 0;
+ }
+ return(n0);
+}
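+
+/* Example: "sizeof(struct dentry *)" reduces to sizeof(void *) via the
+ * POINTER_FLAG test above, while "sizeof(struct dentry)" returns the
+ * struct's kl_size; a bit field with kl_bit_size 12 would round up to
+ * 2 bytes.
+ */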
+
+/*
+ * apply_unary()
+ */
+static int
+apply_unary(node_t *n, uint64_t *value)
+{
+ if (!n || !n->right) {
+ return(-1);
+ }
+
+ switch (n->operator) {
+
+ case UNARY_MINUS :
+ *value = (0 - n->right->value);
+ break;
+
+ case UNARY_PLUS :
+ *value = (n->right->value);
+ break;
+
+ case ONES_COMPLEMENT :
+ *value = ~(n->right->value);
+ break;
+
+ case LOGICAL_NEGATION :
+ if (n->right->value) {
+ *value = 0;
+ } else {
+ *value = 1;
+ }
+ logical_flag++;
+ break;
+
+ default :
+ break;
+ }
+ return(0);
+}
+
+/*
+ * pointer_math()
+ */
+static int
+pointer_math(node_t *np, uint64_t *value, int type, int flags)
+{
+ int size;
+ uint64_t lvalue, rvalue;
+ type_t *tp = NULL, *tp1;
+
+ if (type < 0) {
+ if (np->left->flags & POINTER_FLAG) {
+
+ /* Since we only allow pointer math,
+ * anything other than a pointer causes
+ * failure.
+ */
+ tp = (type_t*)np->left->type;
+ if (tp->flag != POINTER_FLAG) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = np->left->tok_ptr;
+ return(-1);
+ }
+
+ tp = tp->t_next;
+
+ switch (tp->flag) {
+
+ case POINTER_FLAG :
+ size = sizeof(void *);
+ break;
+
+ case KLTYPE_FLAG : {
+ /* Get the size of the real type,
+ * not just the size of a pointer
+ * If there isn't any type info,
+ * then just set size equal to the
+ * size of a pointer.
+ */
+ kltype_t *kltp, *rkltp;
+
+ kltp = tp->t_kltp;
+ rkltp = kl_realtype(kltp, 0);
+ if (!(size = rkltp->kl_size)) {
+ if (kltp != rkltp) {
+ size = kltp->kl_size;
+ } else {
+ size = sizeof(void *);
+ }
+ }
+ break;
+ }
+
+ default :
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = np->left->tok_ptr;
+ return(-1);
+ }
+ lvalue = np->left->value;
+ } else {
+ size = sizeof(void *);
+ lvalue = np->left->address;
+ }
+ switch (np->operator) {
+ case ADD :
+ *value = lvalue + (np->right->value * size);
+ break;
+
+ case SUBTRACT :
+ *value = lvalue - (np->right->value * size);
+ break;
+
+ default :
+ set_eval_error(E_BAD_OPERATOR);
+ error_token = np->tok_ptr;
+ return(-1);
+ }
+ } else if (type > 0) {
+ if (np->right->flags & POINTER_FLAG) {
+
+ /* Since we only allow pointer math,
+ * anything other than a pointer causes
+ * failure.
+ */
+ tp = (type_t*)np->right->type;
+ if (tp->flag != POINTER_FLAG) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = np->right->tok_ptr;
+ return(-1);
+ }
+
+ tp = tp->t_next;
+
+ switch (tp->flag) {
+
+ case POINTER_FLAG :
+ size = sizeof(void *);
+ break;
+
+ case KLTYPE_FLAG :
+ size = tp->t_kltp->kl_size;
+ break;
+
+ default :
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = np->right->tok_ptr;
+ return(-1);
+ }
+ rvalue = np->right->value;
+ } else {
+ size = sizeof(void *);
+ rvalue = np->right->address;
+ }
+ switch (np->operator) {
+ case ADD :
+ *value = rvalue + (np->left->value * size);
+ break;
+
+ case SUBTRACT :
+ *value = rvalue - (np->left->value * size);
+ break;
+
+ default :
+ set_eval_error(E_BAD_OPERATOR);
+ error_token = np->tok_ptr;
+ return(-1);
+ }
+ } else {
+ return(-1);
+ }
+ tp1 = (type_t *)kl_alloc_block(sizeof(type_t));
+ tp1->flag = POINTER_FLAG;
+ np->type = tp1;
+ while (tp && (tp->flag == POINTER_FLAG)) {
+ tp1->t_next = (type_t *)kl_alloc_block(sizeof(type_t));
+ tp1 = tp1->t_next;
+ tp1->flag = POINTER_FLAG;
+ tp = tp->t_next;
+ }
+ if (tp) {
+ tp1->t_next = (type_t *)kl_alloc_block(sizeof(type_t));
+ tp1 = tp1->t_next;
+ tp1->flag = KLTYPE_FLAG;
+ tp1->t_kltp = tp->t_kltp;
+ if (type < 0) {
+ if (np->left->flags & POINTER_FLAG) {
+ np->flags |= POINTER_FLAG;
+ } else {
+ np->flags |= VADDR;
+ }
+ } else {
+ if (np->right->flags & POINTER_FLAG) {
+ np->flags |= POINTER_FLAG;
+ } else {
+ np->flags |= VADDR;
+ }
+ }
+ }
+ return(0);
+}
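+
+/* Worked example of the scaling above, using a hypothetical struct foo
+ * with size 24: if np->left holds a "struct foo *" value p, then
+ * "p + 2" evaluates to p + (2 * 24), i.e. ordinary C pointer
+ * arithmetic, and the result keeps the "struct foo *" type.
+ */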
+
+/*
+ * check_unsigned()
+ */
+int
+check_unsigned(node_t *np)
+{
+ kltype_t *kltp, *rkltp;
+
+ if (np->flags & UNSIGNED_FLAG) {
+ return(1);
+ }
+ if (!np->type) {
+ return(0);
+ }
+ if (np->type->flag == POINTER_FLAG) {
+ return(0);
+ }
+ kltp = np->type->t_kltp;
+ if ((rkltp = kl_realtype(kltp, 0))) {
+ if (rkltp->kl_encoding == ENC_UNSIGNED) {
+ np->flags |= UNSIGNED_FLAG;
+ return(1);
+ }
+ }
+ return(0);
+}
+
+/*
+ * apply()
+ */
+static int
+apply(node_t *np, uint64_t *value, int flags)
+{
+ int ltype, rtype, do_signed = 0;
+
+ /* There must be two operands
+ */
+ if (!np->right || !np->left) {
+ set_eval_error(E_MISSING_OPERAND);
+ error_token = np->tok_ptr;
+ return(-1);
+ }
+
+ if (np->right->node_type == OPERATOR) {
+ replace(np->right, flags);
+ if (eval_error) {
+ return(-1);
+ }
+ }
+
+ ltype = np->left->node_type;
+ rtype = np->right->node_type;
+ if ((ltype == TYPE_DEF) || (ltype == VADDR)) {
+ if ((rtype == TYPE_DEF) || (rtype == VADDR)) {
+ set_eval_error(E_NO_VALUE);
+ error_token = np->tok_ptr;
+ return(-1);
+ }
+ if (check_unsigned(np->left)) {
+ np->flags |= UNSIGNED_FLAG;
+ } else {
+ do_signed++;
+ }
+ if (!type_to_number(np->left)) {
+ return(pointer_math(np, value, -1, flags));
+ }
+ np->byte_size = np->left->byte_size;
+ } else if ((rtype == TYPE_DEF) || (rtype == VADDR)) {
+ if ((ltype == TYPE_DEF) || (ltype == VADDR)) {
+ error_token = np->tok_ptr;
+ set_eval_error(E_NO_VALUE);
+ return(-1);
+ }
+ if (check_unsigned(np->right)) {
+ np->flags |= UNSIGNED_FLAG;
+ } else {
+ do_signed++;
+ }
+ if (!type_to_number(np->right)) {
+ return(pointer_math(np, value, 1, flags));
+ }
+ np->byte_size = np->right->byte_size;
+ } else if ((np->left->flags & UNSIGNED_FLAG) ||
+ (np->right->flags & UNSIGNED_FLAG)) {
+ np->flags |= UNSIGNED_FLAG;
+ } else {
+ do_signed++;
+ }
+
+ if (do_signed) {
+ switch (np->operator) {
+ case ADD :
+ *value = (int64_t)np->left->value +
+ (int64_t)np->right->value;
+ break;
+
+ case SUBTRACT :
+ *value = (int64_t)np->left->value -
+ (int64_t)np->right->value;
+ break;
+
+ case MULTIPLY :
+ *value = (int64_t)np->left->value *
+ (int64_t)np->right->value;
+ break;
+
+ case DIVIDE :
+ if ((int64_t)np->right->value == 0) {
+ set_eval_error(E_DIVIDE_BY_ZERO);
+ error_token = np->right->tok_ptr;
+ return(-1);
+ }
+ *value = (int64_t)np->left->value /
+ (int64_t)np->right->value;
+ break;
+
+ case BITWISE_OR :
+ *value = (int64_t)np->left->value |
+ (int64_t)np->right->value;
+ break;
+
+ case BITWISE_AND :
+ *value = (int64_t)np->left->value &
+ (int64_t)np->right->value;
+ break;
+
+ case MODULUS :
+ if ((int64_t)np->right->value == 0) {
+ set_eval_error(E_DIVIDE_BY_ZERO);
+ error_token = np->right->tok_ptr;
+ return(-1);
+ }
+ *value = (int64_t)np->left->value %
+ (int64_t)np->right->value;
+ break;
+
+ case RIGHT_SHIFT :
+ *value =
+ (int64_t)np->left->value >>
+ (int64_t)np->right->value;
+ break;
+
+ case LEFT_SHIFT :
+ *value =
+ (int64_t)np->left->value <<
+ (int64_t)np->right->value;
+ break;
+
+ case LOGICAL_OR :
+ if ((int64_t)np->left->value ||
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case LOGICAL_AND :
+ if ((int64_t)np->left->value &&
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case EQUAL :
+ if ((int64_t)np->left->value ==
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case NOT_EQUAL :
+ if ((int64_t)np->left->value !=
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case LESS_THAN :
+ if ((int64_t)np->left->value <
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case GREATER_THAN :
+ if ((int64_t)np->left->value >
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case LESS_THAN_OR_EQUAL :
+ if ((int64_t)np->left->value <=
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case GREATER_THAN_OR_EQUAL :
+ if ((int64_t)np->left->value >=
+ (int64_t)np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ default :
+ break;
+ }
+ } else {
+ switch (np->operator) {
+ case ADD :
+ *value = np->left->value + np->right->value;
+ break;
+
+ case SUBTRACT :
+ *value = np->left->value - np->right->value;
+ break;
+
+ case MULTIPLY :
+ *value = np->left->value * np->right->value;
+ break;
+
+ case DIVIDE :
+ *value = np->left->value / np->right->value;
+ break;
+
+ case BITWISE_OR :
+ *value = np->left->value | np->right->value;
+ break;
+
+ case BITWISE_AND :
+ *value = np->left->value & np->right->value;
+ break;
+
+ case MODULUS :
+ *value = np->left->value % np->right->value;
+ break;
+
+ case RIGHT_SHIFT :
+ *value = np->left->value >> np->right->value;
+ break;
+
+ case LEFT_SHIFT :
+ *value = np->left->value << np->right->value;
+ break;
+
+ case LOGICAL_OR :
+ if (np->left->value || np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case LOGICAL_AND :
+ if (np->left->value && np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case EQUAL :
+ if (np->left->value == np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case NOT_EQUAL :
+ if (np->left->value != np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case LESS_THAN :
+ if (np->left->value < np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case GREATER_THAN :
+ if (np->left->value > np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case LESS_THAN_OR_EQUAL :
+ if (np->left->value <= np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ case GREATER_THAN_OR_EQUAL :
+ if (np->left->value >= np->right->value) {
+ *value = 1;
+ } else {
+ *value = 0;
+ }
+ logical_flag++;
+ break;
+
+ default :
+ break;
+ }
+ }
+ return(0);
+}
+
+/*
+ * member_to_type()
+ */
+static type_t *
+member_to_type(kltype_t *kltp, int flags)
+{
+ kltype_t *rkltp;
+ type_t *tp, *head = (type_t *)NULL, *last = (type_t *)NULL;
+
+ /* Make sure this is a member
+ */
+ if (kltp->kl_type != KLT_MEMBER) {
+ return((type_t *)NULL);
+ }
+
+ rkltp = kltp->kl_realtype;
+ while (rkltp && rkltp->kl_type == KLT_POINTER) {
+ tp = (type_t *)kl_alloc_block(sizeof(type_t));
+ tp->flag = POINTER_FLAG;
+ if (last) {
+ last->t_next = tp;
+ last = tp;
+ } else {
+ head = last = tp;
+ }
+ rkltp = rkltp->kl_realtype;
+ }
+
+ /* If we step past all the pointer records and don't point
+ * at anything, this must be a void pointer. Setup a VOID
+ * type struct so that we can maintain a pointer to some
+ * type info.
+ */
+ if (!rkltp) {
+ tp = (type_t *)kl_alloc_block(sizeof(type_t));
+ tp->flag = VOID_FLAG;
+ tp->t_kltp = kltp;
+ if (last) {
+ last->t_next = tp;
+ last = tp;
+ } else {
+ head = last = tp;
+ }
+ return(head);
+ }
+
+ tp = (type_t *)kl_alloc_block(sizeof(type_t));
+ tp->flag = KLTYPE_FLAG;
+ tp->t_kltp = kltp;
+ if (last) {
+ last->t_next = tp;
+ } else {
+ head = tp;
+ }
+ return(head);
+}
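+
+/* Example: for a member declared as "struct page **pages" the chain
+ * built above is two POINTER_FLAG records followed by a KLTYPE_FLAG
+ * record, while for "void *priv" the realtype walk runs off the end
+ * and a POINTER_FLAG record followed by a VOID_FLAG record is returned.
+ */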
+
+/*
+ * replace() --
+ *
+ * Replace the tree with a node containing the numerical result of
+ * the equation. If pointer math is performed, the result will have
+ * the same type as the pointer.
+ */
+static node_t *
+replace(node_t *np, int flags)
+{
+ int offset;
+ uint64_t value;
+ node_t *q;
+
+ if (!np) {
+ return((node_t *)NULL);
+ }
+
+ if (np->node_type == OPERATOR) {
+ if (!(q = np->left)) {
+ return((node_t *)NULL);
+ }
+ while (q) {
+ if (!replace(q, flags)) {
+ return((node_t *)NULL);
+ }
+ q = q->right;
+ }
+
+ if ((np->operator == RIGHT_ARROW) || (np->operator == DOT)) {
+ kaddr_t addr = 0;
+ type_t *tp;
+
+ if (!have_debug_file) {
+ kdb_printf("no debuginfo file\n");
+ return 0;
+ }
+
+ /* The left node must point to a TYPE_DEF
+ */
+ if (np->left->node_type != TYPE_DEF) {
+ if (np->left->flags & NOTYPE_FLAG) {
+ set_eval_error(E_NOTYPE);
+ error_token = np->left->tok_ptr;
+ } else {
+ set_eval_error(E_BAD_TYPE);
+ error_token = np->left->tok_ptr;
+ }
+ return((node_t *)NULL);
+ }
+
+ /* Get the type information. Check to see if we
+ * have a pointer to a type. If we do, we need
+ * to strip off the pointer and get the type info.
+ */
+ if (np->left->type->flag == POINTER_FLAG) {
+ tp = np->left->type->t_next;
+ kl_free_block((void *)np->left->type);
+ } else {
+ tp = np->left->type;
+ }
+
+ /* We need to zero out the left child's type pointer
+ * to prevent the type structs from being prematurely
+ * freed (upon success). We have to remember, however,
+ * to free the type information before we return.
+ */
+ np->left->type = (type_t*)NULL;
+
+ /* tp should now point at a type_t struct that
+ * references a kltype_t struct. If it points
+ * to anything else, return failure.
+ *
+ */
+ if (tp->flag != KLTYPE_FLAG) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = np->left->tok_ptr;
+ free_type(tp);
+ return((node_t *)NULL);
+ }
+
+ switch (tp->flag) {
+ case KLTYPE_FLAG: {
+ /* Make sure that the type referenced
+ * is a struct, union, or pointer to
+ * a struct or union. If it isn't one
+ * of these, then return failure.
+ */
+ kltype_t *kltp, *kltmp;
+
+ kltp = kl_realtype(tp->t_kltp, 0);
+ if ((kltp->kl_type != KLT_STRUCT) &&
+ (kltp->kl_type != KLT_UNION)) {
+ error_token =
+ np->left->tok_ptr;
+ set_eval_error(E_BAD_TYPE);
+ free_type(tp);
+ return((node_t *)NULL);
+ }
+
+ /* Get type information for member.
+ * If member is a pointer to a type,
+ * get the pointer address and load
+ * it into value. In any event, load
+ * the struct/union address plus the
+ * offset of the member.
+ */
+ kltmp = kl_get_member(kltp,
+ np->right->name);
+ if (!kltmp) {
+ set_eval_error(E_BAD_MEMBER);
+ error_token =
+ np->right->tok_ptr;
+ free_type(tp);
+ return((node_t *)NULL);
+ }
+
+ /* We can't just use the offset value
+ * for the member. That's because it
+ * may be from an anonymous struct or
+ * union within another struct
+ * definition.
+ */
+ offset = kl_get_member_offset(kltp,
+ np->right->name);
+ np->type = member_to_type(kltmp, flags);
+ if (!np->type) {
+ set_eval_error(E_BAD_MEMBER);
+ error_token =
+ np->right->tok_ptr;
+ free_type(tp);
+ return((node_t *)NULL);
+ }
+
+ /* Now free the struct type information
+ */
+ free_type(tp);
+ np->node_type = TYPE_DEF;
+ np->flags |= KLTYPE_FLAG;
+ np->operator = 0;
+ addr = 0;
+ if (np->left->flags & POINTER_FLAG) {
+ addr = np->left->value +
+ offset;
+ } else if (np->left->flags &
+ ADDRESS_FLAG) {
+ addr = np->left->address +
+ offset;
+ }
+ if (addr) {
+ np->address = addr;
+ np->flags |= ADDRESS_FLAG;
+ }
+
+ if (np->type->flag == POINTER_FLAG) {
+ np->flags |= POINTER_FLAG;
+ np->value = *((kaddr_t *)addr);
+ } else {
+ np->value = addr;
+ }
+ break;
+ }
+ }
+ free_nodes(np->left);
+ free_nodes(np->right);
+ np->left = np->right = (node_t*)NULL;
+ return(np);
+ } else {
+ if (!np->left || !np->right) {
+ set_eval_error(E_MISSING_OPERAND);
+ error_token = np->tok_ptr;
+ return((node_t *)NULL);
+ }
+ if (np->left->byte_size && np->right->byte_size) {
+ if (np->left->byte_size >
+ np->right->byte_size) {
+
+ /* Left byte_size is greater than right
+ */
+ np->byte_size = np->left->byte_size;
+ np->type = np->left->type;
+ np->flags = np->left->flags;
+ free_type(np->right->type);
+ } else if (np->left->byte_size <
+ np->right->byte_size) {
+
+ /* Right byte_size is greater than left
+ */
+ np->byte_size = np->right->byte_size;
+ np->type = np->right->type;
+ np->flags = np->right->flags;
+ free_type(np->left->type);
+ } else {
+
+ /* Left and right byte_size is equal
+ */
+ if (np->left->flags & UNSIGNED_FLAG) {
+ np->byte_size =
+ np->left->byte_size;
+ np->type = np->left->type;
+ np->flags = np->left->flags;
+ free_type(np->right->type);
+ } else if (np->right->flags &
+ UNSIGNED_FLAG) {
+ np->byte_size =
+ np->right->byte_size;
+ np->type = np->right->type;
+ np->flags = np->right->flags;
+ free_type(np->left->type);
+ } else {
+ np->byte_size =
+ np->left->byte_size;
+ np->type = np->left->type;
+ np->flags = np->left->flags;
+ free_type(np->right->type);
+ }
+ }
+ } else if (np->left->byte_size) {
+ np->byte_size = np->left->byte_size;
+ np->type = np->left->type;
+ np->flags = np->left->flags;
+ free_type(np->right->type);
+ } else if (np->right->byte_size) {
+ np->byte_size = np->right->byte_size;
+ np->type = np->right->type;
+ np->flags = np->right->flags;
+ } else {
+ /* XXX - No byte sizes
+ */
+ }
+
+ if (apply(np, &value, flags)) {
+ return((node_t *)NULL);
+ }
+ }
+ np->right->type = np->left->type = (type_t*)NULL;
+
+ /* Flesh out the rest of the node struct.
+ */
+ if (np->type) {
+ np->node_type = TYPE_DEF;
+ np->flags |= KLTYPE_FLAG;
+ } else {
+ np->node_type = NUMBER;
+ np->flags &= ~(KLTYPE_FLAG);
+ }
+ np->operator = 0;
+ np->value = value;
+ kl_free_block((void *)np->left);
+ kl_free_block((void *)np->right);
+ np->left = np->right = (node_t*)NULL;
+ }
+ return(np);
+}
+
+/*
+ * replace_cast()
+ */
+static int
+replace_cast(node_t *n, int flags)
+{
+ type_t *t;
+
+ if (!n) {
+ set_eval_error(E_SYNTAX_ERROR);
+ return(-1);
+ } else if (!n->right) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = n->tok_ptr;
+ return(-1);
+ }
+ if (n->flags & POINTER_FLAG) {
+ if (n->right->node_type == VADDR) {
+ if (n->right->flags & ADDRESS_FLAG) {
+ n->value = n->right->address;
+ } else {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+
+ } else {
+ n->value = n->right->value;
+ n->address = 0;
+ }
+ } else if (n->right->flags & ADDRESS_FLAG) {
+ n->flags |= ADDRESS_FLAG;
+ n->address = n->right->address;
+ n->value = n->right->value;
+ } else {
+ kltype_t *kltp;
+
+ if (!(t = eval_type(n))) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->tok_ptr;
+ return(-1);
+ }
+ if (t->t_kltp->kl_type != KLT_BASE) {
+
+ kltp = kl_realtype(t->t_kltp, 0);
+ if (kltp->kl_type != KLT_BASE) {
+ set_eval_error(E_BAD_CAST);
+ error_token = n->tok_ptr;
+ return(-1);
+ }
+ }
+ n->value = n->right->value;
+ n->type = t;
+ }
+ n->node_type = TYPE_DEF;
+ n->operator = 0;
+ free_node(n->right);
+ n->right = (node_t *)NULL;
+ return(0);
+}
+
+/*
+ * replace_indirection()
+ */
+static int
+replace_indirection(node_t *n, int flags)
+{
+ kaddr_t addr;
+ type_t *t, *tp, *rtp;
+
+ /* Make sure there is a right child and that it is a TYPE_DEF.
+ */
+ if (!n->right) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->tok_ptr;
+ return(-1);
+ } else if (n->right->node_type != TYPE_DEF) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+
+ /* Make sure the right node contains a pointer or address value.
+ * Note that it's possible for the whatis command to generate
+ * this case without any actual pointer/address value.
+ */
+ if (!(n->right->flags & (POINTER_FLAG|ADDRESS_FLAG))) {
+ set_eval_error(E_BAD_POINTER);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+
+ /* Get the pointer to the first type struct and make sure
+ * it's a pointer.
+ */
+ if (!(tp = n->right->type) || (tp->flag != POINTER_FLAG)) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+
+ /* Make sure we have a pointer to a type structure.
+ */
+ if (!(n->right->flags & KLTYPE_FLAG)) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+
+ n->node_type = TYPE_DEF;
+ n->flags = KLTYPE_FLAG;
+ n->operator = 0;
+
+ if (!(t = tp->t_next)) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+
+ if (!(rtp = eval_type(n->right))) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+
+ /* Zero out the type field in the right child so
+ * it won't accidentally be freed when the right child
+ * is freed (upon success).
+ */
+ n->right->type = (type_t*)NULL;
+
+ n->type = t;
+
+ /* Free the pointer struct
+ */
+ kl_free_block((void *)tp);
+
+ /* Get the pointer address
+ */
+ addr = n->address = n->right->value;
+ n->flags |= ADDRESS_FLAG;
+
+ if (rtp->t_kltp->kl_type == KLT_MEMBER) {
+ /* If this is a member, we have to step over the KLT_MEMBER
+ * struct and then make sure we have a KLT_POINTER struct.
+ * If we do, we step over it too...otherwise return an
+ * error.
+ */
+ if (rtp->t_kltp->kl_realtype->kl_type != KLT_POINTER) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+ rtp->t_kltp = rtp->t_kltp->kl_realtype;
+ }
+
+ if (rtp->t_kltp->kl_type == KLT_POINTER) {
+ /* Strip off the pointer type record so that
+ * we pick up the actual type definition with
+ * our indirection.
+ */
+ rtp->t_kltp = rtp->t_kltp->kl_realtype;
+ if (rtp->t_kltp->kl_name &&
+ !strcmp(rtp->t_kltp->kl_name, "char")) {
+ n->flags |= STRING_FLAG;
+ }
+ }
+
+
+ * pointer value.
+ */
+ if (n->type->flag == POINTER_FLAG) {
+ n->value = *((kaddr_t *)addr);
+
+ /* Set the appropriate node flag values
+ */
+ n->flags |= POINTER_FLAG;
+ free_node(n->right);
+ n->left = n->right = (node_t *)NULL;
+ return(0);
+ }
+ /* Zero out the type field in the right child so it doesn't
+ * accidentally get freed up when the right child is freed
+ * (upon success).
+ */
+ n->right->type = (type_t*)NULL;
+ free_node(n->right);
+ n->left = n->right = (node_t *)NULL;
+ return(0);
+}
+
+/*
+ * replace_unary()
+ *
+ * Convert a unary operator node that contains a pointer to a value
+ * with a node containing the numerical result. Free the node that
+ * originally contained the value.
+ */
+static int
+replace_unary(node_t *n, int flags)
+{
+ uint64_t value;
+
+ if (!n->right) {
+ set_eval_error(E_MISSING_OPERAND);
+ error_token = n->tok_ptr;
+ return(-1);
+ }
+ if (is_unary(n->right->operator)) {
+ if (replace_unary(n->right, flags) == -1) {
+ return(-1);
+ }
+ }
+ if (n->operator == CAST) {
+ return(replace_cast(n, flags));
+ } else if (n->operator == INDIRECTION) {
+ return(replace_indirection(n, flags));
+ } else if (n->operator == ADDRESS) {
+ type_t *t;
+
+ if (n->right->node_type == TYPE_DEF) {
+ if (!(n->right->flags & ADDRESS_FLAG)) {
+ set_eval_error(E_NO_ADDRESS);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+ t = n->right->type;
+ } else {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->right->tok_ptr;
+ return(-1);
+ }
+ n->type = (type_t*)kl_alloc_block(sizeof(type_t));
+ n->type->flag = POINTER_FLAG;
+ n->type->t_next = t;
+ n->node_type = TYPE_DEF;
+ n->operator = 0;
+ n->value = n->right->address;
+ n->flags = POINTER_FLAG;
+ if (!(t = eval_type(n))) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = n->tok_ptr;
+ return(-1);
+ }
+ n->flags |= t->flag;
+ n->right->type = 0;
+ free_nodes(n->right);
+ n->left = n->right = (node_t *)NULL;
+ return(0);
+ } else if (apply_unary(n, &value) == -1) {
+ return(-1);
+ }
+ free_nodes(n->right);
+ n->node_type = NUMBER;
+ n->operator = 0;
+ n->left = n->right = (node_t *)NULL;
+ memcpy(&n->value, &value, sizeof(uint64_t));
+ return(0);
+}
+
+/*
+ * pointer_to_element()
+ */
+static void
+pointer_to_element(node_t *n0, node_t *n1)
+{
+ int size;
+ kltype_t *kltp, *rkltp;
+ type_t *tp;
+
+ if (!(tp = n0->type)) {
+ set_eval_error(E_BAD_INDEX);
+ error_token = n0->tok_ptr;
+ return;
+ }
+ if (tp->t_next->flag == POINTER_FLAG) {
+ size = sizeof(void *);
+ } else {
+ kltp = tp->t_next->t_kltp;
+ if (!(rkltp = kl_realtype(kltp, 0))) {
+ set_eval_error(E_BAD_INDEX);
+ error_token = n0->tok_ptr;
+ return;
+ }
+ size = rkltp->kl_size;
+ }
+
+ /* Get the details on the array element
+ */
+ n0->flags |= ADDRESS_FLAG;
+ n0->address = n0->value + (n1->value * size);
+ n0->type = tp->t_next;
+ if (tp->t_next->flag == POINTER_FLAG) {
+ n0->flags |= POINTER_FLAG;
+ n0->value = *((kaddr_t *)n0->address);
+ } else {
+ n0->flags &= (~POINTER_FLAG);
+ n0->value = 0;
+ }
+ /* Only free the pointer record once t_next is no longer needed */
+ kl_free_block((char *)tp);
+}
+
+/*
+ * array_to_element()
+ */
+static void
+array_to_element(node_t *n0, node_t *n1)
+{
+ kltype_t *kltp, *rkltp, *ip, *ep;
+ type_t *tp, *troot = (type_t *)NULL;
+
+ if (!(tp = n0->type)) {
+ set_eval_error(E_BAD_INDEX);
+ error_token = n0->tok_ptr;
+ return;
+ }
+
+ /* If we are indexing a pointer, then call
+ * pointer_to_element() and return.
+ */
+ if (tp->flag == POINTER_FLAG) {
+ pointer_to_element(n0, n1);
+ return;
+ }
+
+ if (!(kltp = n0->type->t_kltp)) {
+ set_eval_error(E_BAD_INDEX);
+ error_token = n0->tok_ptr;
+ return;
+ }
+ if (!(rkltp = kl_realtype(kltp, KLT_ARRAY))) {
+ set_eval_error(E_BAD_INDEX);
+ error_token = n0->tok_ptr;
+ return;
+ }
+ ip = rkltp->kl_indextype;
+ ep = rkltp->kl_elementtype;
+ if (!ip || !ep) {
+ set_eval_error(E_BAD_INDEX);
+ error_token = n1->tok_ptr;
+ return;
+ }
+ /* Get the details on the array element
+ */
+ n0->address = n0->address + (n1->value * ep->kl_size);
+ if (ep->kl_type == KLT_POINTER) {
+ n0->flags |= POINTER_FLAG;
+ n0->value = *((kaddr_t *)n0->address);
+ } else {
+ n0->value = 0;
+ }
+ n0->flags |= ADDRESS_FLAG;
+ kltp = ep;
+ while (kltp->kl_type == KLT_POINTER) {
+ if (troot) {
+ tp->t_next = (type_t*)kl_alloc_block(sizeof(type_t));
+ tp = tp->t_next;
+ } else {
+ tp = (type_t*)kl_alloc_block(sizeof(type_t));
+ troot = tp;
+ }
+ tp->flag = POINTER_FLAG;
+ kltp = kltp->kl_realtype;
+ }
+ if (troot) {
+ tp->t_next = (type_t*)kl_alloc_block(sizeof(type_t));
+ tp = tp->t_next;
+ n0->type = troot;
+ } else {
+ tp = (type_t*)kl_alloc_block(sizeof(type_t));
+ n0->type = tp;
+ }
+ tp->flag = KLTYPE_FLAG;
+ tp->t_kltp = ep;
+}
+
+/*
+ * number_to_size()
+ */
+int
+number_to_size(node_t *np)
+{
+ int unsigned_flag = 0;
+
+ if (np->node_type != NUMBER) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ if (np->flags & UNSIGNED_FLAG) {
+ unsigned_flag = 1;
+ }
+ if (np->value <= 0xffffffff) {
+ return(4);
+ } else if (((np->value >> 32) & 0xffffffff) == 0xffffffff) {
+ if (unsigned_flag) {
+ return(8);
+ } else if (sizeof(void *) == 4) {
+ return(4);
+ } else {
+ return(8);
+ }
+ }
+ return(8);
+}
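+
+#if 0
+/* Illustrative sketch only, not part of the patch: how
+ * number_to_size() classifies values on a 64-bit kernel. A value
+ * that fits in 32 bits reports 4 bytes; a sign-extended negative
+ * (top 32 bits all set) reports 8 bytes because sizeof(void *) is 8.
+ */
+static void number_to_size_example(node_t *np)
+{
+ np->node_type = NUMBER;
+ np->flags = 0;
+ np->value = 0x12345678; /* fits in 32 bits -> 4 */
+ BUG_ON(number_to_size(np) != 4);
+ np->value = 0xffffffff9abcdef0ULL; /* sign extended -> 8 */
+ BUG_ON(number_to_size(np) != 8);
+}
+#endif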
+
+/*
+ * number_to_type()
+ */
+kltype_t *
+number_to_type(node_t *np)
+{
+ int unsigned_flag = 0;
+ kltype_t *kltp, *rkltp = (kltype_t *)NULL;
+
+ if (np->node_type != NUMBER) {
+ set_eval_error(E_BAD_TYPE);
+ error_token = np->tok_ptr;
+ return((kltype_t *)NULL);
+ }
+ if (np->flags & UNSIGNED_FLAG) {
+ unsigned_flag = 1;
+ }
+ if (np->value <= 0xffffffff) {
+ if (unsigned_flag) {
+ kltp = kl_find_type("uint32_t", KLT_TYPEDEF);
+ } else {
+ kltp = kl_find_type("int32_t", KLT_TYPEDEF);
+ }
+ } else if (((np->value >> 32) & 0xffffffff) == 0xffffffff) {
+ if (unsigned_flag) {
+ kltp = kl_find_type("uint64_t", KLT_TYPEDEF);
+ } else if (sizeof(void *) == 4) {
+ kltp = kl_find_type("int32_t", KLT_TYPEDEF);
+ } else {
+ kltp = kl_find_type("int64_t", KLT_TYPEDEF);
+ }
+ } else {
+ if (unsigned_flag) {
+ kltp = kl_find_type("uint64_t", KLT_TYPEDEF);
+ } else {
+ kltp = kl_find_type("int64_t", KLT_TYPEDEF);
+ }
+ }
+ if (kltp) {
+ if (!(rkltp = kl_realtype(kltp, 0))) {
+ rkltp = kltp;
+ }
+ } else {
+ set_eval_error(E_BAD_TYPE);
+ error_token = np->tok_ptr;
+ }
+ return(rkltp);
+}
+
+/*
+ * type_to_number()
+ *
+ * Convert a base type to a numeric value. Return 1 on successful
+ * conversion, 0 if nothing was done.
+ */
+static int
+type_to_number(node_t *np)
+{
+ int byte_size, bit_offset, bit_size, encoding;
+ uint64_t value, value1;
+ kltype_t *kltp, *rkltp;
+
+ /* Sanity check...
+ */
+ if (np->node_type != TYPE_DEF) {
+ set_eval_error(E_NOTYPE);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ if (!np->type) {
+ set_eval_error(E_NOTYPE);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ if (np->type->flag == POINTER_FLAG) {
+ return(0);
+ }
+
+ /* Get the real type record and make sure that it is
+ * for a base type.
+ */
+ kltp = np->type->t_kltp;
+ rkltp = kl_realtype(kltp, 0);
+ if (rkltp->kl_type != KLT_BASE) {
+ set_eval_error(E_NOTYPE);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+
+ byte_size = rkltp->kl_size;
+ bit_offset = rkltp->kl_bit_offset;
+ if (!(bit_size = rkltp->kl_bit_size)) {
+ bit_size = byte_size * 8;
+ }
+ encoding = rkltp->kl_encoding;
+ if (np->flags & ADDRESS_FLAG) {
+ /* FIXME: untested */
+ if (invalid_address(np->address, byte_size)) {
+ kdb_printf("ILLEGAL ADDRESS (%lx)",
+ (uaddr_t)np->address);
+ return (0);
+ }
+ kl_get_block(np->address, byte_size,(void *)&value1,(void *)0);
+ } else {
+ value1 = np->value;
+ }
+ value = kl_get_bit_value(&value1, byte_size, bit_size, bit_offset);
+ switch (byte_size) {
+
+ case 1 :
+ if (encoding == ENC_UNSIGNED) {
+ np->value = (unsigned char)value;
+ np->flags |= UNSIGNED_FLAG;
+ } else if (encoding == ENC_SIGNED) {
+ np->value = (signed char)value;
+ } else {
+ np->value = (char)value;
+ }
+ break;
+
+ case 2 :
+ if (encoding == ENC_UNSIGNED) {
+ np->value = (uint16_t)value;
+ np->flags |= UNSIGNED_FLAG;
+ } else {
+ np->value = (int16_t)value;
+ }
+ break;
+
+ case 4 :
+ if (encoding == ENC_UNSIGNED) {
+ np->value = (uint32_t)value;
+ np->flags |= UNSIGNED_FLAG;
+ } else {
+ np->value = (int32_t)value;
+ }
+ break;
+
+ case 8 :
+ if (encoding == ENC_UNSIGNED) {
+ np->value = (uint64_t)value;
+ np->flags |= UNSIGNED_FLAG;
+ } else {
+ np->value = (int64_t)value;
+ }
+ break;
+
+ default :
+ set_eval_error(E_BAD_TYPE);
+ error_token = np->tok_ptr;
+ return(0);
+ }
+ np->byte_size = byte_size;
+ np->node_type = NUMBER;
+ return(1);
+}
+
+/*
+ * eval_type()
+ */
+static type_t *
+eval_type(node_t *n)
+{
+ type_t *t;
+
+ if (!(t = n->type)) {
+ return((type_t*)NULL);
+ }
+ while (t->flag == POINTER_FLAG) {
+ t = t->t_next;
+
+ /* If, for some reason, there is no type pointer (this shouldn't
+ * happen but...), we have to make sure that we don't try to
+ * reference a NULL pointer and get a SEGV. Return an error if
+ * 't' is NULL.
+ */
+ if (!t) {
+ return((type_t*)NULL);
+ }
+ }
+ if (t->flag == KLTYPE_FLAG) {
+ return (t);
+ }
+ return((type_t*)NULL);
+}
+
+/*
+ * expand_variables()
+ */
+static char *
+expand_variables(char *exp, int flags)
+{
+ /* Variable expansion is not yet implemented; eval() treats a
+ * NULL return with no eval_error as "nothing to expand".
+ */
+ return((char *)NULL);
+}
+
+/*
+ * eval()
+ */
+node_t *
+eval(char **exp, int flags)
+{
+ token_t *tok;
+ node_t *n, *root;
+ char *e, *s;
+
+ eval_error = 0;
+ logical_flag = 0;
+
+ /* Make sure there is an expression to evaluate
+ */
+ if (!(*exp)) {
+ return ((node_t*)NULL);
+ }
+
+ /* Expand any variables that are in the expression string. If
+ * a new string is allocated by the expand_variables() function,
+ * we need to make sure the original expression string gets
+ * freed. In any event, point s at the current expression string
+ * so that it gets freed up when we are done.
+ */
+ if ((e = expand_variables(*exp, 0))) {
+ kl_free_block((void *)*exp);
+ *exp = e;
+ } else if (eval_error) {
+ eval_error |= E_BAD_EVAR;
+ error_token = *exp;
+ }
+ s = *exp;
+ tok = get_token_list(s);
+ if (eval_error) {
+ return((node_t*)NULL);
+ }
+
+ /* Get the node_list and evaluate the expression.
+ */
+ node_list = get_node_list(tok, flags);
+ if (eval_error) {
+ free_nodelist(node_list);
+ node_list = (node_t*)NULL;
+ free_tokens(tok);
+ return((node_t*)NULL);
+ }
+ if (!(n = do_eval(flags))) {
+ if (!eval_error) {
+ set_eval_error(E_SYNTAX_ERROR);
+ error_token = s + strlen(s) - 1;
+ }
+ free_nodes(n);
+ free_tokens(tok);
+ return((node_t*)NULL);
+ }
+
+ if (!(root = replace(n, flags))) {
+ if (eval_error) {
+ free_nodes(n);
+ free_tokens(tok);
+ return((node_t*)NULL);
+ }
+ root = n;
+ }
+
+ /* Check to see if the result should
+ * be interpreted as 'true' or 'false'
+ */
+ if (logical_flag && ((root->value == 0) || (root->value == 1))) {
+ root->flags |= BOOLIAN_FLAG;
+ }
+ free_tokens(tok);
+ return(root);
+}
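+
+#if 0
+/* Illustrative sketch only, not part of the patch: driving eval()
+ * from a command. The expression is heap allocated because eval()
+ * takes char ** and frees/replaces the string once expand_variables()
+ * is implemented.
+ */
+static void eval_example(void)
+{
+ node_t *np;
+ char *exp = (char *)kl_alloc_block(32);
+
+ strcpy(exp, "(3 + 4) * 2");
+ if ((np = eval(&exp, 0))) {
+ print_eval_results(np, 0); /* prints 14 */
+ kdb_printf("\n");
+ free_nodes(np);
+ }
+ kl_free_block((void *)exp);
+}
+#endif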
+
+/*
+ * print_number()
+ */
+void
+print_number(node_t *np, int flags)
+{
+ int size;
+ unsigned long long value;
+
+ if ((size = number_to_size(np)) && (size != sizeof(uint64_t))) {
+ value = np->value & (((uint64_t)1 << (uint64_t)(size*8))-1);
+ } else {
+ value = np->value;
+ }
+ if (flags & C_HEX) {
+ kdb_printf("0x%llx", value);
+ } else if (flags & C_BINARY) {
+ kdb_printf("0b");
+ kl_binary_print(value);
+ } else {
+ if (np->flags & UNSIGNED_FLAG) {
+ kdb_printf("%llu", value);
+ } else {
+ kdb_printf("%lld", np->value);
+ }
+ }
+}
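+
+/* Worked example of the masking above: on a 32-bit kernel a signed
+ * value of -2 is held sign-extended as 0xfffffffffffffffe and
+ * number_to_size() reports 4 bytes for it, so hex output shows the
+ * masked 0xfffffffe rather than all sixteen f's.
+ */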
+
+/*
+ * print_string()
+ */
+void
+print_string(kaddr_t addr, int size)
+{
+ int i;
+ char *str;
+
+ if (!size) {
+ size = 255;
+ }
+ /* FIXME: untested */
+ if (invalid_address(addr, size)) {
+ klib_error = KLE_INVALID_PADDR;
+ return;
+ }
+ str = (char*)kl_alloc_block(size + 1);
+ kl_get_block(addr, size, (void *)str, (void *)0);
+ str[size] = 0; /* guarantee termination for the %s below */
+ kdb_printf("\"%s", str);
+ for (i = 0; i < size; i++) {
+ if (!str[i]) {
+ break;
+ }
+ }
+ if (KL_ERROR || (i == size)) {
+ kdb_printf("...");
+ }
+ kdb_printf("\"");
+ kl_free_block(str);
+}
+
+/*
+ * kl_print_error()
+ */
+void
+kl_print_error(void)
+{
+ int ecode;
+
+ ecode = klib_error & 0xffffffff;
+ switch(ecode) {
+
+ /** General klib error codes
+ **/
+ case KLE_NO_MEMORY:
+ kdb_printf("insufficient memory");
+ break;
+ case KLE_OPEN_ERROR:
+ kdb_printf("unable to open file");
+ break;
+ case KLE_ZERO_BLOCK:
+ kdb_printf("tried to allocate a zero-sized block");
+ break;
+ case KLE_INVALID_VALUE:
+ kdb_printf("invalid input value");
+ break;
+ case KLE_NULL_BUFF:
+ kdb_printf( "NULL buffer pointer");
+ break;
+ case KLE_ZERO_SIZE:
+ kdb_printf("zero sized block requested");
+ break;
+ case KLE_ACTIVE:
+ kdb_printf("operation not supported on a live system");
+ break;
+ case KLE_UNSUPPORTED_ARCH:
+ kdb_printf("unsupported architecture");
+ break;
+ case KLE_MISC_ERROR:
+ kdb_printf("KLIB error");
+ break;
+ case KLE_NOT_SUPPORTED:
+ kdb_printf("operation not supported");
+ break;
+ case KLE_UNKNOWN_ERROR:
+ kdb_printf("unknown error");
+ break;
+
+ /** memory error codes
+ **/
+ case KLE_BAD_MAP_FILE:
+ kdb_printf("bad map file");
+ break;
+ case KLE_BAD_DUMP:
+ kdb_printf("bad dump file");
+ break;
+ case KLE_BAD_DUMPTYPE:
+ kdb_printf("bad dumptype");
+ break;
+ case KLE_INVALID_LSEEK:
+ kdb_printf("lseek error");
+ break;
+ case KLE_INVALID_READ:
+ kdb_printf("not found in dump file");
+ break;
+ case KLE_BAD_KERNINFO:
+ kdb_printf("bad kerninfo struct");
+ break;
+ case KLE_INVALID_PADDR:
+ kdb_printf("invalid physical address");
+ break;
+ case KLE_INVALID_VADDR:
+ kdb_printf("invalid virtual address");
+ break;
+ case KLE_INVALID_VADDR_ALIGN:
+ kdb_printf("invalid vaddr alignment");
+ break;
+ case KLE_INVALID_MAPPING:
+ kdb_printf("invalid address mapping");
+ break;
+ case KLE_PAGE_NOT_PRESENT:
+ kdb_printf("page not present");
+ break;
+ case KLE_BAD_ELF_FILE:
+ kdb_printf("bad elf file");
+ break;
+ case KLE_ARCHIVE_FILE:
+ kdb_printf("archive file");
+ break;
+ case KLE_MAP_FILE_PRESENT:
+ kdb_printf("map file present");
+ break;
+ case KLE_BAD_MAP_FILENAME:
+ kdb_printf("bad map filename");
+ break;
+ case KLE_BAD_DUMP_FILENAME:
+ kdb_printf("bad dump filename");
+ break;
+ case KLE_BAD_NAMELIST_FILE:
+ kdb_printf("bad namelist file");
+ break;
+ case KLE_BAD_NAMELIST_FILENAME:
+ kdb_printf("bad namelist filename");
+ break;
+
+ /** symbol error codes
+ **/
+ case KLE_NO_SYMTAB:
+ kdb_printf("no symtab");
+ break;
+ case KLE_NO_SYMBOLS:
+ kdb_printf("no symbol information");
+ break;
+ case KLE_NO_MODULE_LIST:
+ kdb_printf("kernel without module support");
+ break;
+
+ /** kernel data error codes
+ **/
+ case KLE_INVALID_KERNELSTACK:
+ kdb_printf("invalid kernel stack");
+ break;
+ case KLE_INVALID_STRUCT_SIZE:
+ kdb_printf("invalid struct size");
+ break;
+ case KLE_BEFORE_RAM_OFFSET:
+ kdb_printf("physical address proceeds start of RAM");
+ break;
+ case KLE_AFTER_MAXPFN:
+ kdb_printf("PFN exceeds maximum PFN");
+ break;
+ case KLE_AFTER_PHYSMEM:
+ kdb_printf("address exceeds physical memory");
+ break;
+ case KLE_AFTER_MAXMEM:
+ kdb_printf("address exceeds maximum physical address");
+ break;
+ case KLE_PHYSMEM_NOT_INSTALLED:
+ kdb_printf("physical memory not installed");
+ break;
+ case KLE_NO_DEFTASK:
+ kdb_printf("default task not set");
+ break;
+ case KLE_PID_NOT_FOUND:
+ kdb_printf("PID not found");
+ break;
+ case KLE_DEFTASK_NOT_ON_CPU:
+ kdb_printf("default task not running on a cpu");
+ break;
+ case KLE_NO_CURCPU:
+ kdb_printf("current cpu could not be determined");
+ break;
+
+ case KLE_KERNEL_MAGIC_MISMATCH:
+ kdb_printf("kernel_magic mismatch "
+ "of map and memory image");
+ break;
+
+ case KLE_INVALID_DUMP_HEADER:
+ kdb_printf("invalid dump header in dump");
+ break;
+
+ case KLE_DUMP_INDEX_CREATION:
+ kdb_printf("cannot create index file");
+ break;
+
+ case KLE_DUMP_HEADER_ONLY:
+ kdb_printf("dump only has a dump header");
+ break;
+
+ case KLE_NO_END_SYMBOL:
+ kdb_printf("no _end symbol in kernel");
+ break;
+
+ case KLE_NO_CPU:
+ kdb_printf("CPU not installed");
+ break;
+
+ default:
+ break;
+ }
+ kdb_printf("\n");
+}
+
+/*
+ * kl_print_string()
+ *
+ * print out a string, translating all embedded control characters
+ * (e.g., '\n' for newline, '\t' for tab, etc.)
+ */
+void
+kl_print_string(char *s)
+{
+ char *sp, *cp;
+
+ kl_reset_error();
+
+ if (!(sp = s)) {
+ klib_error = KLE_BAD_STRING;
+ return;
+ }
+ /* FIXME: untested */
+ if (invalid_address((kaddr_t)sp, 1)) {
+ klib_error = KLE_INVALID_PADDR;
+ return;
+ }
+
+ while (sp) {
+ if ((cp = strchr(sp, '\\'))) {
+ switch (*(cp + 1)) {
+
+ case 'n' :
+ *cp++ = '\n';
+ *cp++ = 0;
+ break;
+
+ case 't' :
+ *cp++ = '\t';
+ *cp++ = 0;
+ break;
+
+ default :
+ if (*(cp + 1) == 0) {
+ klib_error = KLE_BAD_STRING;
+ return;
+ }
+ /* Change the '\' character to a zero
+ * and then print the string (the rest
+ * of the string will be picked
+ * up on the next pass).
+ */
+ *cp++ = 0;
+ break;
+ }
+ kdb_printf("%s", sp);
+ sp = cp;
+ } else {
+ kdb_printf("%s", sp);
+ sp = 0;
+ }
+ }
+}
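+
+/* For illustration: given a buffer holding the characters
+ * o n e \ n t w o (a literal backslash, not a newline),
+ * kl_print_string() prints "one" and "two" on separate lines,
+ * rewriting each two-character escape in place between
+ * kdb_printf() calls.
+ */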
+
+/*
+ * print_eval_results()
+ */
+int
+print_eval_results(node_t *np, int flags)
+{
+ int size, i, count, ptr_cnt = 0;
+ kaddr_t addr;
+ char *typestr;
+ kltype_t *kltp, *rkltp = NULL, *nkltp;
+ type_t *tp;
+
+ /* Print the results
+ */
+ switch (np->node_type) {
+
+ case NUMBER:
+ print_number(np, flags);
+ break;
+
+ case TYPE_DEF: {
+
+ /* First, determine the number of levels of indirection
+ * by determining the number of pointer type records.
+ */
+ if ((tp = np->type)) {
+ while (tp && (tp->flag == POINTER_FLAG)) {
+ ptr_cnt++;
+ tp = tp->t_next;
+ }
+ if (tp) {
+ rkltp = tp->t_kltp;
+ }
+ }
+ if (!rkltp) {
+ kdb_printf("Type information not available\n");
+ return(1);
+ }
+
+ if (ptr_cnt) {
+
+ /* If this is a member, we need to get the
+ * first type record.
+ */
+ if (rkltp->kl_type == KLT_MEMBER) {
+ /* We need to get down to the first
+ * real type record...
+ */
+ rkltp = rkltp->kl_realtype;
+ }
+
+ /* step over any KLT_POINTER type records.
+ */
+ while (rkltp && rkltp->kl_type == KLT_POINTER) {
+ rkltp = rkltp->kl_realtype;
+ }
+ if (!rkltp) {
+ kdb_printf("Bad type information\n");
+ return(1);
+ }
+ typestr = rkltp->kl_typestr;
+ if (rkltp->kl_type == KLT_FUNCTION) {
+ kdb_printf("%s(", typestr);
+ } else if (rkltp->kl_type == KLT_ARRAY) {
+ kdb_printf("(%s(", typestr);
+ } else {
+ kdb_printf("(%s", typestr);
+ }
+ for (i = 0; i < ptr_cnt; i++) {
+ kdb_printf("*");
+ }
+ if (rkltp->kl_type == KLT_FUNCTION) {
+ kdb_printf(")(");
+ } else if (rkltp->kl_type == KLT_ARRAY) {
+ kdb_printf(")");
+
+ nkltp = rkltp;
+ while (nkltp->kl_type == KLT_ARRAY) {
+ count = nkltp->kl_high_bounds -
+ nkltp->kl_low_bounds + 1;
+ kdb_printf("[%d]", count);
+ nkltp = nkltp->kl_elementtype;
+ }
+ }
+ kdb_printf(") ");
+ kdb_printf("0x%llx", np->value);
+
+ if (ptr_cnt > 1) {
+ break;
+ }
+
+ if ((rkltp->kl_type == KLT_BASE) &&
+ rkltp->kl_encoding == ENC_CHAR) {
+ kdb_printf(" = ");
+ print_string(np->value, 0);
+ }
+ break;
+ }
+ if (np->flags & KLTYPE_FLAG) {
+ void * ptr;
+
+ /* Get the type information. It's possible
+ * that the type is a member. In which case,
+ * the size may only be from this record
+ * (which would be the case if this is an
+ * array). We must check the original type
+ * record first, and try the realtype record
+ * if the value is zero.
+ */
+ kltp = np->type->t_kltp;
+
+ if (kltp->kl_type == KLT_MEMBER) {
+ rkltp = kltp->kl_realtype;
+ } else {
+ rkltp = kltp;
+ }
+
+ /* Check to see if this is a typedef. If
+ * it is, then it might be a typedef for
+ * a pointer type. Don't walk to the last
+ * type record.
+ */
+ while (rkltp->kl_type == KLT_TYPEDEF) {
+ rkltp = rkltp->kl_realtype;
+ }
+
+ if (rkltp->kl_type == KLT_POINTER) {
+ kdb_printf("0x%llx", np->value);
+ break;
+ }
+ if ((rkltp->kl_name != 0) &&
+ !(strcmp(rkltp->kl_name, "void"))) {
+ /* we are about to dereference
+ * a void pointer.
+ */
+ kdb_printf("Can't dereference a "
+ "generic pointer.\n");
+ return(1);
+ }
+
+ size = rkltp->kl_size;
+ if (size <= 0) {
+ size = kltp->kl_size;
+ }
+
+ if (rkltp->kl_type == KLT_ARRAY) {
+ size = rkltp->kl_high_bounds -
+ rkltp->kl_low_bounds + 1;
+ if (rkltp->kl_elementtype == NULL) {
+ kdb_printf("Incomplete array"
+ " type.\n");
+ return(1);
+ }
+ if (rkltp->kl_elementtype->kl_type ==
+ KLT_POINTER) {
+ size *= sizeof(void *);
+ } else {
+ size *= rkltp->kl_elementtype->kl_size;
+ }
+ }
+ if (size) {
+ ptr = kl_alloc_block(size);
+ } else {
+ ptr = NULL;
+ }
+ if ((rkltp->kl_type == KLT_BASE) &&
+ !(np->flags & ADDRESS_FLAG)) {
+ switch (size) {
+ case 1:
+ *(unsigned char *)ptr =
+ np->value;
+ break;
+
+ case 2:
+ *(unsigned short *)ptr =
+ np->value;
+ break;
+
+ case 4:
+ *(unsigned int *)ptr =
+ np->value;
+ break;
+
+ case 8:
+ *(unsigned long long *)
+ ptr = np->value;
+ break;
+ }
+ kl_print_type(ptr, rkltp, 0,
+ flags|SUPPRESS_NAME);
+ kl_free_block(ptr);
+ return(1);
+ }
+
+ if (size) {
+ addr = np->address;
+ if (invalid_address(addr, size)) {
+ kdb_printf (
+ "invalid address %#lx\n",
+ addr);
+ return 1;
+ }
+ kl_get_block(addr, size, (void *)ptr,
+ (void *)0);
+ if (KL_ERROR) {
+ kl_print_error();
+ kl_free_block(ptr);
+ return(1);
+ }
+ }
+ /* Print out the actual type
+ */
+ switch (rkltp->kl_type) {
+ case KLT_STRUCT:
+ case KLT_UNION:
+ kl_print_type(ptr, rkltp, 0,
+ flags);
+ break;
+
+ case KLT_ARRAY:
+ kl_print_type(ptr, rkltp, 0,
+ flags| SUPPRESS_NAME);
+ break;
+
+ default:
+ kl_print_type(ptr, rkltp, 0,
+ (flags|
+ SUPPRESS_NAME|
+ SUPPRESS_NL));
+ break;
+ }
+ if (ptr) {
+ kl_free_block(ptr);
+ }
+ }
+ break;
+ }
+
+ case VADDR:
+ /* If we get here, there was no type info available.
+ * The ADDRESS_FLAG should be set (otherwise we
+ * would have returned an error). So, print out
+ * the address.
+ */
+ kdb_printf("0x%lx", np->address);
+ break;
+
+ default:
+ if (np->node_type == TEXT) {
+ kl_print_string(np->name);
+ if (KL_ERROR) {
+ kl_print_error();
+ return(1);
+ }
+ } else if (np->node_type == CHARACTER) {
+ kdb_printf("\'%c\'", (char)np->value);
+ }
+ break;
+ }
+ return(0);
+}
+
+/*
+ * print_eval_error()
+ */
+void
+print_eval_error(
+ char *cmdname,
+ char *s,
+ char *bad_ptr,
+ uint64_t error,
+ int flags)
+{
+ int i, cmd_len;
+
+ kdb_printf("%s %s\n", cmdname, s);
+ cmd_len = strlen(cmdname);
+
+ if (!bad_ptr) {
+ for (i = 0; i < (strlen(s) + cmd_len); i++) {
+ kdb_printf(" ");
+ }
+ } else {
+ for (i = 0; i < (bad_ptr - s + 1 + cmd_len); i++) {
+ kdb_printf(" ");
+ }
+ }
+ kdb_printf("^ ");
+ switch (error) {
+ case E_OPEN_PAREN :
+ kdb_printf("Too many open parenthesis\n");
+ break;
+
+ case E_CLOSE_PAREN :
+ kdb_printf("Too many close parenthesis\n");
+ break;
+
+ case E_BAD_STRUCTURE :
+ kdb_printf("Invalid structure\n");
+ break;
+
+ case E_MISSING_STRUCTURE :
+ kdb_printf("Missing structure\n");
+ break;
+
+ case E_BAD_MEMBER :
+ kdb_printf("No such member\n");
+ break;
+
+ case E_BAD_OPERATOR :
+ kdb_printf("Invalid operator\n");
+ break;
+
+ case E_MISSING_OPERAND :
+ kdb_printf("Missing operand\n");
+ break;
+
+ case E_BAD_OPERAND :
+ kdb_printf("Invalid operand\n");
+ break;
+
+ case E_BAD_TYPE :
+ kdb_printf("Invalid type\n");
+ if (!have_debug_file) {
+ kdb_printf("no debuginfo file\n");
+ return;
+ }
+ break;
+
+ case E_NOTYPE :
+ kdb_printf("Could not find type information\n");
+ break;
+
+ case E_BAD_POINTER :
+ kdb_printf("Invalid pointer\n");
+ break;
+
+ case E_BAD_INDEX :
+ kdb_printf("Invalid array index\n");
+ break;
+
+ case E_BAD_CHAR :
+ kdb_printf("Invalid character value\n");
+ break;
+
+ case E_BAD_STRING :
+ kdb_printf("Non-termining string\n");
+ break;
+
+ case E_END_EXPECTED :
+ kdb_printf(
+ "Expected end of print statement\n");
+ break;
+
+ case E_BAD_EVAR :
+ kdb_printf("Invalid eval variable\n");
+ break;
+
+ case E_BAD_VALUE :
+ kdb_printf("Invalid value\n");
+ break;
+
+ case E_NO_VALUE :
+ kdb_printf("No value supplied\n");
+ break;
+
+ case E_DIVIDE_BY_ZERO :
+ kdb_printf("Divide by zero\n");
+ break;
+
+ case E_BAD_CAST :
+ kdb_printf("Invalid cast\n");
+ break;
+
+ case E_NO_ADDRESS :
+ kdb_printf("Not an address\n");
+ break;
+
+ case E_SINGLE_QUOTE :
+ kdb_printf("Missing single quote\n");
+ break;
+
+ case E_BAD_WHATIS :
+ kdb_printf("Invalid whatis Operation\n");
+ break;
+
+ case E_NOT_IMPLEMENTED :
+ kdb_printf("Not implemented\n");
+ break;
+
+ default :
+ kdb_printf("Syntax error\n");
+ break;
+ }
+}
+
+/*
+ * single_type()
+ */
+void
+single_type(char *str)
+{
+ char buffer[256], *type_name;
+ kltype_t *kltp;
+ syment_t *sp;
+
+ type_name = buffer;
+ strncpy(type_name, str, sizeof(buffer) - 1);
+ buffer[sizeof(buffer) - 1] = '\0';
+
+ if (have_debug_file) {
+ if ((kltp = kl_find_type(type_name, KLT_TYPE))) {
+ kl_print_type((void *)NULL, kltp, 0, C_SHOWOFFSET);
+ return;
+ }
+ if ((kltp = kl_find_type(type_name, KLT_TYPEDEF))) {
+ kdb_printf ("typedef %s:\n", type_name);
+ kl_print_type((void *)NULL, kltp, 0, C_SHOWOFFSET);
+ return;
+ }
+ }
+ if ((sp = kl_lkup_symname(type_name))) {
+ kdb_printf ("symbol %s value: %#lx\n", str, sp->s_addr);
+ kl_free_block((void *)sp);
+ return;
+ }
+ kdb_printf("could not find type or symbol information for %s\n",
+ type_name);
+ return;
+}
+
+/*
+ * sizeof_type()
+ */
+void
+sizeof_type(char *str)
+{
+ char buffer[256], *type_name;
+ kltype_t *kltp;
+
+ type_name = buffer;
+ strncpy(type_name, str, sizeof(buffer) - 1);
+ buffer[sizeof(buffer) - 1] = '\0';
+
+ if ((kltp = kl_find_type(type_name, KLT_TYPE))) {
+ kdb_printf ("%s %d %#x\n", kltp->kl_typestr,
+ kltp->kl_size, kltp->kl_size);
+ return;
+ }
+ if ((kltp = kl_find_type(type_name, KLT_TYPEDEF))) {
+ kdb_printf ("%s %d %#x\n", kltp->kl_typestr,
+ kltp->kl_size, kltp->kl_size);
+ return;
+ }
+ kdb_printf("could not find type information for %s\n", type_name);
+}
+
+EXPORT_SYMBOL(have_debug_file);
+EXPORT_SYMBOL(type_tree);
+EXPORT_SYMBOL(typedef_tree);
+
+#if defined(CONFIG_X86_32)
+/* needed for i386: */
+#include <linux/types.h>
+#include <asm/div64.h>
+/*
+ * Generic C version of full 64 bit by 64 bit division
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * Code generated for this function might be very inefficient
+ * for some CPUs, can be overridden by linking arch-specific
+ * assembly versions such as arch/sparc/lib/udivdi.S
+ */
+uint64_t
+__udivdi3(uint64_t dividend, uint64_t divisor)
+{
+ uint32_t d = divisor;
+ /* Scale divisor to 32 bits */
+ if (divisor > 0xffffffffULL) {
+ unsigned int shift = fls(divisor >> 32);
+ d = divisor >> shift;
+ dividend >>= shift;
+ }
+ /* avoid 64 bit division if possible */
+ if (dividend >> 32)
+ do_div(dividend, d);
+ else
+ dividend = (uint32_t) dividend / d;
+ return dividend;
+}
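+
+/* Worked example of the scaling above (illustrative): for
+ * 0x300000000 / 0x100000001, fls(1) == 1, so both operands shift
+ * right one bit and do_div() computes 0x180000000 / 0x80000000 == 3,
+ * although the exact quotient is 2. The shortcut trades exactness
+ * in the low bits for avoiding a full 64/64 divide.
+ */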
+
+int64_t
+__divdi3(int64_t dividend, int64_t divisor)
+{
+ int32_t d = divisor;
+ /* Scale divisor to 32 bits */
+ if (divisor > 0xffffffffLL) {
+ unsigned int shift = fls(divisor >> 32);
+ d = divisor >> shift;
+ dividend >>= shift;
+ }
+ /* avoid 64 bit division if possible */
+ if (dividend >> 32)
+ do_div(dividend, d);
+ else
+ dividend = (int32_t) dividend / d;
+ return dividend;
+}
+
+uint64_t
+__umoddi3(uint64_t dividend, uint64_t divisor)
+{
+ return dividend - (__udivdi3(dividend, divisor) * divisor);
+}
+
+int64_t
+__moddi3(int64_t dividend, int64_t divisor)
+{
+ return dividend - (__divdi3(dividend, divisor) * divisor);
+}
+#endif /* CONFIG_X86_32 */
--- /dev/null
+/*
+ * Kernel Debugger Architecture Independent Main Code
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Xscale (R) modifications copyright (C) 2003 Intel Corporation.
+ */
+
+/*
+ * Updated for Xscale (R) architecture support
+ * Eddie Dong <eddie.dong@intel.com> 8 Jan 03
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/sysrq.h>
+#include <linux/smp.h>
+#include <linux/utsname.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/nmi.h>
+#include <linux/ptrace.h>
+#include <linux/sysctl.h>
+#if defined(CONFIG_LKCD_DUMP) || defined(CONFIG_LKCD_DUMP_MODULE)
+#include <linux/dump.h>
+#endif
+#include <linux/cpu.h>
+#include <linux/kdebug.h>
+#ifdef CONFIG_KDB_KDUMP
+#include <linux/kexec.h>
+#endif
+
+#include <acpi/acpi_bus.h>
+
+#include <asm/system.h>
+#include <asm/kdebug.h>
+#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+
+char kdb_debug_info_filename[256] = {""};
+EXPORT_SYMBOL(kdb_debug_info_filename);
+#define GREP_LEN 256
+char kdb_grep_string[GREP_LEN];
+int kdb_grepping_flag;
+EXPORT_SYMBOL(kdb_grepping_flag);
+int kdb_grep_leading;
+int kdb_grep_trailing;
+
+/*
+ * Kernel debugger state flags
+ */
+volatile int kdb_flags;
+atomic_t kdb_event;
+atomic_t kdb_8250;
+
+/*
+ * kdb_lock protects updates to kdb_initial_cpu. Used to
+ * single thread processors through the kernel debugger.
+ */
+static DEFINE_SPINLOCK(kdb_lock);
+volatile int kdb_initial_cpu = -1; /* cpu number that owns kdb */
+int kdb_seqno = 2; /* how many times kdb has been entered */
+
+volatile int kdb_nextline = 1;
+static volatile int kdb_new_cpu; /* Which cpu to switch to */
+
+volatile int kdb_state[NR_CPUS]; /* Per cpu state */
+
+const struct task_struct *kdb_current_task;
+EXPORT_SYMBOL(kdb_current_task);
+struct pt_regs *kdb_current_regs;
+
+#ifdef CONFIG_KDB_OFF
+int kdb_on = 0; /* Default is off */
+#else
+int kdb_on = 1; /* Default is on */
+#endif /* CONFIG_KDB_OFF */
+
+const char *kdb_diemsg;
+static int kdb_go_count;
+#ifdef CONFIG_KDB_CONTINUE_CATASTROPHIC
+static unsigned int kdb_continue_catastrophic = CONFIG_KDB_CONTINUE_CATASTROPHIC;
+#else
+static unsigned int kdb_continue_catastrophic = 0;
+#endif
+
+#ifdef kdba_setjmp
+ /*
+ * Must have a setjmp buffer per CPU. Switching cpus will
+ * cause the jump buffer to be setup for the new cpu, and
+ * subsequent switches (and pager aborts) will use the
+ * appropriate per-processor values.
+ */
+kdb_jmp_buf *kdbjmpbuf;
+#endif /* kdba_setjmp */
+
+ /*
+ * kdb_commands describes the available commands.
+ */
+static kdbtab_t *kdb_commands;
+static int kdb_max_commands;
+
+typedef struct _kdbmsg {
+ int km_diag; /* kdb diagnostic */
+ char *km_msg; /* Corresponding message text */
+} kdbmsg_t;
+
+#define KDBMSG(msgnum, text) \
+ { KDB_##msgnum, text }
+
+static kdbmsg_t kdbmsgs[] = {
+ KDBMSG(NOTFOUND,"Command Not Found"),
+ KDBMSG(ARGCOUNT, "Improper argument count, see usage."),
+ KDBMSG(BADWIDTH, "Illegal value for BYTESPERWORD, use 1, 2, 4 or 8; 8 is only allowed on 64 bit systems"),
+ KDBMSG(BADRADIX, "Illegal value for RADIX, use 8, 10 or 16"),
+ KDBMSG(NOTENV, "Cannot find environment variable"),
+ KDBMSG(NOENVVALUE, "Environment variable should have value"),
+ KDBMSG(NOTIMP, "Command not implemented"),
+ KDBMSG(ENVFULL, "Environment full"),
+ KDBMSG(ENVBUFFULL, "Environment buffer full"),
+ KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
+#ifdef CONFIG_CPU_XSCALE
+ KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
+#else
+ KDBMSG(TOOMANYDBREGS, "More breakpoints than db registers defined"),
+#endif
+ KDBMSG(DUPBPT, "Duplicate breakpoint address"),
+ KDBMSG(BPTNOTFOUND, "Breakpoint not found"),
+ KDBMSG(BADMODE, "Invalid IDMODE"),
+ KDBMSG(BADINT, "Illegal numeric value"),
+ KDBMSG(INVADDRFMT, "Invalid symbolic address format"),
+ KDBMSG(BADREG, "Invalid register name"),
+ KDBMSG(BADCPUNUM, "Invalid cpu number"),
+ KDBMSG(BADLENGTH, "Invalid length field"),
+ KDBMSG(NOBP, "No Breakpoint exists"),
+ KDBMSG(BADADDR, "Invalid address"),
+};
+#undef KDBMSG
+
+static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
+
+
+/*
+ * Initial environment. This is all kept static and local to
+ * this file. We don't want to rely on the memory allocation
+ * mechanisms in the kernel, so we use a very limited allocate-only
+ * heap for new and altered environment variables. The entire
+ * environment is limited to a fixed number of entries (add more
+ * to __env[] if required) and a fixed amount of heap (add more to
+ * KDB_ENVBUFSIZE if required).
+ */
+
+static char *__env[] = {
+#if defined(CONFIG_SMP)
+ "PROMPT=[%d]kdb> ",
+ "MOREPROMPT=[%d]more> ",
+#else
+ "PROMPT=kdb> ",
+ "MOREPROMPT=more> ",
+#endif
+ "RADIX=16",
+ "LINES=24",
+ "COLUMNS=80",
+ "MDCOUNT=8", /* lines of md output */
+ "BTARGS=9", /* 9 possible args in bt */
+ KDB_PLATFORM_ENV,
+ "DTABCOUNT=30",
+ "NOSECT=1",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+};
+
+static const int __nenv = (sizeof(__env) / sizeof(char *));
+
+/* external commands: */
+int kdb_debuginfo_print(int argc, const char **argv);
+int kdb_pxhelp(int argc, const char **argv);
+int kdb_walkhelp(int argc, const char **argv);
+int kdb_walk(int argc, const char **argv);
+
+/*
+ * kdb_serial_str is the sequence that the user must enter on a serial
+ * console to invoke kdb. It can be a single character such as "\001"
+ * (control-A) or multiple characters such as "\eKDB". NOTE: All except the
+ * last character are passed through to the application reading from the serial
+ * console.
+ *
+ * I tried to make the sequence a CONFIG_ option but most of CML1 cannot cope
+ * with '\' in strings. CML2 would have been able to do it but we lost CML2.
+ * KAO.
+ */
+const char kdb_serial_str[] = "\eKDB";
+EXPORT_SYMBOL(kdb_serial_str);
+
+struct task_struct *
+kdb_curr_task(int cpu)
+{
+ struct task_struct *p = curr_task(cpu);
+#ifdef _TIF_MCA_INIT
+ struct kdb_running_process *krp = kdb_running_process + cpu;
+ if ((task_thread_info(p)->flags & _TIF_MCA_INIT) && krp->p)
+ p = krp->p;
+#endif
+ return p;
+}
+
+/*
+ * kdbgetenv
+ *
+ * This function will return the character string value of
+ * an environment variable.
+ *
+ * Parameters:
+ * match A character string representing an environment variable.
+ * Outputs:
+ * None.
+ * Returns:
+ * NULL No environment variable matches 'match'
+ * char* Pointer to string value of environment variable.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+char *
+kdbgetenv(const char *match)
+{
+ char **ep = __env;
+ int matchlen = strlen(match);
+ int i;
+
+ for(i=0; i<__nenv; i++) {
+ char *e = *ep++;
+
+ if (!e) continue;
+
+ if ((strncmp(match, e, matchlen) == 0)
+ && ((e[matchlen] == '\0')
+ ||(e[matchlen] == '='))) {
+ char *cp = strchr(e, '=');
+ return (cp ? ++cp :"");
+ }
+ }
+ return NULL;
+}
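+
+#if 0
+/* Illustrative sketch only, not part of the patch: typical
+ * kdbgetenv() use. Defaults come from __env[] above, so "LINES"
+ * yields "24" until changed with 'set'; an unknown name yields NULL
+ * and a variable defined without a value yields "".
+ */
+static void kdbgetenv_example(void)
+{
+ char *lines = kdbgetenv("LINES"); /* "24" by default */
+
+ if (lines)
+ kdb_printf("LINES=%s\n", lines);
+ BUG_ON(kdbgetenv("NOSUCHVAR") != NULL);
+}
+#endif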
+
+/*
+ * kdballocenv
+ *
+ * This function is used to allocate bytes for environment entries.
+ *
+ * Parameters:
+ * bytes The number of bytes to allocate
+ * Outputs:
+ * None.
+ * Returns:
+ * A pointer to the allocated space on success, NULL if the
+ * environment buffer is exhausted.
+ * Locking:
+ * No locking considerations required. Must be called with all
+ * processors halted.
+ * Remarks:
+ * We use a static environment buffer (envbuffer) to hold the values
+ * of dynamically generated environment variables (see kdb_set). Buffer
+ * space once allocated is never free'd, so over time, the amount of space
+ * (currently 512 bytes) will be exhausted if env variables are changed
+ * frequently.
+ */
+static char *
+kdballocenv(size_t bytes)
+{
+#define KDB_ENVBUFSIZE 512
+ static char envbuffer[KDB_ENVBUFSIZE];
+ static int envbufsize;
+ char *ep = NULL;
+
+ if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
+ ep = &envbuffer[envbufsize];
+ envbufsize += bytes;
+ }
+ return ep;
+}
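+
+/* For illustration: kdballocenv() never frees, so each 'set' that
+ * creates or changes a variable consumes envbuffer space for good;
+ * once the 512 bytes are exhausted kdb_set() returns KDB_ENVBUFFULL.
+ */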
+
+/*
+ * kdbgetulenv
+ *
+ * This function will return the value of an unsigned long-valued
+ * environment variable.
+ *
+ * Parameters:
+ * match A character string representing a numeric value
+ * Outputs:
+ * *value the unsigned long representation of the env variable 'match'
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+
+static int
+kdbgetulenv(const char *match, unsigned long *value)
+{
+ char *ep;
+
+ ep = kdbgetenv(match);
+ if (!ep) return KDB_NOTENV;
+ if (strlen(ep) == 0) return KDB_NOENVVALUE;
+
+ *value = simple_strtoul(ep, NULL, 0);
+
+ return 0;
+}
+
+/*
+ * kdbgetintenv
+ *
+ * This function will return the value of an integer-valued
+ * environment variable.
+ *
+ * Parameters:
+ * match A character string representing an integer-valued env variable
+ * Outputs:
+ * *value the integer representation of the environment variable 'match'
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+
+int
+kdbgetintenv(const char *match, int *value) {
+ unsigned long val;
+ int diag;
+
+ diag = kdbgetulenv(match, &val);
+ if (!diag) {
+ *value = (int) val;
+ }
+ return diag;
+}
+
+/*
+ * kdbgetularg
+ *
+ * This function will convert a numeric string
+ * into an unsigned long value.
+ *
+ * Parameters:
+ * arg A character string representing a numeric value
+ * Outputs:
+ * *value the unsigned long representation of arg.
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ * Locking:
+ * No locking considerations required.
+ * Remarks:
+ */
+
+int
+kdbgetularg(const char *arg, unsigned long *value)
+{
+ char *endp;
+ unsigned long val;
+
+ val = simple_strtoul(arg, &endp, 0);
+
+ if (endp == arg) {
+ /*
+ * Try base 16, for us folks too lazy to type the
+ * leading 0x...
+ */
+ val = simple_strtoul(arg, &endp, 16);
+ if (endp == arg)
+ return KDB_BADINT;
+ }
+
+ *value = val;
+
+ return 0;
+}
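+
+#if 0
+/* Illustrative sketch only, not part of the patch: the base-16
+ * fallback above lets a bare hex string parse the same as one with
+ * the 0x prefix, while strings with a leading digit follow the
+ * normal simple_strtoul() base-0 rules.
+ */
+static void kdbgetularg_example(void)
+{
+ unsigned long v1, v2, v3;
+
+ kdbgetularg("0xc0000000", &v1);
+ kdbgetularg("c0000000", &v2); /* base-16 fallback */
+ kdbgetularg("1000", &v3); /* leading digit: decimal */
+ BUG_ON(v1 != v2 || v3 != 1000);
+}
+#endif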
+
+/*
+ * kdb_set
+ *
+ * This function implements the 'set' command. Alter an existing
+ * environment variable or create a new one.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_set(int argc, const char **argv)
+{
+ int i;
+ char *ep;
+ size_t varlen, vallen;
+
+ /*
+ * we can be invoked two ways:
+ * set var=value argv[1]="var", argv[2]="value"
+ * set var = value argv[1]="var", argv[2]="=", argv[3]="value"
+ * - if the latter, shift 'em down.
+ */
+ if (argc == 3) {
+ argv[2] = argv[3];
+ argc--;
+ }
+
+ if (argc != 2)
+ return KDB_ARGCOUNT;
+
+ /*
+ * Check for internal variables
+ */
+ if (strcmp(argv[1], "KDBDEBUG") == 0) {
+ unsigned int debugflags;
+ char *cp;
+
+ debugflags = simple_strtoul(argv[2], &cp, 0);
+ if (cp == argv[2] || debugflags & ~KDB_DEBUG_FLAG_MASK) {
+ kdb_printf("kdb: illegal debug flags '%s'\n",
+ argv[2]);
+ return 0;
+ }
+ kdb_flags = (kdb_flags & ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
+ | (debugflags << KDB_DEBUG_FLAG_SHIFT);
+
+ return 0;
+ }
+
+ /*
+ * Tokenizer squashed the '=' sign. argv[1] is variable
+ * name, argv[2] = value.
+ */
+ varlen = strlen(argv[1]);
+ vallen = strlen(argv[2]);
+ ep = kdballocenv(varlen + vallen + 2);
+ if (ep == (char *)0)
+ return KDB_ENVBUFFULL;
+
+ sprintf(ep, "%s=%s", argv[1], argv[2]);
+
+ ep[varlen+vallen+1]='\0';
+
+ for(i=0; i<__nenv; i++) {
+ if (__env[i]
+ && ((strncmp(__env[i], argv[1], varlen)==0)
+ && ((__env[i][varlen] == '\0')
+ || (__env[i][varlen] == '=')))) {
+ __env[i] = ep;
+ return 0;
+ }
+ }
+
+ /*
+ * Wasn't existing variable. Fit into slot.
+ */
+ for(i=0; i<__nenv-1; i++) {
+ if (__env[i] == (char *)0) {
+ __env[i] = ep;
+ return 0;
+ }
+ }
+
+ return KDB_ENVFULL;
+}
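+
+/* For illustration, both accepted spellings update the same variable
+ * and KDBDEBUG is intercepted before the environment is touched:
+ *
+ * kdb> set LINES=40
+ * kdb> set LINES = 40
+ * kdb> set KDBDEBUG=0x1 updates kdb_flags, not __env[]
+ */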
+
+static int
+kdb_check_regs(void)
+{
+ if (!kdb_current_regs) {
+ kdb_printf("No current kdb registers."
+ " You may need to select another task\n");
+ return KDB_BADREG;
+ }
+ return 0;
+}
+
+/*
+ * kdbgetaddrarg
+ *
+ * This function is responsible for parsing an
+ * address-expression and returning the value of
+ * the expression, symbol name, and offset to the caller.
+ *
+ * The argument may consist of a numeric value (decimal or
+ * hexadecimal), a symbol name, a register name (preceded
+ * by the percent sign), an environment variable with a numeric
+ * value (preceded by a dollar sign) or a simple arithmetic
+ * expression consisting of a symbol name, +/-, and a numeric
+ * constant value (offset).
+ *
+ * Parameters:
+ * argc - count of arguments in argv
+ * argv - argument vector
+ * *nextarg - index to next unparsed argument in argv[]
+ * regs - Register state at time of KDB entry
+ * Outputs:
+ * *value - receives the value of the address-expression
+ * *offset - receives the offset specified, if any
+ * *name - receives the symbol name, if any
+ * *nextarg - index to next unparsed argument in argv[]
+ *
+ * Returns:
+ * zero is returned on success, a kdb diagnostic code is
+ * returned on error.
+ *
+ * Locking:
+ * No locking requirements.
+ *
+ * Remarks:
+ *
+ */
+
+int
+kdbgetaddrarg(int argc, const char **argv, int *nextarg,
+ kdb_machreg_t *value, long *offset,
+ char **name)
+{
+ kdb_machreg_t addr;
+ unsigned long off = 0;
+ int positive;
+ int diag;
+ int found = 0;
+ char *symname;
+ char symbol = '\0';
+ char *cp;
+ kdb_symtab_t symtab;
+
+ /*
+ * Process arguments which follow the following syntax:
+ *
+ * symbol | numeric-address [+/- numeric-offset]
+ * %register
+ * $environment-variable
+ */
+
+ if (*nextarg > argc) {
+ return KDB_ARGCOUNT;
+ }
+
+ symname = (char *)argv[*nextarg];
+
+ /*
+ * If there is no whitespace between the symbol
+ * or address and the '+' or '-' symbols, we
+ * remember the character and replace it with a
+ * null so the symbol/value can be properly parsed
+ */
+ if ((cp = strpbrk(symname, "+-")) != NULL) {
+ symbol = *cp;
+ *cp++ = '\0';
+ }
+
+ if (symname[0] == '$') {
+ diag = kdbgetulenv(&symname[1], &addr);
+ if (diag)
+ return diag;
+ } else if (symname[0] == '%') {
+ if ((diag = kdb_check_regs()))
+ return diag;
+ diag = kdba_getregcontents(&symname[1], kdb_current_regs, &addr);
+ if (diag)
+ return diag;
+ } else {
+ found = kdbgetsymval(symname, &symtab);
+ if (found) {
+ addr = symtab.sym_start;
+ } else {
+ diag = kdbgetularg(argv[*nextarg], &addr);
+ if (diag)
+ return diag;
+ }
+ }
+
+ if (!found)
+ found = kdbnearsym(addr, &symtab);
+
+ (*nextarg)++;
+
+ if (name)
+ *name = symname;
+ if (value)
+ *value = addr;
+ if (offset && name && *name)
+ *offset = addr - symtab.sym_start;
+
+ if ((*nextarg > argc)
+ && (symbol == '\0'))
+ return 0;
+
+ /*
+ * check for +/- and offset
+ */
+
+ if (symbol == '\0') {
+ if ((argv[*nextarg][0] != '+')
+ && (argv[*nextarg][0] != '-')) {
+ /*
+ * Not our argument. Return.
+ */
+ return 0;
+ } else {
+ positive = (argv[*nextarg][0] == '+');
+ (*nextarg)++;
+ }
+ } else
+ positive = (symbol == '+');
+
+ /*
+ * Now there must be an offset!
+ */
+ if ((*nextarg > argc)
+ && (symbol == '\0')) {
+ return KDB_INVADDRFMT;
+ }
+
+ if (!symbol) {
+ cp = (char *)argv[*nextarg];
+ (*nextarg)++;
+ }
+
+ diag = kdbgetularg(cp, &off);
+ if (diag)
+ return diag;
+
+ if (!positive)
+ off = -off;
+
+ if (offset)
+ *offset += off;
+
+ if (value)
+ *value += off;
+
+ return 0;
+}
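+
+/* For illustration, the argument forms kdbgetaddrarg() accepts as
+ * typed on the kdb command line (the register and environment names
+ * are examples, not an exhaustive list):
+ *
+ * md schedule+0x10 symbol plus numeric offset
+ * md %eip register contents (i386)
+ * md $MDCOUNT numeric environment variable
+ */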
+
+static void
+kdb_cmderror(int diag)
+{
+ int i;
+
+ if (diag >= 0) {
+ kdb_printf("no error detected (diagnostic is %d)\n", diag);
+ return;
+ }
+
+ for(i=0; i<__nkdb_err; i++) {
+ if (kdbmsgs[i].km_diag == diag) {
+ kdb_printf("diag: %d: %s\n", diag, kdbmsgs[i].km_msg);
+ return;
+ }
+ }
+
+ kdb_printf("Unknown diag %d\n", -diag);
+}
+
+/*
+ * kdb_defcmd, kdb_defcmd2
+ *
+ * This function implements the 'defcmd' command which defines one
+ * command as a set of other commands, terminated by endefcmd.
+ * kdb_defcmd processes the initial 'defcmd' command, kdb_defcmd2
+ * is invoked from kdb_parse for the following commands until
+ * 'endefcmd'.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+struct defcmd_set {
+ int count;
+ int usable;
+ char *name;
+ char *usage;
+ char *help;
+ char **command;
+};
+static struct defcmd_set *defcmd_set;
+static int defcmd_set_count;
+static int defcmd_in_progress;
+
+/* Forward references */
+static int kdb_exec_defcmd(int argc, const char **argv);
+
+static int
+kdb_defcmd2(const char *cmdstr, const char *argv0)
+{
+ struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
+ char **save_command = s->command;
+ if (strcmp(argv0, "endefcmd") == 0) {
+ defcmd_in_progress = 0;
+ if (!s->count)
+ s->usable = 0;
+ if (s->usable)
+ kdb_register(s->name, kdb_exec_defcmd, s->usage, s->help, 0);
+ return 0;
+ }
+ if (!s->usable)
+ return KDB_NOTIMP;
+ s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
+ if (!s->command) {
+ kdb_printf("Could not allocate new kdb_defcmd table for %s\n", cmdstr);
+ s->usable = 0;
+ return KDB_NOTIMP;
+ }
+ memcpy(s->command, save_command, s->count * sizeof(*(s->command)));
+ s->command[s->count++] = kdb_strdup(cmdstr, GFP_KDB);
+ kfree(save_command);
+ return 0;
+}
+
+static int
+kdb_defcmd(int argc, const char **argv)
+{
+ struct defcmd_set *save_defcmd_set = defcmd_set, *s;
+ if (defcmd_in_progress) {
+ kdb_printf("kdb: nested defcmd detected, assuming missing endefcmd\n");
+ kdb_defcmd2("endefcmd", "endefcmd");
+ }
+ if (argc == 0) {
+ int i;
+ for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
+ kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name, s->usage, s->help);
+ for (i = 0; i < s->count; ++i)
+ kdb_printf("%s", s->command[i]);
+ kdb_printf("endefcmd\n");
+ }
+ return 0;
+ }
+ if (argc != 3)
+ return KDB_ARGCOUNT;
+ defcmd_set = kmalloc((defcmd_set_count + 1) * sizeof(*defcmd_set), GFP_KDB);
+ if (!defcmd_set) {
+ kdb_printf("Could not allocate new defcmd_set entry for %s\n", argv[1]);
+ defcmd_set = save_defcmd_set;
+ return KDB_NOTIMP;
+ }
+ memcpy(defcmd_set, save_defcmd_set, defcmd_set_count * sizeof(*defcmd_set));
+ kfree(save_defcmd_set);
+ s = defcmd_set + defcmd_set_count;
+ memset(s, 0, sizeof(*s));
+ s->usable = 1;
+ s->name = kdb_strdup(argv[1], GFP_KDB);
+ s->usage = kdb_strdup(argv[2], GFP_KDB);
+ s->help = kdb_strdup(argv[3], GFP_KDB);
+ if (s->usage[0] == '"') {
+ strcpy(s->usage, s->usage+1);
+ s->usage[strlen(s->usage)-1] = '\0';
+ }
+ if (s->help[0] == '"') {
+ strcpy(s->help, s->help+1);
+ s->help[strlen(s->help)-1] = '\0';
+ }
+ ++defcmd_set_count;
+ defcmd_in_progress = 1;
+ return 0;
+}
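+
+/* For illustration, a hypothetical session that defines and runs a
+ * composite command; the body lines are collected one at a time by
+ * kdb_defcmd2() until endefcmd registers the new command:
+ *
+ * kdb> defcmd diag "" "dump common state"
+ * kdb> bt
+ * kdb> ps
+ * kdb> endefcmd
+ * kdb> diag
+ */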
+
+/*
+ * kdb_exec_defcmd
+ *
+ * Execute the set of commands associated with this defcmd name.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_exec_defcmd(int argc, const char **argv)
+{
+ int i, ret;
+ struct defcmd_set *s;
+ if (argc != 0)
+ return KDB_ARGCOUNT;
+ for (s = defcmd_set, i = 0; i < defcmd_set_count; ++i, ++s) {
+ if (strcmp(s->name, argv[0]) == 0)
+ break;
+ }
+ if (i == defcmd_set_count) {
+ kdb_printf("kdb_exec_defcmd: could not find commands for %s\n", argv[0]);
+ return KDB_NOTIMP;
+ }
+ for (i = 0; i < s->count; ++i) {
+ /* Recursive use of kdb_parse, do not use argv after this point */
+ argv = NULL;
+ kdb_printf("[%s]kdb> %s\n", s->name, s->command[i]);
+ if ((ret = kdb_parse(s->command[i])))
+ return ret;
+ }
+ return 0;
+}
+
+/* Command history */
+#define KDB_CMD_HISTORY_COUNT 32
+#define CMD_BUFLEN 200 /* kdb_printf: max printline size == 256 */
+static unsigned int cmd_head=0, cmd_tail=0;
+static unsigned int cmdptr;
+static char cmd_hist[KDB_CMD_HISTORY_COUNT][CMD_BUFLEN];
+static char cmd_cur[CMD_BUFLEN];
+
+/*
+ * The "str" argument may point to something like | grep xyz
+ *
+ */
+static void
+parse_grep(const char *str)
+{
+ int len;
+ char *cp = (char *)str, *cp2;
+
+ /* sanity check: we should have been called with the '|' first */
+ if (*cp != '|')
+ return;
+ cp++;
+ while (isspace(*cp)) cp++;
+ if (strncmp(cp,"grep ",5)) {
+ kdb_printf ("invalid 'pipe', see grephelp\n");
+ return;
+ }
+ cp += 5;
+ while (isspace(*cp)) cp++;
+ cp2 = strchr(cp, '\n');
+ if (cp2)
+ *cp2 = '\0'; /* remove the trailing newline */
+ len = strlen(cp);
+ if (len == 0) {
+ kdb_printf ("invalid 'pipe', see grephelp\n");
+ return;
+ }
+ /* now cp points to a nonzero length search string */
+ if (*cp == '"') {
+ /* allow it to be "x y z" by removing the quotes - there
+ must be two of them */
+ cp++;
+ cp2 = strchr(cp, '"');
+ if (!cp2) {
+ kdb_printf ("invalid quoted string, see grephelp\n");
+ return;
+ }
+ *cp2 = '\0'; /* end the string where the 2nd " was */
+ }
+ kdb_grep_leading = 0;
+ if (*cp == '^') {
+ kdb_grep_leading = 1;
+ cp++;
+ }
+ len = strlen(cp);
+ kdb_grep_trailing = 0;
+ if (*(cp+len-1) == '$') {
+ kdb_grep_trailing = 1;
+ *(cp+len-1) = '\0';
+ }
+ len = strlen(cp);
+ if (!len) return;
+ if (len >= GREP_LEN) {
+ kdb_printf ("search string too long\n");
+ return;
+ }
+ strcpy(kdb_grep_string, cp);
+ kdb_grepping_flag++;
+ return;
+}
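+
+/* For illustration, pipe forms that parse_grep() accepts; '^' and
+ * '$' anchor the match to the start or end of an output line and
+ * double quotes protect embedded spaces:
+ *
+ * ps | grep init substring match
+ * ps | grep ^init anchored to line start
+ * ps | grep "idle task$" anchored to line end
+ */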
+
+/*
+ * kdb_parse
+ *
+ * Parse the command line, search the command table for a
+ * matching command and invoke the command function.
+ * This function may be called recursively, if it is, the second call
+ * will overwrite argv and cbuf. It is the caller's responsibility to
+ * save their argv if they recursively call kdb_parse().
+ *
+ * Parameters:
+ * cmdstr The input command line to be parsed.
+ * regs The registers at the time kdb was entered.
+ * Outputs:
+ * None.
+ * Returns:
+ * Zero for success, a kdb diagnostic if failure.
+ * Locking:
+ * None.
+ * Remarks:
+ * Limited to 20 tokens.
+ *
+ * Real rudimentary tokenization. Basically only whitespace
+ * is considered a token delimiter (but special consideration
+ * is taken of the '=' sign as used by the 'set' command).
+ *
+ * The algorithm used to tokenize the input string relies on
+ * there being at least one whitespace (or otherwise useless)
+ * character between tokens as the character immediately following
+ * the token is altered in-place to a null-byte to terminate the
+ * token string.
+ */
+
+#define MAXARGC 20
+
+int
+kdb_parse(const char *cmdstr)
+{
+ static char *argv[MAXARGC];
+ static int argc = 0;
+ static char cbuf[CMD_BUFLEN+2];
+ char *cp;
+ char *cpp, quoted;
+ kdbtab_t *tp;
+ int i, escaped, ignore_errors = 0, check_grep;
+
+ /*
+ * First tokenize the command string.
+ */
+ cp = (char *)cmdstr;
+ kdb_grepping_flag = check_grep = 0;
+
+ if (KDB_FLAG(CMD_INTERRUPT)) {
+ /* Previous command was interrupted, newline must not repeat the command */
+ KDB_FLAG_CLEAR(CMD_INTERRUPT);
+ argc = 0; /* no repeat */
+ }
+
+ if (*cp != '\n' && *cp != '\0') {
+ argc = 0;
+ cpp = cbuf;
+ while (*cp) {
+ /* skip whitespace */
+ while (isspace(*cp)) cp++;
+ if ((*cp == '\0') || (*cp == '\n') || (*cp == '#' && !defcmd_in_progress))
+ break;
+ /* special case: check for | grep pattern */
+ if (*cp == '|') {
+ check_grep++;
+ break;
+ }
+ if (cpp >= cbuf + CMD_BUFLEN) {
+ kdb_printf("kdb_parse: command buffer overflow, command ignored\n%s\n", cmdstr);
+ return KDB_NOTFOUND;
+ }
+ if (argc >= MAXARGC - 1) {
+ kdb_printf("kdb_parse: too many arguments, command ignored\n%s\n", cmdstr);
+ return KDB_NOTFOUND;
+ }
+ argv[argc++] = cpp;
+ escaped = 0;
+ quoted = '\0';
+ /* Copy to next unquoted and unescaped whitespace or '=' */
+ while (*cp && *cp != '\n' && (escaped || quoted || !isspace(*cp))) {
+ if (cpp >= cbuf + CMD_BUFLEN)
+ break;
+ if (escaped) {
+ escaped = 0;
+ *cpp++ = *cp++;
+ continue;
+ }
+ if (*cp == '\\') {
+ escaped = 1;
+ ++cp;
+ continue;
+ }
+ if (*cp == quoted) {
+ quoted = '\0';
+ } else if (*cp == '\'' || *cp == '"') {
+ quoted = *cp;
+ }
+ if ((*cpp = *cp++) == '=' && !quoted)
+ break;
+ ++cpp;
+ }
+ *cpp++ = '\0'; /* Squash a ws or '=' character */
+ }
+ }
+ if (!argc)
+ return 0;
+ if (check_grep)
+ parse_grep(cp);
+ if (defcmd_in_progress) {
+ int result = kdb_defcmd2(cmdstr, argv[0]);
+ if (!defcmd_in_progress) {
+ argc = 0; /* avoid repeat on endefcmd */
+ *(argv[0]) = '\0';
+ }
+ return result;
+ }
+ if (argv[0][0] == '-' && argv[0][1] && (argv[0][1] < '0' || argv[0][1] > '9')) {
+ ignore_errors = 1;
+ ++argv[0];
+ }
+
+ for(tp=kdb_commands, i=0; i < kdb_max_commands; i++,tp++) {
+ if (tp->cmd_name) {
+ /*
+ * If this command is allowed to be abbreviated,
+ * check to see if this is it.
+ */
+
+ if (tp->cmd_minlen
+ && (strlen(argv[0]) <= tp->cmd_minlen)) {
+ if (strncmp(argv[0],
+ tp->cmd_name,
+ tp->cmd_minlen) == 0) {
+ break;
+ }
+ }
+
+ if (strcmp(argv[0], tp->cmd_name)==0) {
+ break;
+ }
+ }
+ }
+
+ /*
+ * If we don't find a command by this name, see if the first
+ * few characters of this match any of the known commands.
+ * e.g., md1c20 should match md.
+ */
+ if (i == kdb_max_commands) {
+ for(tp=kdb_commands, i=0; i < kdb_max_commands; i++,tp++) {
+ if (tp->cmd_name) {
+ if (strncmp(argv[0],
+ tp->cmd_name,
+ strlen(tp->cmd_name))==0) {
+ break;
+ }
+ }
+ }
+ }
+
+ if (i < kdb_max_commands) {
+ int result;
+ KDB_STATE_SET(CMD);
+ result = (*tp->cmd_func)(argc-1,
+ (const char**)argv);
+ if (result && ignore_errors && result > KDB_CMD_GO)
+ result = 0;
+ KDB_STATE_CLEAR(CMD);
+ switch (tp->cmd_repeat) {
+ case KDB_REPEAT_NONE:
+ argc = 0;
+ if (argv[0])
+ *(argv[0]) = '\0';
+ break;
+ case KDB_REPEAT_NO_ARGS:
+ argc = 1;
+ if (argv[1])
+ *(argv[1]) = '\0';
+ break;
+ case KDB_REPEAT_WITH_ARGS:
+ break;
+ }
+ return result;
+ }
+
+ /*
+ * If the input with which we were presented does not
+ * map to an existing command, attempt to parse it as an
+ * address argument and display the result. Useful for
+ * obtaining the address of a variable, or the nearest symbol
+ * to an address contained in a register.
+ */
+ {
+ kdb_machreg_t value;
+ char *name = NULL;
+ long offset;
+ int nextarg = 0;
+
+ if (kdbgetaddrarg(0, (const char **)argv, &nextarg,
+ &value, &offset, &name)) {
+ return KDB_NOTFOUND;
+ }
+
+ kdb_printf("%s = ", argv[0]);
+ kdb_symbol_print(value, NULL, KDB_SP_DEFAULT);
+ kdb_printf("\n");
+ return 0;
+ }
+}
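+
+/* For illustration, how the tokenizer above splits input. An
+ * unquoted '=' terminates a token, which is what lets 'set' see
+ * three separate arguments, and unknown commands fall back to
+ * prefix matching:
+ *
+ * set LINES=40 -> argv = { "set", "LINES", "40" }
+ * md1c20 -> matches the md command by prefix
+ */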
+
+
+static int
+handle_ctrl_cmd(char *cmd)
+{
+#define CTRL_P 16
+#define CTRL_N 14
+
+ /* initial situation */
+ if (cmd_head == cmd_tail) return 0;
+
+ switch(*cmd) {
+ case CTRL_P:
+ if (cmdptr != cmd_tail)
+ cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT;
+ strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
+ return 1;
+ case CTRL_N:
+ if (cmdptr != cmd_head)
+ cmdptr = (cmdptr+1) % KDB_CMD_HISTORY_COUNT;
+ strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * kdb_do_dump
+ *
+ * Call the dump() function if the kernel is configured for LKCD.
+ * Inputs:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * None. dump() may or may not return.
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static void
+kdb_do_dump(void)
+{
+#if defined(CONFIG_LKCD_DUMP) || defined(CONFIG_LKCD_DUMP_MODULE)
+ kdb_printf("Forcing dump (if configured)\n");
+ console_loglevel = 8; /* to see the dump messages */
+ dump("kdb_do_dump");
+#endif
+}
+
+/*
+ * kdb_reboot
+ *
+ * This function implements the 'reboot' command. Reboot the system
+ * immediately.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Shouldn't return from this function.
+ */
+
+static int
+kdb_reboot(int argc, const char **argv)
+{
+ emergency_restart();
+ kdb_printf("Hmm, kdb_reboot did not reboot, spinning here\n");
+ while (1) {};
+ /* NOTREACHED */
+ return 0;
+}
+
+#ifdef CONFIG_KDB_KDUMP
+
+int kdb_kdump_state = KDB_KDUMP_RESET; /* KDB kdump state */
+
+static int kdb_cpu(int argc, const char **argv);
+
+/*
+ * kdb_kdump_check
+ *
+ * This is where the kdump on monarch cpu is handled.
+ *
+ */
+void kdb_kdump_check(struct pt_regs *regs)
+{
+ if (kdb_kdump_state != KDB_KDUMP_RESET) {
+ crash_kexec(regs);
+
+ /* If the call above returned then something
+ didn't work */
+ kdb_printf("kdb_kdump_check: crash_kexec failed!\n");
+ kdb_printf(" Please check if the kdump kernel has been properly loaded\n");
+ kdb_kdump_state = KDB_KDUMP_RESET;
+ }
+}
+
+
+/*
+ * kdb_kdump
+ *
+ * This function implements the 'kdump' command.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * envp environment vector
+ * regs registers at time kdb was entered.
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Shouldn't return from this function.
+ */
+
+static int
+kdb_kdump(int argc, const char **argv)
+{
+ char cpu_id[6]; /* up to 99,999 cpus */
+ const char *cpu_argv[] = {NULL, cpu_id, NULL};
+ int ret;
+
+ kdb_kdump_state = KDB_KDUMP_KDUMP;
+ /* Switch back to the initial cpu before processing the kdump command */
+ if (smp_processor_id() != kdb_initial_cpu) {
+ sprintf(cpu_id, "%d", kdb_initial_cpu);
+ ret = kdb_cpu(1, cpu_argv);
+ if (ret != KDB_CMD_CPU) {
+ kdb_printf("kdump: Failed to switch to initial cpu %d;"
+ " aborted\n", kdb_initial_cpu);
+ kdb_kdump_state = KDB_KDUMP_RESET;
+ }
+ } else
+ ret = KDB_CMD_CPU;
+
+ return ret;
+}
+
+#endif /* CONFIG_KDB_KDUMP */
+
+static int
+kdb_quiet(int reason)
+{
+ return (reason == KDB_REASON_CPU_UP || reason == KDB_REASON_SILENT);
+}
+
+/*
+ * kdb_local
+ *
+ * The main code for kdb. This routine is invoked on a specific
+ * processor, it is not global. The main kdb() routine ensures
+ * that only one processor at a time is in this routine. This
+ * code is called with the real reason code on the first entry
+ * to a kdb session, thereafter it is called with reason SWITCH,
+ * even if the user goes back to the original cpu.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * regs The exception frame at time of fault/breakpoint. NULL
+ * for reason SILENT or CPU_UP, otherwise valid.
+ * db_result Result code from the break or debug point.
+ * Returns:
+ * 0 KDB was invoked for an event for which it was not responsible
+ * 1 KDB handled the event for which it was invoked.
+ * KDB_CMD_GO User typed 'go'.
+ * KDB_CMD_CPU User switched to another cpu.
+ * KDB_CMD_SS Single step.
+ * KDB_CMD_SSB Single step until branch.
+ * Locking:
+ * none
+ * Remarks:
+ * none
+ */
+
+static int
+kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_dbtrap_t db_result)
+{
+ char *cmdbuf;
+ int diag;
+ struct task_struct *kdb_current = kdb_curr_task(smp_processor_id());
+
+#ifdef CONFIG_KDB_KDUMP
+ kdb_kdump_check(regs);
+#endif
+
+ /* If kdb has been entered for an event which has been/will be
+ * recovered then silently return. We have to get this far into kdb in
+ * order to synchronize all the cpus, typically only one cpu (monarch)
+ * knows that the event is recoverable but the other cpus (slaves) may
+ * also be driven into kdb before that decision is made by the monarch.
+ *
+ * To pause in kdb even for recoverable events, 'set RECOVERY_PAUSE 1'
+ */
+ KDB_DEBUG_STATE("kdb_local 1", reason);
+ if (reason == KDB_REASON_ENTER
+ && KDB_FLAG(RECOVERY)
+ && !KDB_FLAG(CATASTROPHIC)) {
+ int recovery_pause = 0;
+ kdbgetintenv("RECOVERY_PAUSE", &recovery_pause);
+ if (recovery_pause == 0)
+ reason = KDB_REASON_SILENT;
+ else
+ kdb_printf("%s: Recoverable error detected but"
+ " RECOVERY_PAUSE is set, staying in KDB\n",
+ __FUNCTION__);
+ }
+
+ KDB_DEBUG_STATE("kdb_local 2", reason);
+ kdb_go_count = 0;
+ if (kdb_quiet(reason)) {
+ /* no message */
+ } else if (reason == KDB_REASON_DEBUG) {
+ /* special case below */
+ } else {
+ kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+ kdb_printf("on processor %d ", smp_processor_id());
+#endif
+ }
+
+ switch (reason) {
+ case KDB_REASON_DEBUG:
+ {
+ /*
+ * If re-entering kdb after a single step
+ * command, don't print the message.
+ */
+ switch(db_result) {
+ case KDB_DB_BPT:
+ kdb_printf("\nEntering kdb (0x%p, pid %d) ", kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+ kdb_printf("on processor %d ", smp_processor_id());
+#endif
+ kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+ break;
+ case KDB_DB_SSB:
+ /*
+ * In the midst of ssb command. Just return.
+ */
+ KDB_DEBUG_STATE("kdb_local 3", reason);
+ return KDB_CMD_SSB; /* Continue with SSB command */
+
+ break;
+ case KDB_DB_SS:
+ break;
+ case KDB_DB_SSBPT:
+ KDB_DEBUG_STATE("kdb_local 4", reason);
+ return 1; /* kdba_db_trap did the work */
+ default:
+ kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",
+ db_result);
+ break;
+ }
+
+ }
+ break;
+ case KDB_REASON_ENTER:
+ if (KDB_STATE(KEYBOARD))
+ kdb_printf("due to Keyboard Entry\n");
+ else {
+ kdb_printf("due to KDB_ENTER()\n");
+ }
+ break;
+ case KDB_REASON_KEYBOARD:
+ KDB_STATE_SET(KEYBOARD);
+ kdb_printf("due to Keyboard Entry\n");
+ break;
+ case KDB_REASON_ENTER_SLAVE: /* drop through, slaves only get released via cpu switch */
+ case KDB_REASON_SWITCH:
+ kdb_printf("due to cpu switch\n");
+ if (KDB_STATE(GO_SWITCH)) {
+ KDB_STATE_CLEAR(GO_SWITCH);
+ KDB_DEBUG_STATE("kdb_local 5", reason);
+ return KDB_CMD_GO;
+ }
+ break;
+ case KDB_REASON_OOPS:
+ kdb_printf("Oops: %s\n", kdb_diemsg);
+ kdb_printf("due to oops @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+ kdba_dumpregs(regs, NULL, NULL);
+ break;
+ case KDB_REASON_NMI:
+ kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n",
+ kdba_getpc(regs));
+ kdba_dumpregs(regs, NULL, NULL);
+ break;
+ case KDB_REASON_BREAK:
+ kdb_printf("due to Breakpoint @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+ /*
+ * Determine if this breakpoint is one that we
+ * are interested in.
+ */
+ if (db_result != KDB_DB_BPT) {
+ kdb_printf("kdb: error return from kdba_bp_trap: %d\n", db_result);
+ KDB_DEBUG_STATE("kdb_local 6", reason);
+ return 0; /* Not for us, dismiss it */
+ }
+ break;
+ case KDB_REASON_RECURSE:
+ kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
+ break;
+ case KDB_REASON_CPU_UP:
+ case KDB_REASON_SILENT:
+ KDB_DEBUG_STATE("kdb_local 7", reason);
+ if (reason == KDB_REASON_CPU_UP)
+ kdba_cpu_up();
+ return KDB_CMD_GO; /* Silent entry, silent exit */
+ break;
+ default:
+ kdb_printf("kdb: unexpected reason code: %d\n", reason);
+ KDB_DEBUG_STATE("kdb_local 8", reason);
+ return 0; /* Not for us, dismiss it */
+ }
+
+ kdba_local_arch_setup();
+
+ kdba_set_current_task(kdb_current);
+
+ while (1) {
+ /*
+ * Initialize pager context.
+ */
+ kdb_nextline = 1;
+ KDB_STATE_CLEAR(SUPPRESS);
+#ifdef kdba_setjmp
+ /*
+ * Use kdba_setjmp/kdba_longjmp to break out of
+ * the pager early and to attempt to recover from kdb errors.
+ */
+ KDB_STATE_CLEAR(LONGJMP);
+ if (kdbjmpbuf) {
+ if (kdba_setjmp(&kdbjmpbuf[smp_processor_id()])) {
+ /* Command aborted (usually in pager) */
+ continue;
+ }
+ else
+ KDB_STATE_SET(LONGJMP);
+ }
+#endif /* kdba_setjmp */
+
+ cmdbuf = cmd_cur;
+ *cmdbuf = '\0';
+ *(cmd_hist[cmd_head])='\0';
+
+ if (KDB_FLAG(ONLY_DO_DUMP)) {
+ /* kdb is off but a catastrophic error requires a dump.
+ * Take the dump and reboot.
+ * Turn on logging so the kdb output appears in the log
+ * buffer in the dump.
+ */
+ const char *setargs[] = { "set", "LOGGING", "1" };
+ kdb_set(2, setargs);
+ kdb_do_dump();
+ kdb_reboot(0, NULL);
+ /*NOTREACHED*/
+ }
+
+do_full_getstr:
+#if defined(CONFIG_SMP)
+ snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"), smp_processor_id());
+#else
+ snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));
+#endif
+ if (defcmd_in_progress)
+ strncat(kdb_prompt_str, "[defcmd]",
+ CMD_BUFLEN - strlen(kdb_prompt_str) - 1);
+
+ /*
+ * Fetch command from keyboard
+ */
+ cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);
+ if (*cmdbuf != '\n') {
+ if (*cmdbuf < 32) {
+ if(cmdptr == cmd_head) {
+ strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
+ *(cmd_hist[cmd_head]+strlen(cmd_hist[cmd_head])-1) = '\0';
+ }
+ if(!handle_ctrl_cmd(cmdbuf))
+ *(cmd_cur+strlen(cmd_cur)-1) = '\0';
+ cmdbuf = cmd_cur;
+ goto do_full_getstr;
+ }
+ else
+ strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
+
+ cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;
+ if (cmd_head == cmd_tail) cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;
+
+ }
+
+ cmdptr = cmd_head;
+ diag = kdb_parse(cmdbuf);
+ if (diag == KDB_NOTFOUND) {
+ kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
+ diag = 0;
+ }
+ if (diag == KDB_CMD_GO
+ || diag == KDB_CMD_CPU
+ || diag == KDB_CMD_SS
+ || diag == KDB_CMD_SSB)
+ break;
+
+ if (diag)
+ kdb_cmderror(diag);
+ }
+
+ kdba_local_arch_cleanup();
+
+ KDB_DEBUG_STATE("kdb_local 9", diag);
+ return diag;
+}
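+
+/* Note on the command history handling above (a sketch, sizes are
+ * hypothetical): cmd_hist[] is a ring of KDB_CMD_HISTORY_COUNT
+ * buffers. cmd_head is the slot the next command will occupy and
+ * cmd_tail the oldest live entry; when advancing cmd_head lands on
+ * cmd_tail, cmd_tail is pushed forward and the oldest command is
+ * discarded. Worked example with a ring of 4: after 5 commands,
+ * cmd_head == 1 and cmd_tail == 2.
+ */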
+
+
+/*
+ * kdb_print_state
+ *
+ * Print the state data for the current processor for debugging.
+ *
+ * Inputs:
+ * text Identifies the debug point
+ * value Any integer value to be printed, e.g. reason code.
+ * Returns:
+ * None.
+ * Locking:
+ * none
+ * Remarks:
+ * none
+ */
+
+void kdb_print_state(const char *text, int value)
+{
+ kdb_printf("state: %s cpu %d value %d initial %d state %x\n",
+ text, smp_processor_id(), value, kdb_initial_cpu, kdb_state[smp_processor_id()]);
+}
+
+/*
+ * kdb_previous_event
+ *
+ * Return a count of cpus that are leaving kdb, i.e. the number
+ * of processors that are still handling the previous kdb event.
+ *
+ * Inputs:
+ * None.
+ * Returns:
+ * Count of cpus in previous event.
+ * Locking:
+ * none
+ * Remarks:
+ * none
+ */
+
+static int
+kdb_previous_event(void)
+{
+ int i, leaving = 0;
+ for (i = 0; i < NR_CPUS; ++i) {
+ if (KDB_STATE_CPU(LEAVING, i))
+ ++leaving;
+ }
+ return leaving;
+}
+
+/*
+ * kdb_wait_for_cpus
+ *
+ * Invoked once at the start of a kdb event, from the controlling cpu. Wait a
+ * short period for the other cpus to enter kdb state.
+ *
+ * Inputs:
+ * none
+ * Returns:
+ * none
+ * Locking:
+ * none
+ * Remarks:
+ * none
+ */
+
+int kdb_wait_for_cpus_secs;
+
+static void
+kdb_wait_for_cpus(void)
+{
+#ifdef CONFIG_SMP
+ int online = 0, kdb_data = 0, prev_kdb_data = 0, c, time;
+ mdelay(100);
+ for (time = 0; time < kdb_wait_for_cpus_secs; ++time) {
+ online = 0;
+ kdb_data = 0;
+ for_each_online_cpu(c) {
+ ++online;
+ if (kdb_running_process[c].seqno >= kdb_seqno - 1)
+ ++kdb_data;
+ }
+ if (online == kdb_data)
+ break;
+ if (prev_kdb_data != kdb_data) {
+ kdb_nextline = 0; /* no prompt yet */
+ kdb_printf(" %d out of %d cpus in kdb, waiting for the rest, timeout in %d second(s)\n",
+ kdb_data, online, kdb_wait_for_cpus_secs - time);
+ prev_kdb_data = kdb_data;
+ }
+ touch_nmi_watchdog();
+ mdelay(1000);
+ /* Architectures may want to send a more forceful interrupt */
+ if (time == min(kdb_wait_for_cpus_secs / 2, 5))
+ kdba_wait_for_cpus();
+ if (time % 4 == 0)
+ kdb_printf(".");
+ }
+ if (time) {
+ int wait = online - kdb_data;
+ if (wait == 0)
+ kdb_printf("All cpus are now in kdb\n");
+ else
+ kdb_printf("%d cpu%s not in kdb, %s state is unknown\n",
+ wait,
+ wait == 1 ? " is" : "s are",
+ wait == 1 ? "its" : "their");
+ }
+ /* give back the vector we took over in smp_kdb_stop */
+ kdba_giveback_vector(KDB_VECTOR);
+#endif /* CONFIG_SMP */
+}
+
+/*
+ * kdb_main_loop
+ *
+ * The main kdb loop. After initial setup and assignment of the controlling
+ * cpu, all cpus are in this loop. One cpu is in control and will issue the kdb
+ * prompt, the others will spin until 'go' or cpu switch.
+ *
+ * To get a consistent view of the kernel stacks for all processes, this routine
+ * is invoked from the main kdb code via an architecture specific routine.
+ * kdba_main_loop is responsible for making the kernel stacks consistent for all
+ * processes; there should be no difference between a blocked process and a
+ * running process as far as kdb is concerned.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * reason2 kdb's current reason code. Initially error but can change
+ * according to kdb state.
+ * db_result Result code from break or debug point.
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it
+ * should always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event for which it was not responsible
+ * 1 KDB handled the event for which it was invoked.
+ * Locking:
+ * none
+ * Remarks:
+ * none
+ */
+
+int
+kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+ kdb_dbtrap_t db_result, struct pt_regs *regs)
+{
+ int result = 1;
+ /* Stay in kdb() until 'go', 'ss[b]' or an error */
+ while (1) {
+ /*
+ * All processors except the one that is in control
+ * will spin here.
+ */
+ KDB_DEBUG_STATE("kdb_main_loop 1", reason);
+ while (KDB_STATE(HOLD_CPU)) {
+ /* state KDB is turned off by kdb_cpu to see if the
+ * other cpus are still live; each cpu in this loop
+ * turns it back on.
+ */
+ if (!KDB_STATE(KDB)) {
+ KDB_STATE_SET(KDB);
+ }
+
+#if defined(CONFIG_KDB_KDUMP)
+ if (KDB_STATE(KEXEC)) {
+ struct pt_regs r;
+ if (regs == NULL)
+ regs = &r;
+
+ kdba_kdump_shutdown_slave(regs);
+ return 0;
+ }
+#endif
+ }
+
+ KDB_STATE_CLEAR(SUPPRESS);
+ KDB_DEBUG_STATE("kdb_main_loop 2", reason);
+ if (KDB_STATE(LEAVING))
+ break; /* Another cpu said 'go' */
+
+ if (!kdb_quiet(reason))
+ kdb_wait_for_cpus();
+ /* Still using kdb, this processor is in control */
+ result = kdb_local(reason2, error, regs, db_result);
+ KDB_DEBUG_STATE("kdb_main_loop 3", result);
+
+ if (result == KDB_CMD_CPU) {
+ /* Cpu switch, hold the current cpu, release the target one. */
+ reason2 = KDB_REASON_SWITCH;
+ KDB_STATE_SET(HOLD_CPU);
+ KDB_STATE_CLEAR_CPU(HOLD_CPU, kdb_new_cpu);
+ continue;
+ }
+
+ if (result == KDB_CMD_SS) {
+ KDB_STATE_SET(DOING_SS);
+ break;
+ }
+
+ if (result == KDB_CMD_SSB) {
+ KDB_STATE_SET(DOING_SS);
+ KDB_STATE_SET(DOING_SSB);
+ break;
+ }
+
+ if (result && result != 1 && result != KDB_CMD_GO)
+ kdb_printf("\nUnexpected kdb_local return code %d\n", result);
+
+ KDB_DEBUG_STATE("kdb_main_loop 4", reason);
+ break;
+ }
+ if (KDB_STATE(DOING_SS))
+ KDB_STATE_CLEAR(SSBPT);
+ return result;
+}
+
+/* iapc_boot_arch was defined in ACPI 2.0, FADT revision 3 onwards. For any
+ * FADT prior to revision 3, we have to assume that we have an i8042 I/O
+ * device. ACPI initialises after KDB initialises but before KDB is used, so
+ * check iapc_boot_arch on each entry to KDB.
+ */
+static void
+kdb_check_i8042(void)
+{
+ KDB_FLAG_CLEAR(NO_I8042);
+#ifdef CONFIG_ACPI
+ if (acpi_gbl_FADT.header.revision >= 3 &&
+ (acpi_gbl_FADT.boot_flags & ACPI_FADT_8042) == 0)
+ KDB_FLAG_SET(NO_I8042);
+#endif /* CONFIG_ACPI */
+}
+
+/*
+ * kdb
+ *
+ * This function is the entry point for the kernel debugger. It
+ * provides a command parser and associated support functions to
+ * allow examination and control of an active kernel.
+ *
+ * The breakpoint trap code should invoke this function with
+ * one of KDB_REASON_BREAK (int 03) or KDB_REASON_DEBUG (debug register).
+ *
+ * The die_if_kernel function should invoke this function with
+ * KDB_REASON_OOPS.
+ *
+ * In single step mode, one cpu is released to run without
+ * breakpoints. Interrupts and NMI are reset to their original values,
+ * the cpu is allowed to do one instruction which causes a trap
+ * into kdb with KDB_REASON_DEBUG.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it
+ * should always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event for which it was not responsible
+ * 1 KDB handled the event for which it was invoked.
+ * Locking:
+ * none
+ * Remarks:
+ * No assumptions of system state. This function may be invoked
+ * with arbitrary locks held. It will stop all other processors
+ * in an SMP environment, disable all interrupts and does not use
+ * the operating systems keyboard driver.
+ *
+ * This code is reentrant but only for cpu switch. Any other
+ * reentrancy is an error, although kdb will attempt to recover.
+ *
+ * At the start of a kdb session the initial processor is running
+ * kdb() and the other processors can be doing anything. When the
+ * initial processor calls smp_kdb_stop() the other processors are
+ * driven through kdb_ipi which calls kdb() with reason SWITCH.
+ * That brings all processors into this routine, one with a "real"
+ * reason code, the other with SWITCH.
+ *
+ * Because the other processors are driven via smp_kdb_stop(),
+ * they enter here from the NMI handler. Until the other
+ * processors exit from here and exit from kdb_ipi, they will not
+ * take any more NMI requests. The initial cpu will still take NMI.
+ *
+ * Multiple race and reentrancy conditions, each with different
+ * avoidance mechanisms.
+ *
+ * Two cpus hit debug points at the same time.
+ *
+ * kdb_lock and kdb_initial_cpu ensure that only one cpu gets
+ * control of kdb. The others spin on kdb_initial_cpu until
+ * they are driven through NMI into kdb_ipi. When the initial
+ * cpu releases the others from NMI, they resume trying to get
+ * kdb_initial_cpu to start a new event.
+ *
+ * A cpu is released from kdb and starts a new event before the
+ * original event has completely ended.
+ *
+ * kdb_previous_event() prevents any cpu from entering
+ * kdb_initial_cpu state until the previous event has completely
+ * ended on all cpus.
+ *
+ * An exception occurs inside kdb.
+ *
+ * kdb_initial_cpu detects recursive entry to kdb and attempts
+ * to recover. The recovery uses longjmp() which means that
+ * recursive calls to kdb never return. Beware of assumptions
+ * like
+ *
+ * ++depth;
+ * kdb();
+ * --depth;
+ *
+ * If the kdb call is recursive then longjmp takes over and
+ * --depth is never executed.
+ *
+ * NMI handling.
+ *
+ * NMI handling is tricky. The initial cpu is invoked by some kdb event,
+ * this event could be NMI driven but usually is not. The other cpus are
+ * driven into kdb() via kdb_ipi which uses NMI so at the start the other
+ * cpus will not accept NMI. Some operations such as SS release one cpu
+ * but hold all the others. Releasing a cpu means it drops back to
+ * whatever it was doing before the kdb event, this means it drops out of
+ * kdb_ipi and hence out of NMI status. But the software watchdog uses
+ * NMI and we do not want spurious watchdog calls into kdb. kdba_read()
+ * resets the watchdog counters in its input polling loop; while a kdb
+ * command is running it is subject to NMI watchdog events.
+ *
+ * Another problem with NMI handling is that the NMI used to drive the other
+ * cpus into kdb cannot be distinguished from the watchdog NMI. State
+ * flag WAIT_IPI indicates that a cpu is waiting for NMI via kdb_ipi,
+ * if not set then software NMI is ignored by kdb_ipi.
+ *
+ * Cpu switching.
+ *
+ * All cpus are in kdb (or they should be), all but one are
+ * spinning on KDB_STATE(HOLD_CPU). Only one cpu is not in
+ * HOLD_CPU state, only that cpu can handle commands.
+ *
+ * Go command entered.
+ *
+ * If necessary, go will switch to the initial cpu first. If the event
+ * was caused by a software breakpoint (assumed to be global) that
+ * requires single-step to get over the breakpoint then only release the
+ * initial cpu, after the initial cpu has single-stepped the breakpoint
+ * then release the rest of the cpus. If SSBPT is not required then
+ * release all the cpus at once.
+ */
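+
+/* Illustrative timeline of a two cpu session (cpu numbers are an
+ * example, not a requirement):
+ *   cpu 0: hits a breakpoint, wins kdb_lock, becomes kdb_initial_cpu,
+ *          then smp_kdb_stop() drives the other cpus in via NMI.
+ *   cpu 1: enters from kdb_ipi with KDB_REASON_SWITCH and spins in
+ *          kdb_main_loop() on KDB_STATE(HOLD_CPU).
+ *   cpu 0: the user types 'go'; LEAVING is set for every cpu in kdb,
+ *          so all cpus drop out of kdb_main_loop() and resume.
+ */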
+
+int
+kdb(kdb_reason_t reason, int error, struct pt_regs *regs)
+{
+ kdb_intstate_t int_state; /* Interrupt state */
+ kdb_reason_t reason2 = reason;
+ int result = 0; /* Default is kdb did not handle it */
+ int ss_event, old_regs_saved = 0;
+ struct pt_regs *old_regs = NULL;
+ kdb_dbtrap_t db_result=KDB_DB_NOBPT;
+ preempt_disable();
+ atomic_inc(&kdb_event);
+
+ switch(reason) {
+ case KDB_REASON_OOPS:
+ case KDB_REASON_NMI:
+ KDB_FLAG_SET(CATASTROPHIC); /* kernel state is dubious now */
+ break;
+ default:
+ break;
+ }
+ switch(reason) {
+ case KDB_REASON_ENTER:
+ case KDB_REASON_ENTER_SLAVE:
+ case KDB_REASON_BREAK:
+ case KDB_REASON_DEBUG:
+ case KDB_REASON_OOPS:
+ case KDB_REASON_SWITCH:
+ case KDB_REASON_KEYBOARD:
+ case KDB_REASON_NMI:
+ if (regs && regs != get_irq_regs()) {
+ old_regs = set_irq_regs(regs);
+ old_regs_saved = 1;
+ }
+ break;
+ default:
+ break;
+ }
+ if (kdb_continue_catastrophic > 2) {
+ kdb_printf("kdb_continue_catastrophic is out of range, setting to 2\n");
+ kdb_continue_catastrophic = 2;
+ }
+ if (!kdb_on && KDB_FLAG(CATASTROPHIC) && kdb_continue_catastrophic == 2) {
+ KDB_FLAG_SET(ONLY_DO_DUMP);
+ }
+ if (!kdb_on && !KDB_FLAG(ONLY_DO_DUMP))
+ goto out;
+
+ KDB_DEBUG_STATE("kdb 1", reason);
+ KDB_STATE_CLEAR(SUPPRESS);
+
+ /* Filter out userspace breakpoints first, no point in doing all
+ * the kdb smp fiddling when it is really a gdb trap.
+ * Save the single step status first, kdba_db_trap clears ss status.
+ * kdba_b[dp]_trap sets SSBPT if required.
+ */
+ ss_event = KDB_STATE(DOING_SS) || KDB_STATE(SSBPT);
+#ifdef CONFIG_CPU_XSCALE
+ if ( KDB_STATE(A_XSC_ICH) ) {
+ /* restore changed I_BIT */
+ KDB_STATE_CLEAR(A_XSC_ICH);
+ kdba_restore_retirq(regs, KDB_STATE(A_XSC_IRQ));
+ if ( !ss_event ) {
+ kdb_printf("Stranger!!! Why IRQ bit is changed====\n");
+ }
+ }
+#endif
+ if (reason == KDB_REASON_BREAK) {
+ db_result = kdba_bp_trap(regs, error); /* Only call this once */
+ }
+ if (reason == KDB_REASON_DEBUG) {
+ db_result = kdba_db_trap(regs, error); /* Only call this once */
+ }
+
+ if ((reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG)
+ && db_result == KDB_DB_NOBPT) {
+ KDB_DEBUG_STATE("kdb 2", reason);
+ goto out; /* Not one of mine */
+ }
+
+ /* Turn off single step if it was being used */
+ if (ss_event) {
+ kdba_clearsinglestep(regs);
+ /* Single step after a breakpoint removes the need for a delayed reinstall */
+ if (reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG)
+ KDB_STATE_CLEAR(SSBPT);
+ }
+
+ /* kdb can validly reenter, but only under certain well-defined conditions */
+ if (reason == KDB_REASON_DEBUG
+ && !KDB_STATE(HOLD_CPU)
+ && ss_event)
+ KDB_STATE_SET(REENTRY);
+ else
+ KDB_STATE_CLEAR(REENTRY);
+
+ /* Wait for previous kdb event to completely exit before starting
+ * a new event.
+ */
+ while (kdb_previous_event())
+ ;
+ KDB_DEBUG_STATE("kdb 3", reason);
+
+ /*
+ * If kdb is already active, print a message and try to recover.
+ * If recovery is not possible and recursion is allowed or
+ * forced recursion without recovery is set then try to recurse
+ * in kdb. Not guaranteed to work but it makes an attempt at
+ * debugging the debugger.
+ */
+ if (reason != KDB_REASON_SWITCH &&
+ reason != KDB_REASON_ENTER_SLAVE) {
+ if (KDB_IS_RUNNING() && !KDB_STATE(REENTRY)) {
+ int recover = 1;
+ unsigned long recurse = 0;
+ kdb_printf("kdb: Debugger re-entered on cpu %d, new reason = %d\n",
+ smp_processor_id(), reason);
+ /* Should only re-enter from released cpu */
+
+ if (KDB_STATE(HOLD_CPU)) {
+ kdb_printf(" Strange, cpu %d should not be running\n", smp_processor_id());
+ recover = 0;
+ }
+ if (!KDB_STATE(CMD)) {
+ kdb_printf(" Not executing a kdb command\n");
+ recover = 0;
+ }
+ if (!KDB_STATE(LONGJMP)) {
+ kdb_printf(" No longjmp available for recovery\n");
+ recover = 0;
+ }
+ kdbgetulenv("RECURSE", &recurse);
+ if (recurse > 1) {
+ kdb_printf(" Forced recursion is set\n");
+ recover = 0;
+ }
+ if (recover) {
+ kdb_printf(" Attempting to abort command and recover\n");
+#ifdef kdba_setjmp
+ kdba_longjmp(&kdbjmpbuf[smp_processor_id()], 0);
+#endif /* kdba_setjmp */
+ }
+ if (recurse) {
+ if (KDB_STATE(RECURSE)) {
+ kdb_printf(" Already in recursive mode\n");
+ } else {
+ kdb_printf(" Attempting recursive mode\n");
+ KDB_STATE_SET(RECURSE);
+ KDB_STATE_SET(REENTRY);
+ reason2 = KDB_REASON_RECURSE;
+ recover = 1;
+ }
+ }
+ if (!recover) {
+ kdb_printf(" Cannot recover, allowing event to proceed\n");
+ /* temporary: spin until the active kdb session exits */
+ while (KDB_IS_RUNNING())
+ cpu_relax();
+ goto out;
+ }
+ }
+ } else if (reason == KDB_REASON_SWITCH && !KDB_IS_RUNNING()) {
+ kdb_printf("kdb: CPU switch without kdb running, I'm confused\n");
+ goto out;
+ }
+
+ /*
+ * Disable interrupts, breakpoints etc. on this processor
+ * during kdb command processing
+ */
+ KDB_STATE_SET(KDB);
+ kdba_disableint(&int_state);
+ if (!KDB_STATE(KDB_CONTROL)) {
+ kdb_bp_remove_local();
+ KDB_STATE_SET(KDB_CONTROL);
+ }
+
+ /*
+ * If not entering the debugger due to CPU switch or single step
+ * reentry, serialize access here.
+ * The processors may race getting to this point - if,
+ * for example, more than one processor hits a breakpoint
+ * at the same time. We'll serialize access to kdb here -
+ * other processors will loop here, and the NMI from the stop
+ * IPI will take them into kdb as switch candidates. Once
+ * the initial processor releases the debugger, the rest of
+ * the processors will race for it.
+ *
+ * The above describes the normal state of affairs, where two or more
+ * cpus that are entering kdb at the "same" time are assumed to be for
+ * separate events. However some processes such as ia64 MCA/INIT will
+ * drive all the cpus into error processing at the same time. For that
+ * case, all of the cpus entering kdb at the "same" time are really a
+ * single event.
+ *
+ * That case is handled by the use of KDB_ENTER by one cpu (the
+ * monarch) and KDB_ENTER_SLAVE on the other cpus (the slaves).
+ * KDB_ENTER_SLAVE maps to KDB_REASON_ENTER_SLAVE. The slave events
+ * will be treated as if they had just responded to the kdb IPI, i.e.
+ * as if they were KDB_REASON_SWITCH.
+ *
+ * Because of races across multiple cpus, ENTER_SLAVE can occur before
+ * the main ENTER. Hold up ENTER_SLAVE here until the main ENTER
+ * arrives.
+ */
+
+ if (reason == KDB_REASON_ENTER_SLAVE) {
+ spin_lock(&kdb_lock);
+ while (!KDB_IS_RUNNING()) {
+ spin_unlock(&kdb_lock);
+ while (!KDB_IS_RUNNING())
+ cpu_relax();
+ spin_lock(&kdb_lock);
+ }
+ reason = KDB_REASON_SWITCH;
+ KDB_STATE_SET(HOLD_CPU);
+ spin_unlock(&kdb_lock);
+ }
+
+ if (reason == KDB_REASON_SWITCH || KDB_STATE(REENTRY))
+ ; /* drop through */
+ else {
+ KDB_DEBUG_STATE("kdb 4", reason);
+ spin_lock(&kdb_lock);
+ while (KDB_IS_RUNNING() || kdb_previous_event()) {
+ spin_unlock(&kdb_lock);
+ while (KDB_IS_RUNNING() || kdb_previous_event())
+ cpu_relax();
+ spin_lock(&kdb_lock);
+ }
+ KDB_DEBUG_STATE("kdb 5", reason);
+
+ kdb_initial_cpu = smp_processor_id();
+ ++kdb_seqno;
+ spin_unlock(&kdb_lock);
+ if (!kdb_quiet(reason))
+ notify_die(DIE_KDEBUG_ENTER, "KDEBUG ENTER", regs, error, 0, 0);
+ }
+
+ if (smp_processor_id() == kdb_initial_cpu
+ && !KDB_STATE(REENTRY)) {
+ KDB_STATE_CLEAR(HOLD_CPU);
+ KDB_STATE_CLEAR(WAIT_IPI);
+ kdb_check_i8042();
+ /*
+ * Remove the global breakpoints. This is only done
+ * once from the initial processor on initial entry.
+ */
+ if (!kdb_quiet(reason) || smp_processor_id() == 0)
+ kdb_bp_remove_global();
+
+ /*
+ * If SMP, stop other processors. The other processors
+ * will enter kdb() with KDB_REASON_SWITCH and spin in
+ * kdb_main_loop().
+ */
+ KDB_DEBUG_STATE("kdb 6", reason);
+ if (NR_CPUS > 1 && !kdb_quiet(reason)) {
+ int i;
+ for (i = 0; i < NR_CPUS; ++i) {
+ if (!cpu_online(i))
+ continue;
+ if (i != kdb_initial_cpu) {
+ KDB_STATE_SET_CPU(HOLD_CPU, i);
+ KDB_STATE_SET_CPU(WAIT_IPI, i);
+ }
+ }
+ KDB_DEBUG_STATE("kdb 7", reason);
+ smp_kdb_stop();
+ KDB_DEBUG_STATE("kdb 8", reason);
+ }
+ }
+
+ if (KDB_STATE(GO1)) {
+ kdb_bp_remove_global(); /* They were set for single-step purposes */
+ KDB_STATE_CLEAR(GO1);
+ reason = KDB_REASON_SILENT; /* Now silently go */
+ }
+
+ /* Set up a consistent set of process stacks before talking to the user */
+ KDB_DEBUG_STATE("kdb 9", result);
+ result = kdba_main_loop(reason, reason2, error, db_result, regs);
+ reason = reason2; /* back to original event type */
+
+ KDB_DEBUG_STATE("kdb 10", result);
+ kdba_adjust_ip(reason, error, regs);
+ KDB_STATE_CLEAR(LONGJMP);
+ KDB_DEBUG_STATE("kdb 11", result);
+ /* A go that requires single-step over a breakpoint must only release
+ * one cpu.
+ */
+ if (result == KDB_CMD_GO && KDB_STATE(SSBPT))
+ KDB_STATE_SET(GO1);
+
+ if (smp_processor_id() == kdb_initial_cpu &&
+ !KDB_STATE(DOING_SS) &&
+ !KDB_STATE(RECURSE)) {
+ /*
+ * (Re)install the global breakpoints and cleanup the cached
+ * symbol table. This is only done once from the initial
+ * processor on go.
+ */
+ KDB_DEBUG_STATE("kdb 12", reason);
+ if (!kdb_quiet(reason) || smp_processor_id() == 0) {
+ kdb_bp_install_global(regs);
+ kdbnearsym_cleanup();
+ debug_kusage();
+ }
+ if (!KDB_STATE(GO1)) {
+ /*
+ * Release all other cpus which will see KDB_STATE(LEAVING) is set.
+ */
+ int i;
+ for (i = 0; i < NR_CPUS; ++i) {
+ if (KDB_STATE_CPU(KDB, i))
+ KDB_STATE_SET_CPU(LEAVING, i);
+ KDB_STATE_CLEAR_CPU(WAIT_IPI, i);
+ KDB_STATE_CLEAR_CPU(HOLD_CPU, i);
+ }
+ /* Wait until all the other processors leave kdb */
+ while (kdb_previous_event() != 1)
+ ;
+ if (!kdb_quiet(reason))
+ notify_die(DIE_KDEBUG_LEAVE, "KDEBUG LEAVE", regs, error, 0, 0);
+ kdb_initial_cpu = -1; /* release kdb control */
+ KDB_DEBUG_STATE("kdb 13", reason);
+ }
+ }
+
+ KDB_DEBUG_STATE("kdb 14", result);
+ kdba_restoreint(&int_state);
+#ifdef CONFIG_CPU_XSCALE
+ if ( smp_processor_id() == kdb_initial_cpu &&
+ ( KDB_STATE(SSBPT) | KDB_STATE(DOING_SS) )
+ ) {
+ kdba_setsinglestep(regs);
+ // disable IRQ in stack frame
+ KDB_STATE_SET(A_XSC_ICH);
+ if ( kdba_disable_retirq(regs) ) {
+ KDB_STATE_SET(A_XSC_IRQ);
+ }
+ else {
+ KDB_STATE_CLEAR(A_XSC_IRQ);
+ }
+ }
+#endif
+
+ /* Only do this work if we are really leaving kdb */
+ if (!(KDB_STATE(DOING_SS) || KDB_STATE(SSBPT) || KDB_STATE(RECURSE))) {
+ KDB_DEBUG_STATE("kdb 15", result);
+ kdb_bp_install_local(regs);
+ if (old_regs_saved)
+ set_irq_regs(old_regs);
+ KDB_STATE_CLEAR(KDB_CONTROL);
+ }
+
+ KDB_DEBUG_STATE("kdb 16", result);
+ KDB_FLAG_CLEAR(CATASTROPHIC);
+ KDB_STATE_CLEAR(IP_ADJUSTED); /* Re-adjust ip next time in */
+ KDB_STATE_CLEAR(KEYBOARD);
+ KDB_STATE_CLEAR(KDB); /* Main kdb state has been cleared */
+ KDB_STATE_CLEAR(RECURSE);
+ KDB_STATE_CLEAR(LEAVING); /* No more kdb work after this */
+ KDB_DEBUG_STATE("kdb 17", reason);
+out:
+ atomic_dec(&kdb_event);
+ preempt_enable();
+ return result != 0;
+}
+
+/*
+ * kdb_mdr
+ *
+ * This function implements the guts of the 'mdr' command.
+ *
+ * mdr <addr arg>,<byte count>
+ *
+ * Inputs:
+ * addr Start address
+ * count Number of bytes
+ * Outputs:
+ * None.
+ * Returns:
+ * Always 0. Any errors are detected and printed by kdb_getarea.
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_mdr(kdb_machreg_t addr, unsigned int count)
+{
+ unsigned char c;
+ while (count--) {
+ if (kdb_getarea(c, addr))
+ return 0;
+ kdb_printf("%02x", c);
+ addr++;
+ }
+ kdb_printf("\n");
+ return 0;
+}
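+
+/* Illustrative use of mdr (the address and data are hypothetical):
+ *   mdr c0000000,4
+ * reads 4 bytes and prints them as one unformatted hex string,
+ * e.g. "deadbeef", followed by a newline.
+ */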
+
+/*
+ * kdb_md
+ *
+ * This function implements the 'md', 'md1', 'md2', 'md4', 'md8',
+ * 'mdr' and 'mds' commands.
+ *
+ * md|mds [<addr arg> [<line count> [<radix>]]]
+ * mdWcN [<addr arg> [<line count> [<radix>]]]
+ * where W is the width (1, 2, 4 or 8) and N is the count.
+ * e.g., md1c20 reads 20 bytes, 1 at a time.
+ * mdr <addr arg>,<byte count>
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static void
+kdb_md_line(const char *fmtstr, kdb_machreg_t addr,
+ int symbolic, int nosect, int bytesperword,
+ int num, int repeat, int phys)
+{
+ /* print just one line of data */
+ kdb_symtab_t symtab;
+ char cbuf[32];
+ char *c = cbuf;
+ int i;
+ unsigned long word;
+
+ memset(cbuf, '\0', sizeof(cbuf));
+ if (phys)
+ kdb_printf("phys " kdb_machreg_fmt0 " ", addr);
+ else
+ kdb_printf(kdb_machreg_fmt0 " ", addr);
+
+ for (i = 0; i < num && repeat--; i++) {
+ if (phys) {
+ if (kdb_getphysword(&word, addr, bytesperword))
+ break;
+ } else if (kdb_getword(&word, addr, bytesperword))
+ break;
+ kdb_printf(fmtstr, word);
+ if (symbolic)
+ kdbnearsym(word, &symtab);
+ else
+ memset(&symtab, 0, sizeof(symtab));
+ if (symtab.sym_name) {
+ kdb_symbol_print(word, &symtab, 0);
+ if (!nosect) {
+ kdb_printf("\n");
+ kdb_printf(" %s %s "
+ kdb_machreg_fmt " " kdb_machreg_fmt " " kdb_machreg_fmt,
+ symtab.mod_name,
+ symtab.sec_name,
+ symtab.sec_start,
+ symtab.sym_start,
+ symtab.sym_end);
+ }
+ addr += bytesperword;
+ } else {
+ union {
+ u64 word;
+ unsigned char c[8];
+ } wc;
+ unsigned char *cp;
+#ifdef __BIG_ENDIAN
+ cp = wc.c + 8 - bytesperword;
+#else
+ cp = wc.c;
+#endif
+ wc.word = word;
+#define printable_char(c) ({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.';})
+ switch (bytesperword) {
+ case 8:
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ addr += 4;
+ /* fall through */
+ case 4:
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ addr += 2;
+ /* fall through */
+ case 2:
+ *c++ = printable_char(*cp++);
+ addr++;
+ /* fall through */
+ case 1:
+ *c++ = printable_char(*cp++);
+ addr++;
+ break;
+ }
+#undef printable_char
+ }
+ }
+ kdb_printf("%*s %s\n", (int)((num-i)*(2*bytesperword + 1)+1), " ", cbuf);
+}
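+
+/* Illustrative kdb_md_line output layout (all values hypothetical):
+ *   <address> <word in radix> ... <ascii column from cbuf>
+ * Unprintable bytes appear as '.' in the ascii column. The big/little
+ * endian adjustment of cp above means the ascii column always follows
+ * memory byte order, whichever endianness the word was read with.
+ */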
+
+static int
+kdb_md(int argc, const char **argv)
+{
+ static kdb_machreg_t last_addr;
+ static int last_radix, last_bytesperword, last_repeat;
+ int radix = 16, mdcount = 8, bytesperword = KDB_WORD_SIZE, repeat;
+ int nosect = 0;
+ char fmtchar, fmtstr[64];
+ kdb_machreg_t addr;
+ unsigned long word;
+ long offset = 0;
+ int symbolic = 0;
+ int valid = 0;
+ int phys = 0;
+
+ kdbgetintenv("MDCOUNT", &mdcount);
+ kdbgetintenv("RADIX", &radix);
+ kdbgetintenv("BYTESPERWORD", &bytesperword);
+
+ /* Assume 'md <addr>' and start with environment values */
+ repeat = mdcount * 16 / bytesperword;
+
+ if (strcmp(argv[0], "mdr") == 0) {
+ if (argc != 2)
+ return KDB_ARGCOUNT;
+ valid = 1;
+ } else if (isdigit(argv[0][2])) {
+ bytesperword = (int)(argv[0][2] - '0');
+ if (bytesperword == 0) {
+ bytesperword = last_bytesperword;
+ if (bytesperword == 0) {
+ bytesperword = 4;
+ }
+ }
+ last_bytesperword = bytesperword;
+ repeat = mdcount * 16 / bytesperword;
+ if (!argv[0][3])
+ valid = 1;
+ else if (argv[0][3] == 'c' && argv[0][4]) {
+ char *p;
+ repeat = simple_strtoul(argv[0]+4, &p, 10);
+ mdcount = ((repeat * bytesperword) + 15) / 16;
+ valid = !*p;
+ }
+ last_repeat = repeat;
+ } else if (strcmp(argv[0], "md") == 0)
+ valid = 1;
+ else if (strcmp(argv[0], "mds") == 0)
+ valid = 1;
+ else if (strcmp(argv[0], "mdp") == 0) {
+ phys = valid = 1;
+ }
+ if (!valid)
+ return KDB_NOTFOUND;
+
+ if (argc == 0) {
+ if (last_addr == 0)
+ return KDB_ARGCOUNT;
+ addr = last_addr;
+ radix = last_radix;
+ bytesperword = last_bytesperword;
+ repeat = last_repeat;
+ mdcount = ((repeat * bytesperword) + 15) / 16;
+ }
+
+ if (argc) {
+ kdb_machreg_t val;
+ int diag, nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ if (argc > nextarg+2)
+ return KDB_ARGCOUNT;
+
+ if (argc >= nextarg) {
+ diag = kdbgetularg(argv[nextarg], &val);
+ if (!diag) {
+ mdcount = (int) val;
+ repeat = mdcount * 16 / bytesperword;
+ }
+ }
+ if (argc >= nextarg+1) {
+ diag = kdbgetularg(argv[nextarg+1], &val);
+ if (!diag)
+ radix = (int) val;
+ }
+ }
+
+ if (strcmp(argv[0], "mdr") == 0) {
+ return kdb_mdr(addr, mdcount);
+ }
+
+ switch (radix) {
+ case 10:
+ fmtchar = 'd';
+ break;
+ case 16:
+ fmtchar = 'x';
+ break;
+ case 8:
+ fmtchar = 'o';
+ break;
+ default:
+ return KDB_BADRADIX;
+ }
+
+ last_radix = radix;
+
+ if (bytesperword > KDB_WORD_SIZE)
+ return KDB_BADWIDTH;
+
+ switch (bytesperword) {
+ case 8:
+ sprintf(fmtstr, "%%16.16l%c ", fmtchar);
+ break;
+ case 4:
+ sprintf(fmtstr, "%%8.8l%c ", fmtchar);
+ break;
+ case 2:
+ sprintf(fmtstr, "%%4.4l%c ", fmtchar);
+ break;
+ case 1:
+ sprintf(fmtstr, "%%2.2l%c ", fmtchar);
+ break;
+ default:
+ return KDB_BADWIDTH;
+ }
+
+ last_repeat = repeat;
+ last_bytesperword = bytesperword;
+
+ if (strcmp(argv[0], "mds") == 0) {
+ symbolic = 1;
+ /* Do not save these changes as last_*, they are temporary mds
+ * overrides.
+ */
+ bytesperword = KDB_WORD_SIZE;
+ repeat = mdcount;
+ kdbgetintenv("NOSECT", &nosect);
+ }
+
+ /* Round address down modulo BYTESPERWORD */
+
+ addr &= ~(bytesperword-1);
+
+ while (repeat > 0) {
+ unsigned long a;
+ int n, z, num = (symbolic ? 1 : (16 / bytesperword));
+
+ for (a = addr, z = 0; z < repeat; a += bytesperword, ++z) {
+ if (phys) {
+ if (kdb_getphysword(&word, a, bytesperword)
+ || word)
+ break;
+ } else if (kdb_getword(&word, a, bytesperword) || word)
+ break;
+ }
+ n = min(num, repeat);
+ kdb_md_line(fmtstr, addr, symbolic, nosect, bytesperword, num, repeat, phys);
+ addr += bytesperword * n;
+ repeat -= n;
+ z = (z + num - 1) / num;
+ if (z > 2) {
+ int s = num * (z-2);
+ kdb_printf(kdb_machreg_fmt0 "-" kdb_machreg_fmt0 " zero suppressed\n",
+ addr, addr + bytesperword * s - 1);
+ addr += bytesperword * s;
+ repeat -= s;
+ }
+ }
+ last_addr = addr;
+
+ return 0;
+}
+
+/*
+ * kdb_mm
+ *
+ * This function implements the 'mm' command.
+ *
+ * mm address-expression new-value
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * mm works on machine words, mmW works on bytes.
+ */
+
+static int
+kdb_mm(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset = 0;
+ unsigned long contents;
+ int nextarg;
+ int width;
+
+ if (argv[0][2] && !isdigit(argv[0][2]))
+ return KDB_NOTFOUND;
+
+ if (argc < 2) {
+ return KDB_ARGCOUNT;
+ }
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ return diag;
+
+ if (nextarg > argc)
+ return KDB_ARGCOUNT;
+
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &contents, NULL, NULL)))
+ return diag;
+
+ if (nextarg != argc + 1)
+ return KDB_ARGCOUNT;
+
+ width = argv[0][2] ? (argv[0][2] - '0') : (KDB_WORD_SIZE);
+ if ((diag = kdb_putword(addr, contents, width)))
+ return diag;
+
+ kdb_printf(kdb_machreg_fmt " = " kdb_machreg_fmt "\n", addr, contents);
+
+ return 0;
+}
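+
+/* Illustrative use of mm (address and value hypothetical):
+ *   mm1 c0000000 41
+ * writes the single byte 0x41, while a plain 'mm' with no width digit
+ * writes a full machine word of KDB_WORD_SIZE bytes.
+ */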
+
+/*
+ * kdb_go
+ *
+ * This function implements the 'go' command.
+ *
+ * go [address-expression]
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * KDB_CMD_GO for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_go(int argc, const char **argv)
+{
+ kdb_machreg_t addr;
+ int diag;
+ int nextarg;
+ long offset;
+ struct pt_regs *regs = get_irq_regs();
+
+ if (argc == 1) {
+ if (smp_processor_id() != kdb_initial_cpu) {
+ kdb_printf("go <address> must be issued from the initial cpu, do cpu %d first\n", kdb_initial_cpu);
+ return KDB_ARGCOUNT;
+ }
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg,
+ &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ kdba_setpc(regs, addr);
+ } else if (argc)
+ return KDB_ARGCOUNT;
+
+ diag = KDB_CMD_GO;
+ if (KDB_FLAG(CATASTROPHIC)) {
+ kdb_printf("Catastrophic error detected\n");
+ kdb_printf("kdb_continue_catastrophic=%d, ",
+ kdb_continue_catastrophic);
+ if (kdb_continue_catastrophic == 0 && kdb_go_count++ == 0) {
+ kdb_printf("type go a second time if you really want to continue\n");
+ return 0;
+ }
+ if (kdb_continue_catastrophic == 2) {
+ kdb_do_dump();
+ kdb_printf("forcing reboot\n");
+ kdb_reboot(0, NULL);
+ }
+ kdb_printf("attempting to continue\n");
+ }
+ if (smp_processor_id() != kdb_initial_cpu) {
+ char buf[80];
+ kdb_printf("go was not issued from initial cpu, switching back to cpu %d\n", kdb_initial_cpu);
+ sprintf(buf, "cpu %d\n", kdb_initial_cpu);
+ /* Recursive use of kdb_parse, do not use argv after this point */
+ argv = NULL;
+ diag = kdb_parse(buf);
+ if (diag == KDB_CMD_CPU)
+ KDB_STATE_SET_CPU(GO_SWITCH, kdb_initial_cpu);
+ }
+ return diag;
+}
+
+/*
+ * kdb_rd
+ *
+ * This function implements the 'rd' command.
+ *
+ * rd display all general registers.
+ * rd c display all control registers.
+ * rd d display all debug registers.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_rd(int argc, const char **argv)
+{
+ int diag;
+ if (argc == 0) {
+ if ((diag = kdb_check_regs()))
+ return diag;
+ return kdba_dumpregs(kdb_current_regs, NULL, NULL);
+ }
+
+ if (argc > 2) {
+ return KDB_ARGCOUNT;
+ }
+
+ if ((diag = kdb_check_regs()))
+ return diag;
+ return kdba_dumpregs(kdb_current_regs, argv[1], argc==2 ? argv[2]: NULL);
+}
+
+/*
+ * kdb_rm
+ *
+ * This function implements the 'rm' (register modify) command.
+ *
+ * rm register-name new-contents
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Currently doesn't allow modification of control or
+ * debug registers.
+ */
+
+static int
+kdb_rm(int argc, const char **argv)
+{
+ int diag;
+ int ind = 0;
+ kdb_machreg_t contents;
+
+ if (argc != 2) {
+ return KDB_ARGCOUNT;
+ }
+
+ /*
+ * Allow presence or absence of leading '%' symbol.
+ */
+
+ if (argv[1][0] == '%')
+ ind = 1;
+
+ diag = kdbgetularg(argv[2], &contents);
+ if (diag)
+ return diag;
+
+ if ((diag = kdb_check_regs()))
+ return diag;
+ diag = kdba_setregcontents(&argv[1][ind], kdb_current_regs, contents);
+ if (diag)
+ return diag;
+
+ return 0;
+}
+
+#if defined(CONFIG_MAGIC_SYSRQ)
+/*
+ * kdb_sr
+ *
+ * This function implements the 'sr' (SYSRQ key) command which
+ * interfaces to the soi-disant MAGIC SYSRQ functionality.
+ *
+ * sr <magic-sysrq-code>
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * None.
+ */
+static int
+kdb_sr(int argc, const char **argv)
+{
+ extern int __sysrq_enabled;
+ if (argc != 1) {
+ return KDB_ARGCOUNT;
+ }
+ if (!__sysrq_enabled) {
+ kdb_printf("Auto activating sysrq\n");
+ __sysrq_enabled = 1;
+ }
+
+ handle_sysrq(*argv[1], NULL);
+
+ return 0;
+}
+#endif /* CONFIG_MAGIC_SYSRQ */
+
+/*
+ * kdb_ef
+ *
+ * This function implements the 'regs' (display exception frame)
+ * command. This command takes an address and expects to find
+ * an exception frame at that address, formats and prints it.
+ *
+ * regs address-expression
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Not done yet.
+ */
+
+static int
+kdb_ef(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset;
+ int nextarg;
+
+ if (argc == 1) {
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ return kdba_dumpregs((struct pt_regs *)addr, NULL, NULL);
+ }
+
+ return KDB_ARGCOUNT;
+}
+
+#if defined(CONFIG_MODULES)
+extern struct list_head *kdb_modules;
+extern void free_module(struct module *);
+
+/* modules using other modules */
+struct module_use
+{
+ struct list_head list;
+ struct module *module_which_uses;
+};
+
+/*
+ * kdb_lsmod
+ *
+ * This function implements the 'lsmod' command. Lists currently
+ * loaded kernel modules.
+ *
+ * Mostly taken from userland lsmod.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ *
+ */
+
+static int
+kdb_lsmod(int argc, const char **argv)
+{
+ struct module *mod;
+
+ if (argc != 0)
+ return KDB_ARGCOUNT;
+
+ kdb_printf("Module Size modstruct Used by\n");
+ list_for_each_entry(mod, kdb_modules, list) {
+
+ kdb_printf("%-20s%8u 0x%p ", mod->name,
+ mod->core_size, (void *)mod);
+#ifdef CONFIG_MODULE_UNLOAD
+ kdb_printf("%4d ", module_refcount(mod));
+#endif
+ if (mod->state == MODULE_STATE_GOING)
+ kdb_printf(" (Unloading)");
+ else if (mod->state == MODULE_STATE_COMING)
+ kdb_printf(" (Loading)");
+ else
+ kdb_printf(" (Live)");
+
+#ifdef CONFIG_MODULE_UNLOAD
+ {
+ struct module_use *use;
+ kdb_printf(" [ ");
+ list_for_each_entry(use, &mod->modules_which_use_me, list)
+ kdb_printf("%s ", use->module_which_uses->name);
+ kdb_printf("]\n");
+ }
+#endif
+ }
+
+ return 0;
+}
+
+#endif /* CONFIG_MODULES */
+
+/*
+ * kdb_env
+ *
+ * This function implements the 'env' command. Display the current
+ * environment variables.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_env(int argc, const char **argv)
+{
+ int i;
+
+ for(i=0; i<__nenv; i++) {
+ if (__env[i]) {
+ kdb_printf("%s\n", __env[i]);
+ }
+ }
+
+ if (KDB_DEBUG(MASK))
+ kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
+
+ return 0;
+}
+
+/*
+ * kdb_dmesg
+ *
+ * This function implements the 'dmesg' command to display the contents
+ * of the syslog buffer.
+ *
+ * dmesg [lines] [adjust]
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * None.
+ */
+
+static int
+kdb_dmesg(int argc, const char **argv)
+{
+ char *syslog_data[4], *start, *end, c = '\0', *p;
+ int diag, logging, logsize, lines = 0, adjust = 0, n;
+
+ if (argc > 2)
+ return KDB_ARGCOUNT;
+ if (argc) {
+ char *cp;
+ lines = simple_strtol(argv[1], &cp, 0);
+ if (*cp)
+ lines = 0;
+ if (argc > 1) {
+ adjust = simple_strtoul(argv[2], &cp, 0);
+ if (*cp || adjust < 0)
+ adjust = 0;
+ }
+ }
+
+ /* disable LOGGING if set */
+ diag = kdbgetintenv("LOGGING", &logging);
+ if (!diag && logging) {
+ const char *setargs[] = { "set", "LOGGING", "0" };
+ kdb_set(2, setargs);
+ }
+
+ /* syslog_data[0,1] physical start, end+1. syslog_data[2,3] logical start, end+1. */
+ debugger_syslog_data(syslog_data);
+ if (syslog_data[2] == syslog_data[3])
+ return 0;
+ logsize = syslog_data[1] - syslog_data[0];
+ start = syslog_data[2];
+ end = syslog_data[3];
+#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
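+ /* The syslog buffer is circular. KDB_WRAP maps a logical position,
+ * which may run past the physical end of the buffer, back into the
+ * physical buffer. Hypothetical example: with logsize 16, logical
+ * position syslog_data[0] + 18 maps to physical offset 2.
+ */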
+ for (n = 0, p = start; p < end; ++p) {
+ if ((c = *KDB_WRAP(p)) == '\n')
+ ++n;
+ }
+ if (c != '\n')
+ ++n;
+ if (lines < 0) {
+ if (adjust >= n)
+ kdb_printf("buffer only contains %d lines, nothing printed\n", n);
+ else if (adjust - lines >= n)
+ kdb_printf("buffer only contains %d lines, last %d lines printed\n",
+ n, n - adjust);
+ if (adjust) {
+ for (; start < end && adjust; ++start) {
+ if (*KDB_WRAP(start) == '\n')
+ --adjust;
+ }
+ if (start < end)
+ ++start;
+ }
+ for (p = start; p < end && lines; ++p) {
+ if (*KDB_WRAP(p) == '\n')
+ ++lines;
+ }
+ end = p;
+ } else if (lines > 0) {
+ int skip = n - (adjust + lines);
+ if (adjust >= n) {
+ kdb_printf("buffer only contains %d lines, nothing printed\n", n);
+ skip = n;
+ } else if (skip < 0) {
+ lines += skip;
+ skip = 0;
+ kdb_printf("buffer only contains %d lines, first %d lines printed\n",
+ n, lines);
+ }
+ for (; start < end && skip; ++start) {
+ if (*KDB_WRAP(start) == '\n')
+ --skip;
+ }
+ for (p = start; p < end && lines; ++p) {
+ if (*KDB_WRAP(p) == '\n')
+ --lines;
+ }
+ end = p;
+ }
+ /* Do a line at a time (max 200 chars) to reduce protocol overhead */
+ c = '\n';
+ while (start != end) {
+ char buf[201];
+ p = buf;
+ while (start < end && (c = *KDB_WRAP(start)) && (p - buf) < sizeof(buf)-1) {
+ ++start;
+ *p++ = c;
+ if (c == '\n')
+ break;
+ }
+ *p = '\0';
+ kdb_printf("%s", buf);
+ }
+ if (c != '\n')
+ kdb_printf("\n");
+
+ return 0;
+}
+
+/*
+ * kdb_cpu
+ *
+ * This function implements the 'cpu' command.
+ *
+ * cpu [<cpunum>]
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * KDB_CMD_CPU for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * All cpus should be spinning in kdb(). However, just in case
+ * a cpu did not take the smp_kdb_stop NMI, check that the target
+ * cpu entered kdb() before passing control to it.
+ */
+
+static void
+kdb_cpu_status(void)
+{
+ int i, start_cpu, first_print = 1;
+ char state, prev_state = '?';
+
+ kdb_printf("Currently on cpu %d\n", smp_processor_id());
+ kdb_printf("Available cpus: ");
+ for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
+ if (!cpu_online(i))
+ state = 'F'; /* cpu is offline */
+ else {
+ struct kdb_running_process *krp = kdb_running_process+i;
+ if (KDB_STATE_CPU(KDB, i)) {
+ state = ' '; /* cpu is responding to kdb */
+ if (kdb_task_state_char(krp->p) == 'I')
+ state = 'I'; /* running the idle task */
+ } else if (krp->seqno && krp->p && krp->seqno >= kdb_seqno - 1)
+ state = '+'; /* some kdb data, but not responding */
+ else
+ state = '*'; /* no kdb data */
+ }
+ if (state != prev_state) {
+ if (prev_state != '?') {
+ if (!first_print)
+ kdb_printf(", ");
+ first_print = 0;
+ kdb_printf("%d", start_cpu);
+ if (start_cpu < i-1)
+ kdb_printf("-%d", i-1);
+ if (prev_state != ' ')
+ kdb_printf("(%c)", prev_state);
+ }
+ prev_state = state;
+ start_cpu = i;
+ }
+ }
+ /* print the trailing cpus, ignoring them if they are all offline */
+ if (prev_state != 'F') {
+ if (!first_print)
+ kdb_printf(", ");
+ kdb_printf("%d", start_cpu);
+ if (start_cpu < i-1)
+ kdb_printf("-%d", i-1);
+ if (prev_state != ' ')
+ kdb_printf("(%c)", prev_state);
+ }
+ kdb_printf("\n");
+}
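+
+/* Illustrative kdb_cpu_status output on a hypothetical 8 cpu system:
+ *   Currently on cpu 0
+ *   Available cpus: 0, 1-2(I), 3(+)
+ * A bare number means the cpu is responding to kdb, 'I' is running
+ * the idle task, '+' has some kdb data but is not responding, '*' has
+ * no kdb data and 'F' is offline. A trailing run of offline cpus
+ * (4-7 in this example) is suppressed.
+ */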
+
+static int
+kdb_cpu(int argc, const char **argv)
+{
+ unsigned long cpunum;
+ int diag, i;
+
+ /* ask the other cpus if they are still active */
+ for (i=0; i<NR_CPUS; i++) {
+ if (cpu_online(i))
+ KDB_STATE_CLEAR_CPU(KDB, i);
+ }
+ KDB_STATE_SET(KDB);
+ barrier();
+ /* wait for the other cpus to notice and set state KDB again,
+ * see kdb_main_loop
+ */
+ udelay(1000);
+
+ if (argc == 0) {
+ kdb_cpu_status();
+ return 0;
+ }
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ diag = kdbgetularg(argv[1], &cpunum);
+ if (diag)
+ return diag;
+
+ /*
+ * Validate cpunum
+ */
+ if ((cpunum >= NR_CPUS)
+ || !cpu_online(cpunum)
+ || !KDB_STATE_CPU(KDB, cpunum))
+ return KDB_BADCPUNUM;
+
+ kdb_new_cpu = cpunum;
+
+ /*
+ * Switch to other cpu
+ */
+ return KDB_CMD_CPU;
+}
+
+/* The user may not realize that ps/bta with no parameters does not print idle
+ * or sleeping system daemon processes, so tell them how many were suppressed.
+ */
+void
+kdb_ps_suppressed(void)
+{
+ int idle = 0, daemon = 0;
+ unsigned long mask_I = kdb_task_state_string("I"),
+ mask_M = kdb_task_state_string("M");
+ unsigned long cpu;
+ const struct task_struct *p, *g;
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_online(cpu))
+ continue;
+ p = kdb_curr_task(cpu);
+ if (kdb_task_state(p, mask_I))
+ ++idle;
+ }
+ kdb_do_each_thread(g, p) {
+ if (kdb_task_state(p, mask_M))
+ ++daemon;
+ } kdb_while_each_thread(g, p);
+ if (idle || daemon) {
+ if (idle)
+ kdb_printf("%d idle process%s (state I)%s\n",
+ idle, idle == 1 ? "" : "es",
+ daemon ? " and " : "");
+ if (daemon)
+ kdb_printf("%d sleeping system daemon (state M) process%s",
+ daemon, daemon == 1 ? "" : "es");
+ kdb_printf(" suppressed,\nuse 'ps A' to see all.\n");
+ }
+}
+
+/*
+ * kdb_ps
+ *
+ * This function implements the 'ps' command which shows
+ * a list of the active processes.
+ *
+ * ps [DRSTCZEUIMA] All processes, optionally filtered by state
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+void
+kdb_ps1(const struct task_struct *p)
+{
+ struct kdb_running_process *krp = kdb_running_process + kdb_process_cpu(p);
+ kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n",
+ (void *)p, p->pid, p->parent->pid,
+ kdb_task_has_cpu(p), kdb_process_cpu(p),
+ kdb_task_state_char(p),
+ (void *)(&p->thread),
+ p == kdb_curr_task(smp_processor_id()) ? '*': ' ',
+ p->comm);
+ if (kdb_task_has_cpu(p)) {
+ if (!krp->seqno || !krp->p)
+ kdb_printf(" Error: no saved data for this cpu\n");
+ else {
+ if (krp->seqno < kdb_seqno - 1)
+ kdb_printf(" Warning: process state is stale\n");
+ if (krp->p != p)
+ kdb_printf(" Error: does not match running process table (0x%p)\n", krp->p);
+ }
+ }
+}
+
+static int
+kdb_ps(int argc, const char **argv)
+{
+ struct task_struct *g, *p;
+ unsigned long mask, cpu;
+
+ if (argc == 0)
+ kdb_ps_suppressed();
+ kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n",
+ (int)(2*sizeof(void *))+2, "Task Addr",
+ (int)(2*sizeof(void *))+2, "Thread");
+ mask = kdb_task_state_string(argc ? argv[1] : NULL);
+ /* Run the active tasks first */
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_online(cpu))
+ continue;
+ p = kdb_curr_task(cpu);
+ if (kdb_task_state(p, mask))
+ kdb_ps1(p);
+ }
+ kdb_printf("\n");
+ /* Now the real tasks */
+ kdb_do_each_thread(g, p) {
+ if (kdb_task_state(p, mask))
+ kdb_ps1(p);
+ } kdb_while_each_thread(g, p);
+
+ return 0;
+}
+
+/*
+ * kdb_pid
+ *
+ * This function implements the 'pid' command which switches
+ * the currently active process.
+ *
+ * pid [<pid> | R]
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+
+static int
+kdb_pid(int argc, const char **argv)
+{
+ struct task_struct *p;
+ unsigned long val;
+ int diag;
+
+ if (argc > 1)
+ return KDB_ARGCOUNT;
+
+ if (argc) {
+ if (strcmp(argv[1], "R") == 0) {
+ p = KDB_RUNNING_PROCESS_ORIGINAL[kdb_initial_cpu].p;
+ } else {
+ diag = kdbgetularg(argv[1], &val);
+ if (diag)
+ return KDB_BADINT;
+
+ p = find_task_by_pid_ns((pid_t)val, &init_pid_ns);
+ if (!p) {
+ kdb_printf("No task with pid=%d\n", (pid_t)val);
+ return 0;
+ }
+ }
+
+ kdba_set_current_task(p);
+ }
+
+ kdb_printf("KDB current process is %s(pid=%d)\n", kdb_current_task->comm,
+ kdb_current_task->pid);
+
+ return 0;
+}
+
+/*
+ * kdb_ll
+ *
+ * This function implements the 'll' command which follows a linked
+ * list and executes an arbitrary command for each element.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_ll(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset = 0;
+ kdb_machreg_t va;
+ unsigned long linkoffset;
+ int nextarg;
+ const char *command;
+
+ if (argc != 3) {
+ return KDB_ARGCOUNT;
+ }
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ diag = kdbgetularg(argv[2], &linkoffset);
+ if (diag)
+ return diag;
+
+ /*
+ * Use the starting address as the first element in the list,
+ * and assume that the list ends with a null pointer.
+ */
+
+ va = addr;
+ if (!(command = kdb_strdup(argv[3], GFP_KDB))) {
+ kdb_printf("%s: cannot duplicate command\n", __FUNCTION__);
+ return 0;
+ }
+ /* Recursive use of kdb_parse, do not use argv after this point */
+ argv = NULL;
+
+ while (va) {
+ char buf[80];
+
+ sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
+ diag = kdb_parse(buf);
+ if (diag) {
+ kfree(command); /* do not leak the duplicated command */
+ return diag;
+ }
+
+ addr = va + linkoffset;
+ if (kdb_getword(&va, addr, sizeof(va))) {
+ kfree(command);
+ return 0;
+ }
+ }
+ kfree(command);
+
+ return 0;
+}
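+
+/* Illustrative use of ll (all values hypothetical): for a singly
+ * linked list whose elements keep their 'next' pointer at byte
+ * offset 8,
+ *   ll c0000000 8 md1c16
+ * runs 'md1c16 <element address>' on every element until a NULL link.
+ */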
+
+/*
+ * kdb_help
+ *
+ * This function implements the 'help' and '?' commands.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_help(int argc, const char **argv)
+{
+ kdbtab_t *kt;
+ int i;
+
+ kdb_printf("%-15.15s %-20.20s %s\n", "Command", "Usage", "Description");
+ kdb_printf("----------------------------------------------------------\n");
+ for(i=0, kt=kdb_commands; i<kdb_max_commands; i++, kt++) {
+ if (kt->cmd_name)
+ kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name,
+ kt->cmd_usage, kt->cmd_help);
+ }
+ return 0;
+}
+
+extern int kdb_wake_up_process(struct task_struct * p);
+
+/*
+ * kdb_kill
+ *
+ * This function implements the 'kill' commands.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_kill(int argc, const char **argv)
+{
+ long sig, pid;
+ char *endp;
+ struct task_struct *p;
+ struct siginfo info;
+
+ if (argc!=2)
+ return KDB_ARGCOUNT;
+
+ sig = simple_strtol(argv[1], &endp, 0);
+ if (*endp)
+ return KDB_BADINT;
+ if (sig >= 0 ) {
+ kdb_printf("Invalid signal parameter.<-signal>\n");
+ return 0;
+ }
+ sig=-sig;
+
+ pid = simple_strtol(argv[2], &endp, 0);
+ if (*endp)
+ return KDB_BADINT;
+ if (pid <=0 ) {
+ kdb_printf("Process ID must be large than 0.\n");
+ return 0;
+ }
+
+ /* Find the process. */
+ if (!(p = find_task_by_pid_ns(pid, &init_pid_ns))) {
+ kdb_printf("The specified process isn't found.\n");
+ return 0;
+ }
+ p = p->group_leader;
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+ info.si_pid = pid; /* use same capabilities as process being signalled */
+ info.si_uid = 0; /* kdb has root authority */
+ kdb_send_sig_info(p, &info, kdb_seqno);
+ return 0;
+}
+
+struct kdb_tm {
+ int tm_sec; /* seconds */
+ int tm_min; /* minutes */
+ int tm_hour; /* hours */
+ int tm_mday; /* day of the month */
+ int tm_mon; /* month */
+ int tm_year; /* year */
+};
+
+static void
+kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
+{
+ /* This will work from 1970-2099, 2100 is not a leap year */
+ static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+ memset(tm, 0, sizeof(*tm));
+ tm->tm_sec = tv->tv_sec % (24 * 60 * 60);
+ tm->tm_mday = tv->tv_sec / (24 * 60 * 60) + (2 * 365 + 1); /* shift base from 1970 to 1968 */
+ tm->tm_min = tm->tm_sec / 60 % 60;
+ tm->tm_hour = tm->tm_sec / 60 / 60;
+ tm->tm_sec = tm->tm_sec % 60;
+ tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
+ tm->tm_mday %= (4*365+1);
+ mon_day[1] = 29;
+ while (tm->tm_mday >= mon_day[tm->tm_mon]) {
+ tm->tm_mday -= mon_day[tm->tm_mon];
+ if (++tm->tm_mon == 12) {
+ tm->tm_mon = 0;
+ ++tm->tm_year;
+ mon_day[1] = 28;
+ }
+ }
+ ++tm->tm_mday;
+}
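+
+/* Worked example for kdb_gmtime: tv_sec = 0 decodes to
+ * 1970-01-01 00:00:00. Shifting the day base back to 1968 (2*365+1
+ * days) makes every 4 year group in the loop start with a leap year,
+ * so mon_day[1] = 29 is correct for the first year of each group and
+ * is reset to 28 for the remaining three years.
+ */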
+
+/*
+ * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
+ * I cannot call that code directly from kdb; it has an unconditional
+ * cli()/sti() and calls routines that take locks which can stop the debugger.
+ */
+
+static void
+kdb_sysinfo(struct sysinfo *val)
+{
+ struct timespec uptime;
+ do_posix_clock_monotonic_gettime(&uptime);
+ memset(val, 0, sizeof(*val));
+ val->uptime = uptime.tv_sec;
+ val->loads[0] = avenrun[0];
+ val->loads[1] = avenrun[1];
+ val->loads[2] = avenrun[2];
+ val->procs = nr_threads-1;
+ si_meminfo(val);
+ kdb_si_swapinfo(val);
+
+ return;
+}
+
+/*
+ * kdb_summary
+ *
+ * This function implements the 'summary' command.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_summary(int argc, const char **argv)
+{
+ extern struct timespec xtime;
+ extern struct timezone sys_tz;
+ struct kdb_tm tm;
+ struct sysinfo val;
+
+ if (argc)
+ return KDB_ARGCOUNT;
+
+ kdb_printf("sysname %s\n", init_uts_ns.name.sysname);
+ kdb_printf("release %s\n", init_uts_ns.name.release);
+ kdb_printf("version %s\n", init_uts_ns.name.version);
+ kdb_printf("machine %s\n", init_uts_ns.name.machine);
+ kdb_printf("nodename %s\n", init_uts_ns.name.nodename);
+ kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
+ kdb_printf("ccversion %s\n", __stringify(CCVERSION));
+
+ kdb_gmtime(&xtime, &tm);
+ kdb_printf("date %04d-%02d-%02d %02d:%02d:%02d tz_minuteswest %d\n",
+ 1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec,
+ sys_tz.tz_minuteswest);
+
+ kdb_sysinfo(&val);
+ kdb_printf("uptime ");
+ if (val.uptime > (24*60*60)) {
+ int days = val.uptime / (24*60*60);
+ val.uptime %= (24*60*60);
+ kdb_printf("%d day%s ", days, days == 1 ? "" : "s");
+ }
+ kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
+
+ /* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
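+ /* avenrun[] is fixed point with FSHIFT fractional bits. Hypothetical
+ * example with FSHIFT = 11 (FIXED_1 = 2048): a raw value of 3072
+ * prints as "1.50", since 3072 >> 11 = 1 and
+ * ((3072 & 2047) * 100) >> 11 = 50.
+ */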
+ kdb_printf("load avg %ld.%02ld %ld.%02ld %ld.%02ld\n",
+ LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
+ LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
+ LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
+ kdb_printf("\n");
+#undef LOAD_INT
+#undef LOAD_FRAC
+
+ kdb_meminfo_proc_show(); /* in fs/proc/meminfo.c */
+
+ return 0;
+}
+
+/*
+ * kdb_per_cpu
+ *
+ * This function implements the 'per_cpu' command.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ */
+
+static int
+kdb_per_cpu(int argc, const char **argv)
+{
+ char buf[256], fmtstr[64];
+ kdb_symtab_t symtab;
+ cpumask_t suppress;
+ int cpu, diag;
+ unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
+
+ if (argc < 1 || argc > 3)
+ return KDB_ARGCOUNT;
+
+ cpus_clear(suppress);
+ snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]);
+ if (!kdbgetsymval(buf, &symtab)) {
+ kdb_printf("%s is not a per_cpu variable\n", argv[1]);
+ return KDB_BADADDR;
+ }
+ if (argc >=2 && (diag = kdbgetularg(argv[2], &bytesperword)))
+ return diag;
+ if (!bytesperword)
+ bytesperword = KDB_WORD_SIZE;
+ else if (bytesperword > KDB_WORD_SIZE)
+ return KDB_BADWIDTH;
+ sprintf(fmtstr, "%%0%dlx ", (int)(2*bytesperword));
+ if (argc >= 3) {
+ if ((diag = kdbgetularg(argv[3], &whichcpu)))
+ return diag;
+ if (!cpu_online(whichcpu)) {
+ kdb_printf("cpu %ld is not online\n", whichcpu);
+ return KDB_BADCPUNUM;
+ }
+ }
+
+ /* Most architectures use __per_cpu_offset[cpu], some use
+ * __per_cpu_offset(cpu), and non-SMP builds have no __per_cpu_offset.
+ */
+#ifdef __per_cpu_offset
+#define KDB_PCU(cpu) __per_cpu_offset(cpu)
+#else
+#ifdef CONFIG_SMP
+#define KDB_PCU(cpu) __per_cpu_offset[cpu]
+#else
+#define KDB_PCU(cpu) 0
+#endif
+#endif
+
+ for_each_online_cpu(cpu) {
+ if (whichcpu != ~0UL && whichcpu != cpu)
+ continue;
+ addr = symtab.sym_start + KDB_PCU(cpu);
+ if ((diag = kdb_getword(&val, addr, bytesperword))) {
+ kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to read, diag=%d\n",
+ cpu, addr, diag);
+ continue;
+ }
+#ifdef CONFIG_SMP
+ if (!val) {
+ cpu_set(cpu, suppress);
+ continue;
+ }
+#endif /* CONFIG_SMP */
+ kdb_printf("%5d ", cpu);
+ kdb_md_line(fmtstr, addr,
+ bytesperword == KDB_WORD_SIZE,
+ 1, bytesperword, 1, 1, 0);
+ }
+ if (cpus_weight(suppress) == 0)
+ return 0;
+ kdb_printf("Zero suppressed cpu(s):");
+ for_each_cpu_mask(cpu, suppress) {
+ kdb_printf(" %d", cpu);
+ if (cpu == NR_CPUS-1 || next_cpu(cpu, suppress) != cpu + 1)
+ continue;
+ while (cpu < NR_CPUS && next_cpu(cpu, suppress) == cpu + 1)
+ ++cpu;
+ kdb_printf("-%d", cpu);
+ }
+ kdb_printf("\n");
+
+#undef KDB_PCU
+
+ return 0;
+}
+
+/*
+ * display help for the use of cmd | grep pattern
+ */
+static int
+kdb_grep_help(int argc, const char **argv)
+{
+ kdb_printf ("Usage of cmd args | grep pattern:\n");
+ kdb_printf (" Any command's output may be filtered through an ");
+ kdb_printf ("emulated 'pipe'.\n");
+ kdb_printf (" 'grep' is just a key word.\n");
+ kdb_printf
+ (" The pattern may include a very limited set of metacharacters:\n");
+ kdb_printf (" pattern or ^pattern or pattern$ or ^pattern$\n");
+ kdb_printf
+ (" And if there are spaces in the pattern, you may quote it:\n");
+ kdb_printf
+ (" \"pat tern\" or \"^pat tern\" or \"pat tern$\" or \"^pat tern$\"\n");
+ return 0;
+}
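+
+/* Illustrative invocations (the commands exist in this file, the patterns
+ * are examples only):
+ * kdb> ps | grep bash
+ * kdb> dmesg | grep "^kdb"
+ */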
+
+/*
+ * kdb_register_repeat
+ *
+ * This function is used to register a kernel debugger command.
+ *
+ * Inputs:
+ * cmd Command name
+ * func Function to execute the command
+ * usage A simple usage string showing arguments
+ * help A simple help string describing command
+ * repeat Does the command auto repeat on enter?
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, one if a duplicate command.
+ * Locking:
+ * none.
+ * Remarks:
+ *
+ */
+
+#define kdb_command_extend 50 /* arbitrary */
+int
+kdb_register_repeat(char *cmd,
+ kdb_func_t func,
+ char *usage,
+ char *help,
+ short minlen,
+ kdb_repeat_t repeat)
+{
+ int i;
+ kdbtab_t *kp;
+
+ /*
+ * Brute force method to determine duplicates
+ */
+ for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
+ if (kp->cmd_name && (strcmp(kp->cmd_name, cmd)==0)) {
+ kdb_printf("Duplicate kdb command registered: "
+ "%s, func %p help %s\n", cmd, func, help);
+ return 1;
+ }
+ }
+
+ /*
+ * Insert command into first available location in table
+ */
+ for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
+ if (kp->cmd_name == NULL) {
+ break;
+ }
+ }
+
+ if (i >= kdb_max_commands) {
+ kdbtab_t *new = kmalloc((kdb_max_commands + kdb_command_extend) * sizeof(*new), GFP_KDB);
+ if (!new) {
+ kdb_printf("Could not allocate new kdb_command table\n");
+ return 1;
+ }
+ if (kdb_commands) {
+ memcpy(new, kdb_commands, kdb_max_commands * sizeof(*new));
+ kfree(kdb_commands);
+ }
+ memset(new + kdb_max_commands, 0, kdb_command_extend * sizeof(*new));
+ kdb_commands = new;
+ kp = kdb_commands + kdb_max_commands;
+ kdb_max_commands += kdb_command_extend;
+ }
+
+ kp->cmd_name = cmd;
+ kp->cmd_func = func;
+ kp->cmd_usage = usage;
+ kp->cmd_help = help;
+ kp->cmd_flags = 0;
+ kp->cmd_minlen = minlen;
+ kp->cmd_repeat = repeat;
+
+ return 0;
+}
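+
+/* A minimal usage sketch, compiled out so it has no effect on the patch:
+ * how a module might register its own kdb command and remove it again on
+ * unload. The command name and handler are hypothetical.
+ */
+#if 0
+static int example_cmd(int argc, const char **argv)
+{
+ if (argc)
+ return KDB_ARGCOUNT;
+ kdb_printf("hello from example_cmd\n");
+ return 0;
+}
+
+static int __init example_register(void)
+{
+ /* returns 0 on success, 1 if "example" is already registered */
+ return kdb_register_repeat("example", example_cmd, "",
+ "Example command", 0, KDB_REPEAT_NONE);
+}
+
+static void example_unregister(void)
+{
+ kdb_unregister("example");
+}
+#endif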
+
+/*
+ * kdb_register
+ *
+ * Compatibility register function for commands that do not need to
+ * specify a repeat state. Equivalent to kdb_register_repeat with
+ * KDB_REPEAT_NONE.
+ *
+ * Inputs:
+ * cmd Command name
+ * func Function to execute the command
+ * usage A simple usage string showing arguments
+ * help A simple help string describing command
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, one if a duplicate command.
+ * Locking:
+ * none.
+ * Remarks:
+ *
+ */
+
+int
+kdb_register(char *cmd,
+ kdb_func_t func,
+ char *usage,
+ char *help,
+ short minlen)
+{
+ return kdb_register_repeat(cmd, func, usage, help, minlen, KDB_REPEAT_NONE);
+}
+
+/*
+ * kdb_unregister
+ *
+ * This function is used to unregister a kernel debugger command.
+ * It is generally called when a module which implements kdb
+ * commands is unloaded.
+ *
+ * Inputs:
+ * cmd Command name
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, one command not registered.
+ * Locking:
+ * none.
+ * Remarks:
+ *
+ */
+
+int
+kdb_unregister(char *cmd)
+{
+ int i;
+ kdbtab_t *kp;
+
+ /*
+ * find the command.
+ */
+ for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
+ if (kp->cmd_name && (strcmp(kp->cmd_name, cmd)==0)) {
+ kp->cmd_name = NULL;
+ return 0;
+ }
+ }
+
+ /*
+ * Couldn't find it.
+ */
+ return 1;
+}
+
+/*
+ * kdb_inittab
+ *
+ * This function is called by the kdb_init function to initialize
+ * the kdb command table. It must be called prior to any other
+ * call to kdb_register_repeat.
+ *
+ * Inputs:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ *
+ */
+
+static void __init
+kdb_inittab(void)
+{
+ int i;
+ kdbtab_t *kp;
+
+ for (i = 0, kp = kdb_commands; i < kdb_max_commands; i++, kp++) {
+ kp->cmd_name = NULL;
+ }
+
+ kdb_register_repeat("md", kdb_md, "<vaddr>", "Display Memory Contents, also mdWcN, e.g. md8c1", 1, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mdr", kdb_md, "<vaddr> <bytes>", "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mdp", kdb_md, "<paddr> <bytes>", "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mds", kdb_md, "<vaddr>", "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mm", kdb_mm, "<vaddr> <contents>", "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("id", kdb_id, "<vaddr>", "Display Instructions", 1, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("go", kdb_go, "[<vaddr>]", "Continue Execution", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("rd", kdb_rd, "", "Display Registers", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("rm", kdb_rm, "<reg> <contents>", "Modify Registers", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ef", kdb_ef, "<vaddr>", "Display exception frame", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bt", kdb_bt, "[<vaddr>]", "Stack traceback", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("btp", kdb_bt, "<pid>", "Display stack for process <pid>", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bta", kdb_bt, "[DRSTCZEUIMA]", "Display stack all processes", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("btc", kdb_bt, "", "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("btt", kdb_bt, "<vaddr>", "Backtrace process given its struct task address", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ll", kdb_ll, "<first-element> <linkoffset> <cmd>", "Execute cmd for each element in linked list", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("env", kdb_env, "", "Show environment variables", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("set", kdb_set, "", "Set environment variables", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("help", kdb_help, "", "Display Help Message", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("?", kdb_help, "", "Display Help Message", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("cpu", kdb_cpu, "<cpunum>","Switch to new cpu", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ps", kdb_ps, "[<flags>|A]", "Display active task list", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("pid", kdb_pid, "<pidnum>", "Switch to another task", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("reboot", kdb_reboot, "", "Reboot the machine immediately", 0, KDB_REPEAT_NONE);
+#if defined(CONFIG_KDB_KDUMP)
+ kdb_register_repeat("kdump", kdb_kdump, "", "Calls kdump mode", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_MODULES)
+ kdb_register_repeat("lsmod", kdb_lsmod, "", "List loaded kernel modules", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_MAGIC_SYSRQ)
+ kdb_register_repeat("sr", kdb_sr, "<key>", "Magic SysRq key", 0, KDB_REPEAT_NONE);
+#endif
+ kdb_register_repeat("dmesg", kdb_dmesg, "[lines]", "Display syslog buffer", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"", "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>", "Send a signal to a process", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("summary", kdb_summary, "", "Summarize the system", 4, KDB_REPEAT_NONE);
+ kdb_register_repeat("per_cpu", kdb_per_cpu, "", "Display per_cpu variables", 3, KDB_REPEAT_NONE);
+ kdb_register_repeat("grephelp", kdb_grep_help, "",
+ "Display help on | grep", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("print", kdb_debuginfo_print, "<expression>",
+ "Type casting, as in lcrash", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("px", kdb_debuginfo_print, "<expression>",
+ "Print in hex (type casting) (see 'pxhelp')", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("pxhelp", kdb_pxhelp, "",
+ "Display help for the px command", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("pd", kdb_debuginfo_print, "<expression>",
+ "Print in decimal (type casting)", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("whatis", kdb_debuginfo_print,"<type or symbol>",
+ "Display the type, or the address for a symbol", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("sizeof", kdb_debuginfo_print, "<type>",
+ "Display the size of a structure, typedef, etc.", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("walk", kdb_walk, "",
+ "Walk a linked list (see 'walkhelp')", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("walkhelp", kdb_walkhelp, "",
+ "Display help for the walk command", 0, KDB_REPEAT_NONE);
+}
+
+/*
+ * The user has written to our "file"
+ * file: the /proc file
+ * buffer: user address of the data being written
+ * count: number of bytes in the user's buffer
+ */
+static int
+kdb_write_proc_filename(struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+{
+ int ret_count;
+
+ /* our buffer is kdb_debug_info_filename[256]; leave room for the '\0' */
+ if (count == 0 || count >= 256) {
+ return 0;
+ }
+ if (copy_from_user(kdb_debug_info_filename, buffer, count)) {
+ return 0;
+ }
+ ret_count = count; /* actual count */
+ /* remove any newline from the end of the file name */
+ if (kdb_debug_info_filename[count-1] == '\n')
+ count--;
+ kdb_debug_info_filename[count] = '\0';
+
+ return ret_count;
+}
+
+/*
+ * The user is reading from our "file"
+ * page: the beginning of the user's buffer
+ * start: pointer to the user's pointer (tells the caller where we put the data)
+ * off: offset into the resource to be read
+ * count: length of the read
+ */
+static int
+kdb_read_proc_filename(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ /* return the contents of kdb_debug_info_filename[] */
+ return snprintf(page, count, "%s\n", kdb_debug_info_filename);
+}
+
+/*
+ * kdb_proc_filename
+ *
+ * create /proc/kdb/debug_info_name
+ */
+static void
+kdb_proc_filename(void)
+{
+ struct proc_dir_entry *kdb_dir_entry, *kdb_file_entry;
+
+ /* create /proc/kdb */
+ kdb_dir_entry = proc_mkdir("kdb", NULL);
+ if (!kdb_dir_entry) {
+ printk ("kdb could not create /proc/kdb\n");
+ return;
+ }
+
+ /* read/write by owner (root) only */
+ kdb_file_entry = create_proc_entry("debug_info_name",
+ S_IRUSR | S_IWUSR, kdb_dir_entry);
+ if (!kdb_file_entry) {
+ printk ("kdb could not create /proc/kdb/kdb_dir_entry\n");
+ return;
+ }
+ kdb_file_entry->nlink = 1;
+ kdb_file_entry->data = (void *)NULL;
+ kdb_file_entry->read_proc = kdb_read_proc_filename;
+ kdb_file_entry->write_proc = kdb_write_proc_filename;
+ return;
+}
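+
+/* Illustrative use from userspace (the file name is hypothetical):
+ * echo /boot/vmlinux.debug > /proc/kdb/debug_info_name
+ * cat /proc/kdb/debug_info_name
+ */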
+
+/*
+ * kdb_cmd_init
+ *
+ * This function is called by the kdb_init function to execute any
+ * commands defined in kdb_cmds.
+ *
+ * Inputs:
+ * Commands in *kdb_cmds[];
+ * Outputs:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ *
+ */
+
+static void __init
+kdb_cmd_init(void)
+{
+ int i, diag;
+ for (i = 0; kdb_cmds[i]; ++i) {
+ if (!defcmd_in_progress)
+ if (console_loglevel >= 6 /* KERN_INFO */)
+ kdb_printf("kdb_cmd[%d]: %s", i, kdb_cmds[i]);
+ diag = kdb_parse(kdb_cmds[i]);
+ if (diag)
+ kdb_printf("kdb command %s failed, kdb diag %d\n",
+ kdb_cmds[i], diag);
+ }
+ if (defcmd_in_progress) {
+ kdb_printf("Incomplete 'defcmd' set, forcing endefcmd\n");
+ kdb_parse("endefcmd");
+ }
+}
+
+/*
+ * kdb_panic
+ *
+ * Invoked via the panic_notifier_list.
+ *
+ * Inputs:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * Zero.
+ * Locking:
+ * None.
+ * Remarks:
+ * When this function is called from panic(), the other cpus have already
+ * been stopped.
+ *
+ */
+
+static int
+kdb_panic(struct notifier_block *self, unsigned long command, void *ptr)
+{
+ KDB_FLAG_SET(CATASTROPHIC); /* kernel state is dubious now */
+ KDB_ENTER();
+ return 0;
+}
+
+static struct notifier_block kdb_block = { kdb_panic, NULL, 0 };
+
+#ifdef CONFIG_SYSCTL
+static int proc_do_kdb(ctl_table *table, int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ if (KDB_FLAG(NO_CONSOLE) && write) {
+ printk(KERN_ERR "kdb has no working console and has switched itself off\n");
+ return -EINVAL;
+ }
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
+static ctl_table kdb_kern_table[] = {
+ {
+ .procname = "kdb",
+ .data = &kdb_on,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_kdb,
+ },
+ {}
+};
+
+static ctl_table kdb_root_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kdb_kern_table,
+ },
+ {}
+};
+#endif /* CONFIG_SYSCTL */
+
+static int
+kdb_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ if (action == CPU_ONLINE) {
+ int cpu = (unsigned long)hcpu;
+ cpumask_t save_cpus_allowed = current->cpus_allowed;
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ kdb(KDB_REASON_CPU_UP, 0, NULL); /* do kdb setup on this cpu */
+ set_cpus_allowed_ptr(current, &save_cpus_allowed);
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kdb_cpu_nfb = {
+ .notifier_call = kdb_cpu_callback
+};
+
+/*
+ * kdb_init
+ *
+ * Initialize the kernel debugger environment.
+ *
+ * Parameters:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ * None.
+ */
+
+void __init
+kdb_init(void)
+{
+ kdb_initial_cpu = smp_processor_id();
+ /*
+ * This must be called before any calls to kdb_printf.
+ */
+ kdb_io_init();
+
+ kdb_inittab(); /* Initialize Command Table */
+ kdb_initbptab(); /* Initialize Breakpoint Table */
+ kdb_id_init(); /* Initialize Disassembler */
+ kdba_init(); /* Architecture Dependent Initialization */
+
+ /*
+ * Use printk() to get message in log_buf[];
+ */
+ printk("kdb version %d.%d%s by Keith Owens, Scott Lurndal. "\
+ "Copyright SGI, All Rights Reserved\n",
+ KDB_MAJOR_VERSION, KDB_MINOR_VERSION, KDB_TEST_VERSION);
+
+ kdb_cmd_init(); /* Preset commands from kdb_cmds */
+ kdb_initial_cpu = -1; /* Avoid recursion problems */
+ kdb(KDB_REASON_CPU_UP, 0, NULL); /* do kdb setup on boot cpu */
+ kdb_initial_cpu = smp_processor_id();
+ atomic_notifier_chain_register(&panic_notifier_list, &kdb_block);
+ register_cpu_notifier(&kdb_cpu_nfb);
+
+#ifdef kdba_setjmp
+ kdbjmpbuf = vmalloc(NR_CPUS * sizeof(*kdbjmpbuf));
+ if (!kdbjmpbuf)
+ printk(KERN_ERR "Cannot allocate kdbjmpbuf, no kdb recovery will be possible\n");
+#endif /* kdba_setjmp */
+
+ kdb_initial_cpu = -1;
+ kdb_wait_for_cpus_secs = 2*num_online_cpus();
+ kdb_wait_for_cpus_secs = max(kdb_wait_for_cpus_secs, 10);
+}
+
+#ifdef CONFIG_SYSCTL
+static int __init
+kdb_late_init(void)
+{
+ register_sysctl_table(kdb_root_table);
+ /* kmalloc cannot be used until this point in boot, so create the proc entry now */
+ kdb_proc_filename();
+ return 0;
+}
+
+__initcall(kdb_late_init);
+#endif
+
+EXPORT_SYMBOL(kdb_register);
+EXPORT_SYMBOL(kdb_register_repeat);
+EXPORT_SYMBOL(kdb_unregister);
+EXPORT_SYMBOL(kdb_getarea_size);
+EXPORT_SYMBOL(kdb_putarea_size);
+EXPORT_SYMBOL(kdb_getuserarea_size);
+EXPORT_SYMBOL(kdb_putuserarea_size);
+EXPORT_SYMBOL(kdbgetularg);
+EXPORT_SYMBOL(kdbgetenv);
+EXPORT_SYMBOL(kdbgetintenv);
+EXPORT_SYMBOL(kdbgetaddrarg);
+EXPORT_SYMBOL(kdb);
+EXPORT_SYMBOL(kdb_on);
+EXPORT_SYMBOL(kdb_seqno);
+EXPORT_SYMBOL(kdb_initial_cpu);
+EXPORT_SYMBOL(kdbnearsym);
+EXPORT_SYMBOL(kdb_printf);
+EXPORT_SYMBOL(kdb_symbol_print);
+EXPORT_SYMBOL(kdb_running_process);
--- /dev/null
+/*
+ * Kernel Debugger Architecture Independent Support Functions
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * 03/02/13 added new 2.5 kallsyms <xavier.bru@bull.net>
+ */
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kallsyms.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+
+/*
+ * Symbol table functions.
+ */
+
+/*
+ * kdbgetsymval
+ *
+ * Return the address of the given symbol.
+ *
+ * Parameters:
+ * symname Character string containing symbol name
+ * symtab Structure to receive results
+ * Outputs:
+ * Returns:
+ * 0 Symbol not found, symtab zero filled
+ * 1 Symbol mapped to module/symbol/section, data in symtab
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
+{
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname, symtab);
+ memset(symtab, 0, sizeof(*symtab));
+
+ if ((symtab->sym_start = kallsyms_lookup_name(symname))) {
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbgetsymval: returns 1, symtab->sym_start=0x%lx\n", symtab->sym_start);
+ return 1;
+ }
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbgetsymval: returns 0\n");
+ return 0;
+}
+EXPORT_SYMBOL(kdbgetsymval);
+
+/*
+ * kdbnearsym
+ *
+ * Return the name of the symbol with the nearest address
+ * less than 'addr'.
+ *
+ * Parameters:
+ * addr Address to check for symbol near
+ * symtab Structure to receive results
+ * Outputs:
+ * Returns:
+ * 0 No sections contain this address, symtab zero filled
+ * 1 Address mapped to module/symbol/section, data in symtab
+ * Locking:
+ * None.
+ * Remarks:
+ * 2.6 kallsyms has a "feature" where it unpacks the name into a string.
+ * If that string is reused before the caller expects it then the caller
+ * sees its string change without warning. To avoid cluttering up the
+ * main kdb code with lots of kdb_strdup, tests and kfree calls, kdbnearsym
+ * maintains an LRU list of the last few unique strings. The list is sized
+ * large enough to hold active strings, no kdb caller of kdbnearsym makes
+ * more than ~20 later calls before using a saved value.
+ */
+
+static char *kdb_name_table[100]; /* arbitrary size */
+
+int
+kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
+{
+ int ret = 0;
+ unsigned long symbolsize;
+ unsigned long offset;
+#define knt1_size 128 /* must be >= kallsyms table size */
+ char *knt1 = NULL;
+
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab);
+ memset(symtab, 0, sizeof(*symtab));
+
+ if (addr < 4096)
+ goto out;
+ knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
+ if (!knt1) {
+ kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n", addr);
+ goto out;
+ }
+ symtab->sym_name = kallsyms_lookup(addr, &symbolsize, &offset, (char **)(&symtab->mod_name), knt1);
+ if (offset > 8*1024*1024) {
+ symtab->sym_name = NULL;
+ addr = offset = symbolsize = 0;
+ }
+ symtab->sym_start = addr - offset;
+ symtab->sym_end = symtab->sym_start + symbolsize;
+ ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
+
+ if (ret) {
+ int i;
+ /* Another 2.6 kallsyms "feature". Sometimes the sym_name is
+ * set but the buffer passed into kallsyms_lookup is not used,
+ * so it contains garbage. The caller has to work out which
+ * buffer needs to be saved.
+ *
+ * What was Rusty smoking when he wrote that code?
+ */
+ if (symtab->sym_name != knt1) {
+ strncpy(knt1, symtab->sym_name, knt1_size);
+ knt1[knt1_size-1] = '\0';
+ }
+ for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
+ if (kdb_name_table[i] && strcmp(kdb_name_table[i], knt1) == 0)
+ break;
+ }
+ if (i >= ARRAY_SIZE(kdb_name_table)) {
+ debug_kfree(kdb_name_table[0]);
+ memcpy(kdb_name_table, kdb_name_table+1,
+ sizeof(kdb_name_table[0])*(ARRAY_SIZE(kdb_name_table)-1));
+ } else {
+ debug_kfree(knt1);
+ knt1 = kdb_name_table[i];
+ memcpy(kdb_name_table+i, kdb_name_table+i+1,
+ sizeof(kdb_name_table[0])*(ARRAY_SIZE(kdb_name_table)-i-1));
+ }
+ i = ARRAY_SIZE(kdb_name_table) - 1;
+ kdb_name_table[i] = knt1;
+ symtab->sym_name = kdb_name_table[i];
+ knt1 = NULL;
+ }
+
+ if (symtab->mod_name == NULL)
+ symtab->mod_name = "kernel";
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name);
+
+out:
+ debug_kfree(knt1);
+ return ret;
+}
+
+void
+kdbnearsym_cleanup(void)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
+ if (kdb_name_table[i]) {
+ debug_kfree(kdb_name_table[i]);
+ kdb_name_table[i] = NULL;
+ }
+ }
+}
+
+/*
+ * kallsyms_symbol_complete
+ *
+ * Parameters:
+ * prefix_name prefix of a symbol name to lookup
+ * max_len maximum length that can be returned
+ * Returns:
+ * Number of symbols which match the given prefix.
+ * Notes:
+ * prefix_name is changed to contain the longest unique prefix that
+ * starts with this prefix (tab completion).
+ */
+
+static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
+
+int kallsyms_symbol_complete(char *prefix_name, int max_len)
+{
+ loff_t pos = 0;
+ int prefix_len = strlen(prefix_name), prev_len = 0;
+ int i, number = 0;
+ const char *name;
+
+ while ((name = kdb_walk_kallsyms(&pos))) {
+ if (strncmp(name, prefix_name, prefix_len) == 0) {
+ strcpy(ks_namebuf, name);
+ /* Work out the longest name that matches the prefix */
+ if (++number == 1) {
+ prev_len = min_t(int, max_len-1, strlen(ks_namebuf));
+ memcpy(ks_namebuf_prev, ks_namebuf, prev_len);
+ ks_namebuf_prev[prev_len] = '\0';
+ } else for (i = 0; i < prev_len; ++i) {
+ if (ks_namebuf[i] != ks_namebuf_prev[i]) {
+ prev_len = i;
+ ks_namebuf_prev[i] = '\0';
+ break;
+ }
+ }
+ }
+ }
+ if (prev_len > prefix_len)
+ memcpy(prefix_name, ks_namebuf_prev, prev_len+1);
+ return number;
+}
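+
+/* For example (hypothetical symbol set): if the only matching symbols were
+ * kdb_printf and kdb_print_nameval, completing prefix_name "kdb_pr" would
+ * return 2 and extend prefix_name to "kdb_print", their longest common
+ * prefix.
+ */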
+
+/*
+ * kallsyms_symbol_next
+ *
+ * Parameters:
+ * prefix_name prefix of a symbol name to lookup
+ * flag 0 means search from the head, 1 means continue search.
+ * Returns:
+ * 1 if a symbol matches the given prefix.
+ * 0 if no string found
+ */
+
+int kallsyms_symbol_next(char *prefix_name, int flag)
+{
+ int prefix_len = strlen(prefix_name);
+ static loff_t pos;
+ const char *name;
+
+ if (!flag)
+ pos = 0;
+
+ while ((name = kdb_walk_kallsyms(&pos))) {
+ if (strncmp(name, prefix_name, prefix_len) == 0) {
+ strncpy(prefix_name, name, strlen(name)+1);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+#if defined(CONFIG_SMP)
+/*
+ * kdb_ipi
+ *
+ * This function is called from the non-maskable interrupt
+ * handler to handle a kdb IPI instruction.
+ *
+ * Inputs:
+ * regs = Exception frame pointer
+ * Outputs:
+ * None.
+ * Returns:
+ * 0 - Did not handle NMI
+ * 1 - Handled NMI
+ * Locking:
+ * None.
+ * Remarks:
+ * Initially one processor is invoked in the kdb() code. That
+ * processor sends an ipi which drives this routine on the other
+ * processors. All this does is call kdb() with reason SWITCH.
+ * This puts all processors into the kdb() routine and all the
+ * code for breakpoints etc. is in one place.
+ * One problem with the way the kdb NMI is sent is that the NMI carries no
+ * identification that says it came from kdb. If the cpu's kdb state is
+ * marked as "waiting for kdb_ipi" then the NMI is treated as coming from
+ * kdb; otherwise it is assumed to be for another reason and is ignored.
+ */
+
+int
+kdb_ipi(struct pt_regs *regs, void (*ack_interrupt)(void))
+{
+ /* Do not print before checking and clearing WAIT_IPI, IPIs are
+ * going all the time.
+ */
+ if (KDB_STATE(WAIT_IPI)) {
+ /*
+ * Stopping other processors via smp_kdb_stop().
+ */
+ if (ack_interrupt)
+ (*ack_interrupt)(); /* Acknowledge the interrupt */
+ KDB_STATE_CLEAR(WAIT_IPI);
+ KDB_DEBUG_STATE("kdb_ipi 1", 0);
+ kdb(KDB_REASON_SWITCH, 0, regs); /* Spin in kdb() */
+ KDB_DEBUG_STATE("kdb_ipi 2", 0);
+ return 1;
+ }
+ return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * kdb_symbol_print
+ *
+ * Standard method for printing a symbol name and offset.
+ * Inputs:
+ * addr Address to be printed.
+ * symtab Address of symbol data, if NULL this routine does its
+ * own lookup.
+ * punc Punctuation for string, bit field.
+ * Outputs:
+ * None.
+ * Returns:
+ * Always 0.
+ * Locking:
+ * none.
+ * Remarks:
+ * The string and its punctuation is only printed if the address
+ * is inside the kernel, except that the value is always printed
+ * when requested.
+ */
+
+void
+kdb_symbol_print(kdb_machreg_t addr, const kdb_symtab_t *symtab_p, unsigned int punc)
+{
+ kdb_symtab_t symtab, *symtab_p2;
+ if (symtab_p) {
+ symtab_p2 = (kdb_symtab_t *)symtab_p;
+ } else {
+ symtab_p2 = &symtab;
+ kdbnearsym(addr, symtab_p2);
+ }
+ /* print nothing unless there is a name or the value was requested */
+ if (!symtab_p2->sym_name && !(punc & KDB_SP_VALUE))
+ return;
+ if (punc & KDB_SP_SPACEB) {
+ kdb_printf(" ");
+ }
+ if (punc & KDB_SP_VALUE) {
+ kdb_printf(kdb_machreg_fmt0, addr);
+ }
+ if (symtab_p2->sym_name) {
+ if (punc & KDB_SP_VALUE) {
+ kdb_printf(" ");
+ }
+ if (punc & KDB_SP_PAREN) {
+ kdb_printf("(");
+ }
+ if (strcmp(symtab_p2->mod_name, "kernel")) {
+ kdb_printf("[%s]", symtab_p2->mod_name);
+ }
+ kdb_printf("%s", symtab_p2->sym_name);
+ if (addr != symtab_p2->sym_start) {
+ kdb_printf("+0x%lx", addr - symtab_p2->sym_start);
+ }
+ if (punc & KDB_SP_SYMSIZE) {
+ kdb_printf("/0x%lx", symtab_p2->sym_end - symtab_p2->sym_start);
+ }
+ if (punc & KDB_SP_PAREN) {
+ kdb_printf(")");
+ }
+ }
+ if (punc & KDB_SP_SPACEA) {
+ kdb_printf(" ");
+ }
+ if (punc & KDB_SP_NEWLINE) {
+ kdb_printf("\n");
+ }
+}
+
+/*
+ * kdb_strdup
+ *
+ * kdb equivalent of strdup, for disasm code.
+ * Inputs:
+ * str The string to duplicate.
+ * type Flags to kmalloc for the new string.
+ * Outputs:
+ * None.
+ * Returns:
+ * Address of the new string, NULL if storage could not be allocated.
+ * Locking:
+ * none.
+ * Remarks:
+ * This is not in lib/string.c because it uses kmalloc which is not
+ * available when string.o is used in boot loaders.
+ */
+
+char *kdb_strdup(const char *str, gfp_t type)
+{
+ int n = strlen(str)+1;
+ char *s = kmalloc(n, type);
+ if (!s)
+ return NULL;
+ return strcpy(s, str);
+}
+
+/*
+ * kdb_getarea_size
+ *
+ * Read an area of data. The kdb equivalent of copy_from_user, with
+ * kdb messages for invalid addresses.
+ * Inputs:
+ * res Pointer to the area to receive the result.
+ * addr Address of the area to copy.
+ * size Size of the area.
+ * Outputs:
+ * none.
+ * Returns:
+ * 0 for success, < 0 for error.
+ * Locking:
+ * none.
+ */
+
+int kdb_getarea_size(void *res, unsigned long addr, size_t size)
+{
+ int ret = kdba_getarea_size(res, addr, size);
+ if (ret) {
+ if (!KDB_STATE(SUPPRESS)) {
+ kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
+ KDB_STATE_SET(SUPPRESS);
+ }
+ ret = KDB_BADADDR;
+ }
+ else {
+ KDB_STATE_CLEAR(SUPPRESS);
+ }
+ return(ret);
+}
+
+/*
+ * kdb_putarea_size
+ *
+ * Write an area of data. The kdb equivalent of copy_to_user, with
+ * kdb messages for invalid addresses.
+ * Inputs:
+ * addr Address of the area to write to.
+ * res Pointer to the area holding the data.
+ * size Size of the area.
+ * Outputs:
+ * none.
+ * Returns:
+ * 0 for success, < 0 for error.
+ * Locking:
+ * none.
+ */
+
+int kdb_putarea_size(unsigned long addr, void *res, size_t size)
+{
+ int ret = kdba_putarea_size(addr, res, size);
+ if (ret) {
+ if (!KDB_STATE(SUPPRESS)) {
+ kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
+ KDB_STATE_SET(SUPPRESS);
+ }
+ ret = KDB_BADADDR;
+ }
+ else {
+ KDB_STATE_CLEAR(SUPPRESS);
+ }
+ return(ret);
+}
+
+/*
+ * kdb_getphys
+ *
+ * Read data from a physical address. Validate the address is in range,
+ * use kmap_atomic() to get data
+ *
+ * Similar to kdb_getarea() - but for phys addresses
+ *
+ * Inputs:
+ * res Pointer to the word to receive the result
+ * addr Physical address of the area to copy
+ * size Size of the area
+ * Outputs:
+ * none.
+ * Returns:
+ * 0 for success, < 0 for error.
+ * Locking:
+ * none.
+ */
+static int kdb_getphys(void *res, unsigned long addr, size_t size)
+{
+ unsigned long pfn;
+ void *vaddr;
+ struct page *page;
+
+ pfn = (addr >> PAGE_SHIFT);
+ if (!pfn_valid(pfn))
+ return 1;
+ page = pfn_to_page(pfn);
+ vaddr = kmap_atomic(page, KM_KDB);
+ memcpy(res, vaddr + (addr & (PAGE_SIZE -1)), size);
+ kunmap_atomic(vaddr, KM_KDB);
+
+ return 0;
+}
+
+/*
+ * kdb_getphysword
+ *
+ * Inputs:
+ * word Pointer to the word to receive the result.
+ * addr Address of the area to copy.
+ * size Size of the area.
+ * Outputs:
+ * none.
+ * Returns:
+ * 0 for success, < 0 for error.
+ * Locking:
+ * none.
+ */
+int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size)
+{
+ int diag;
+ __u8 w1;
+ __u16 w2;
+ __u32 w4;
+ __u64 w8;
+ *word = 0; /* Default value if addr or size is invalid */
+
+ switch (size) {
+ case 1:
+ if (!(diag = kdb_getphys(&w1, addr, sizeof(w1))))
+ *word = w1;
+ break;
+ case 2:
+ if (!(diag = kdb_getphys(&w2, addr, sizeof(w2))))
+ *word = w2;
+ break;
+ case 4:
+ if (!(diag = kdb_getphys(&w4, addr, sizeof(w4))))
+ *word = w4;
+ break;
+ case 8:
+ if (size <= sizeof(*word)) {
+ if (!(diag = kdb_getphys(&w8, addr, sizeof(w8))))
+ *word = w8;
+ break;
+ }
+ /* drop through */
+ default:
+ diag = KDB_BADWIDTH;
+ kdb_printf("kdb_getphysword: bad width %ld\n", (long) size);
+ }
+ return(diag);
+}
+
+/*
+ * kdb_getword
+ *
+ * Read a binary value. Unlike kdb_getarea, this treats data as numbers.
+ * Inputs:
+ * word Pointer to the word to receive the result.
+ * addr Address of the area to copy.
+ * size Size of the area.
+ * Outputs:
+ * none.
+ * Returns:
+ * 0 for success, < 0 for error.
+ * Locking:
+ * none.
+ */
+
+int kdb_getword(unsigned long *word, unsigned long addr, size_t size)
+{
+ int diag;
+ __u8 w1;
+ __u16 w2;
+ __u32 w4;
+ __u64 w8;
+ *word = 0; /* Default value if addr or size is invalid */
+ switch (size) {
+ case 1:
+ if (!(diag = kdb_getarea(w1, addr)))
+ *word = w1;
+ break;
+ case 2:
+ if (!(diag = kdb_getarea(w2, addr)))
+ *word = w2;
+ break;
+ case 4:
+ if (!(diag = kdb_getarea(w4, addr)))
+ *word = w4;
+ break;
+ case 8:
+ if (size <= sizeof(*word)) {
+ if (!(diag = kdb_getarea(w8, addr)))
+ *word = w8;
+ break;
+ }
+ /* drop through */
+ default:
+ diag = KDB_BADWIDTH;
+ kdb_printf("kdb_getword: bad width %ld\n", (long) size);
+ }
+ return(diag);
+}
+
+/*
+ * kdb_putword
+ *
+ * Write a binary value. Unlike kdb_putarea, this treats data as numbers.
+ * Inputs:
+ * addr Address of the area to write to.
+ * word The value to set.
+ * size Size of the area.
+ * Outputs:
+ * none.
+ * Returns:
+ * 0 for success, < 0 for error.
+ * Locking:
+ * none.
+ */
+
+int kdb_putword(unsigned long addr, unsigned long word, size_t size)
+{
+ int diag;
+ __u8 w1;
+ __u16 w2;
+ __u32 w4;
+ __u64 w8;
+ switch (size) {
+ case 1:
+ w1 = word;
+ diag = kdb_putarea(addr, w1);
+ break;
+ case 2:
+ w2 = word;
+ diag = kdb_putarea(addr, w2);
+ break;
+ case 4:
+ w4 = word;
+ diag = kdb_putarea(addr, w4);
+ break;
+ case 8:
+ if (size <= sizeof(word)) {
+ w8 = word;
+ diag = kdb_putarea(addr, w8);
+ break;
+ }
+ /* drop through */
+ default:
+ diag = KDB_BADWIDTH;
+ kdb_printf("kdb_putword: bad width %ld\n", (long) size);
+ }
+ return(diag);
+}
+
+/*
+ * kdb_task_state_string
+ *
+ * Convert a string containing any of the letters DRSTCZEUIMA to a mask
+ * for the process state field and return the value. If no argument is
+ * supplied, return the mask that corresponds to environment variable PS,
+ * DRSTCZEU by default.
+ * Inputs:
+ * s String to convert
+ * Outputs:
+ * none.
+ * Returns:
+ * Mask for process state.
+ * Locking:
+ * none.
+ * Notes:
+ * The mask folds data from several sources into a single long value, so
+ * be careful not to overlap the bits. TASK_* bits are in the LSB,
+ * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there
+ * is no overlap between TASK_* and EXIT_* but that may not always be
+ * true, so EXIT_* bits are shifted left 16 bits before being stored in
+ * the mask.
+ */
+
+#define UNRUNNABLE (1UL << (8*sizeof(unsigned long) - 1)) /* unrunnable is < 0 */
+#define RUNNING (1UL << (8*sizeof(unsigned long) - 2))
+#define IDLE (1UL << (8*sizeof(unsigned long) - 3))
+#define DAEMON (1UL << (8*sizeof(unsigned long) - 4))
+
+unsigned long
+kdb_task_state_string(const char *s)
+{
+ long res = 0;
+ if (!s && !(s = kdbgetenv("PS"))) {
+ s = "DRSTCZEU"; /* default value for ps */
+ }
+ while (*s) {
+ switch (*s) {
+ case 'D': res |= TASK_UNINTERRUPTIBLE; break;
+ case 'R': res |= RUNNING; break;
+ case 'S': res |= TASK_INTERRUPTIBLE; break;
+ case 'T': res |= TASK_STOPPED; break;
+ case 'C': res |= TASK_TRACED; break;
+ case 'Z': res |= EXIT_ZOMBIE << 16; break;
+ case 'E': res |= EXIT_DEAD << 16; break;
+ case 'U': res |= UNRUNNABLE; break;
+ case 'I': res |= IDLE; break;
+ case 'M': res |= DAEMON; break;
+ case 'A': res = ~0UL; break;
+ default:
+ kdb_printf("%s: unknown flag '%c' ignored\n", __FUNCTION__, *s);
+ break;
+ }
+ ++s;
+ }
+ return res;
+}
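+
+/* For example, kdb_task_state_string("RZ") returns
+ * RUNNING | (EXIT_ZOMBIE << 16): running tasks are matched via the synthetic
+ * RUNNING bit in the MSBs, zombies via the EXIT_* bits shifted clear of the
+ * TASK_* values.
+ */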
+
+/*
+ * kdb_task_state_char
+ *
+ * Return the character that represents the task state.
+ * Inputs:
+ * p struct task for the process
+ * Outputs:
+ * none.
+ * Returns:
+ * One character to represent the task state.
+ * Locking:
+ * none.
+ */
+
+char
+kdb_task_state_char (const struct task_struct *p)
+{
+ int cpu = kdb_process_cpu(p);
+ struct kdb_running_process *krp = kdb_running_process + cpu;
+ char state = (p->state == 0) ? 'R' :
+ (p->state < 0) ? 'U' :
+ (p->state & TASK_UNINTERRUPTIBLE) ? 'D' :
+ (p->state & TASK_STOPPED) ? 'T' :
+ (p->state & TASK_TRACED) ? 'C' :
+ (p->exit_state & EXIT_ZOMBIE) ? 'Z' :
+ (p->exit_state & EXIT_DEAD) ? 'E' :
+ (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+ if (p->pid == 0) {
+ /* Idle task. Is it really idle, apart from the kdb interrupt? */
+ if (!kdb_task_has_cpu(p) || krp->irq_depth == 1) {
+ /* There is a corner case when the idle task takes an
+ * interrupt and dies in the interrupt code. It has an
+ * interrupt count of 1 but that did not come from kdb.
+ * This corner case can only occur on the initial cpu,
+ * all the others were entered via the kdb IPI.
+ */
+ if (cpu != kdb_initial_cpu || KDB_STATE_CPU(KEYBOARD, cpu))
+ state = 'I'; /* idle task */
+ }
+ }
+ else if (!p->mm && state == 'S') {
+ state = 'M'; /* sleeping system daemon */
+ }
+ return state;
+}
+
+/*
+ * kdb_task_state
+ *
+ * Return true if a process has the desired state given by the mask.
+ * Inputs:
+ * p struct task for the process
+ * mask mask from kdb_task_state_string to select processes
+ * Outputs:
+ * none.
+ * Returns:
+ * True if the process matches at least one criteria defined by the mask.
+ * Locking:
+ * none.
+ */
+
+unsigned long
+kdb_task_state(const struct task_struct *p, unsigned long mask)
+{
+ char state[] = { kdb_task_state_char(p), '\0' };
+ return (mask & kdb_task_state_string(state)) != 0;
+}
+
+struct kdb_running_process kdb_running_process[NR_CPUS];
+
+/* Save the state of a running process and invoke kdb_main_loop. This is
+ * invoked on the current process on each cpu (assuming the cpu is responding).
+ */
+
+int
+kdb_save_running(struct pt_regs *regs, kdb_reason_t reason,
+ kdb_reason_t reason2, int error, kdb_dbtrap_t db_result)
+{
+ struct kdb_running_process *krp = kdb_running_process + smp_processor_id();
+ krp->p = current;
+ krp->regs = regs;
+ krp->seqno = kdb_seqno;
+ krp->irq_depth = hardirq_count() >> HARDIRQ_SHIFT;
+ kdba_save_running(&(krp->arch), regs);
+ return kdb_main_loop(reason, reason2, error, db_result, regs);
+}
+
+/*
+ * kdb_unsave_running
+ *
+ * Reverse the effect of kdb_save_running.
+ * Inputs:
+ * regs struct pt_regs for the process
+ * Outputs:
+ * Updates kdb_running_process[] for this cpu.
+ * Returns:
+ * none.
+ * Locking:
+ * none.
+ */
+
+void
+kdb_unsave_running(struct pt_regs *regs)
+{
+ struct kdb_running_process *krp = kdb_running_process + smp_processor_id();
+ kdba_unsave_running(&(krp->arch), regs);
+ krp->seqno = 0;
+}
+
+
+/*
+ * kdb_print_nameval
+ *
+ * Print a name and its value, converting the value to a symbol lookup
+ * if possible.
+ * Inputs:
+ * name field name to print
+ * val value of field
+ * Outputs:
+ * none.
+ * Returns:
+ * none.
+ * Locking:
+ * none.
+ */
+
+void
+kdb_print_nameval(const char *name, unsigned long val)
+{
+ kdb_symtab_t symtab;
+ kdb_printf(" %-11.11s ", name);
+ if (kdbnearsym(val, &symtab))
+ kdb_symbol_print(val, &symtab, KDB_SP_VALUE|KDB_SP_SYMSIZE|KDB_SP_NEWLINE);
+ else
+ kdb_printf("0x%lx\n", val);
+}
+
+static struct page * kdb_get_one_user_page(const struct task_struct *tsk, unsigned long start,
+ int len, int write)
+{
+ struct mm_struct *mm = tsk->mm;
+ unsigned int flags;
+ struct vm_area_struct * vma;
+
+ /* shouldn't cross a page boundary. */
+ if ((start & PAGE_MASK) != ((start+len) & PAGE_MASK))
+ return NULL;
+
+ /* We need to align the start address to the current page boundary;
+ * PAGE_ALIGN aligns to the next page boundary.
+ * FIXME: What about hugetlb?
+ */
+ start = start & PAGE_MASK;
+ flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+
+ vma = find_extend_vma(mm, start);
+
+ /* Maybe we can allow access to VM_IO pages inside KDB? */
+ if (!vma || (vma->vm_flags & VM_IO) || !(flags & vma->vm_flags))
+ return NULL;
+
+ return follow_page(vma, start, write ? FOLL_WRITE : 0);
+}
+
+int kdb_getuserarea_size(void *to, unsigned long from, size_t size)
+{
+ struct page *page;
+ void *vaddr;
+
+ page = kdb_get_one_user_page(kdb_current_task, from, size, 0);
+ if (!page)
+ return size;
+
+ vaddr = kmap_atomic(page, KM_KDB);
+ memcpy(to, vaddr + (from & (PAGE_SIZE - 1)), size);
+ kunmap_atomic(vaddr, KM_KDB);
+
+ return 0;
+}
+
+int kdb_putuserarea_size(unsigned long to, void *from, size_t size)
+{
+ struct page *page;
+ void *vaddr;
+
+ page = kdb_get_one_user_page(kdb_current_task, to, size, 1);
+ if (!page)
+ return size;
+
+ vaddr = kmap_atomic(page, KM_KDB);
+ memcpy(vaddr + (to & (PAGE_SIZE - 1)), from, size);
+ kunmap_atomic(vaddr, KM_KDB);
+
+ return 0;
+}
+
+/* Last ditch allocator for debugging, so we can still debug even when the
+ * GFP_ATOMIC pool has been exhausted. The algorithms are tuned for space
+ * usage, not for speed. One smallish memory pool, the free chain is always in
+ * ascending address order to allow coalescing, allocations are done in brute
+ * force best fit.
+ */
+
+struct debug_alloc_header {
+ u32 next; /* offset of next header from start of pool */
+ u32 size;
+ void *caller;
+};
+
+/* The memory returned by this allocator must be aligned, which means so must
+ * the header size. Do not assume that sizeof(struct debug_alloc_header) is a
+ * multiple of the alignment, explicitly calculate the overhead of this header,
+ * including the alignment. The rest of this code must not use sizeof() on any
+ * header or pointer to a header.
+ */
+#define dah_align 8
+#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
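+/* For example, on a 64-bit build the header is two u32s plus a pointer,
+ * 16 bytes, which ALIGN leaves at 16, so dah_overhead is 16; a 32-bit build
+ * has a 12 byte header which ALIGN rounds up to 16. Both figures assume no
+ * compiler padding beyond natural alignment.
+ */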
+
+static u64 debug_alloc_pool_aligned[256*1024/dah_align]; /* 256K pool */
+static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
+static u32 dah_first, dah_first_call = 1, dah_used = 0, dah_used_max = 0;
+
+/* Locking is awkward. The debug code is called from all contexts, including
+ * non maskable interrupts. A normal spinlock is not safe in NMI context. Try
+ * to get the debug allocator lock, if it cannot be obtained after a second
+ * then give up. If the lock could not be previously obtained on this cpu then
+ * only try once.
+ *
+ * sparse has no annotation for "this function _sometimes_ acquires a lock", so
+ * fudge the acquire/release notation.
+ */
+static DEFINE_SPINLOCK(dap_lock);
+static int
+get_dap_lock(void)
+ __acquires(dap_lock)
+{
+ static int dap_locked = -1;
+ int count;
+ if (dap_locked == smp_processor_id())
+ count = 1;
+ else
+ count = 1000;
+ while (1) {
+ if (spin_trylock(&dap_lock)) {
+ dap_locked = -1;
+ return 1;
+ }
+ if (!count--)
+ break;
+ udelay(1000);
+ }
+ dap_locked = smp_processor_id();
+ __acquire(dap_lock);
+ return 0;
+}
+
+void
+*debug_kmalloc(size_t size, gfp_t flags)
+{
+ unsigned int rem, h_offset;
+ struct debug_alloc_header *best, *bestprev, *prev, *h;
+ void *p = NULL;
+ if (!get_dap_lock()) {
+ __release(dap_lock); /* we never actually got it */
+ return NULL;
+ }
+ h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
+ if (dah_first_call) {
+ h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
+ dah_first_call = 0;
+ }
+ size = ALIGN(size, dah_align);
+ prev = best = bestprev = NULL;
+ while (1) {
+ if (h->size >= size && (!best || h->size < best->size)) {
+ best = h;
+ bestprev = prev;
+ if (h->size == size)
+ break;
+ }
+ if (!h->next)
+ break;
+ prev = h;
+ h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
+ }
+ if (!best)
+ goto out;
+ rem = best->size - size;
+ /* The pool must always contain at least one header */
+ if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
+ goto out;
+ if (rem >= dah_overhead) {
+ best->size = size;
+ h_offset = ((char *)best - debug_alloc_pool) +
+ dah_overhead + best->size;
+ h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
+ h->size = rem - dah_overhead;
+ h->next = best->next;
+ } else
+ h_offset = best->next;
+ best->caller = __builtin_return_address(0);
+ dah_used += best->size;
+ dah_used_max = max(dah_used, dah_used_max);
+ if (bestprev)
+ bestprev->next = h_offset;
+ else
+ dah_first = h_offset;
+ p = (char *)best + dah_overhead;
+ memset(p, POISON_INUSE, best->size - 1);
+ *((char *)p + best->size - 1) = POISON_END;
+out:
+ spin_unlock(&dap_lock);
+ return p;
+}
+
+void
+debug_kfree(void *p)
+{
+ struct debug_alloc_header *h;
+ unsigned int h_offset;
+ if (!p)
+ return;
+ if ((char *)p < debug_alloc_pool ||
+ (char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
+ kfree(p);
+ return;
+ }
+ if (!get_dap_lock()) {
+ __release(dap_lock); /* we never actually got it */
+ return; /* memory leak, cannot be helped */
+ }
+ h = (struct debug_alloc_header *)((char *)p - dah_overhead);
+ memset(p, POISON_FREE, h->size - 1);
+ *((char *)p + h->size - 1) = POISON_END;
+ h->caller = NULL;
+ dah_used -= h->size;
+ h_offset = (char *)h - debug_alloc_pool;
+ if (h_offset < dah_first) {
+ h->next = dah_first;
+ dah_first = h_offset;
+ } else {
+ struct debug_alloc_header *prev;
+ unsigned int prev_offset;
+ prev = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
+ while (1) {
+ if (!prev->next || prev->next > h_offset)
+ break;
+ prev = (struct debug_alloc_header *)
+ (debug_alloc_pool + prev->next);
+ }
+ prev_offset = (char *)prev - debug_alloc_pool;
+ if (prev_offset + dah_overhead + prev->size == h_offset) {
+ prev->size += dah_overhead + h->size;
+ memset(h, POISON_FREE, dah_overhead - 1);
+ *((char *)h + dah_overhead - 1) = POISON_END;
+ h = prev;
+ h_offset = prev_offset;
+ } else {
+ h->next = prev->next;
+ prev->next = h_offset;
+ }
+ }
+ if (h_offset + dah_overhead + h->size == h->next) {
+ struct debug_alloc_header *next;
+ next = (struct debug_alloc_header *)
+ (debug_alloc_pool + h->next);
+ h->size += dah_overhead + next->size;
+ h->next = next->next;
+ memset(next, POISON_FREE, dah_overhead - 1);
+ *((char *)next + dah_overhead - 1) = POISON_END;
+ }
+ spin_unlock(&dap_lock);
+}
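+
+/* Usage sketch: within kdb these two act as drop-in replacements for
+ * kmalloc/kfree (kdbnearsym above uses exactly this pattern):
+ *
+ * char *buf = debug_kmalloc(64, GFP_ATOMIC);
+ * if (buf) {
+ * ... use buf ...
+ * debug_kfree(buf);
+ * }
+ *
+ * debug_kfree also routes pointers that lie outside the static pool to the
+ * regular kfree(), see the range check above.
+ */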
+
+void
+debug_kusage(void)
+{
+ struct debug_alloc_header *h_free, *h_used;
+#ifdef CONFIG_IA64
+ /* FIXME: using dah for ia64 unwind always results in a memory leak.
+ * Fix that memory leak first, then set debug_kusage_one_time = 1 for
+ * all architectures.
+ */
+ static int debug_kusage_one_time = 0;
+#else
+ static int debug_kusage_one_time = 1;
+#endif
+ if (!get_dap_lock()) {
+ __release(dap_lock); /* we never actually got it */
+ return;
+ }
+ h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
+ if (dah_first == 0 &&
+ (h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
+ dah_first_call))
+ goto out;
+ if (!debug_kusage_one_time)
+ goto out;
+ debug_kusage_one_time = 0;
+ kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n",
+ __FUNCTION__, dah_first);
+ if (dah_first) {
+ h_used = (struct debug_alloc_header *)debug_alloc_pool;
+ kdb_printf("%s: h_used %p size %d\n", __FUNCTION__, h_used, h_used->size);
+ }
+ do {
+ h_used = (struct debug_alloc_header *)
+ ((char *)h_free + dah_overhead + h_free->size);
+ kdb_printf("%s: h_used %p size %d caller %p\n",
+ __FUNCTION__, h_used, h_used->size, h_used->caller);
+ h_free = (struct debug_alloc_header *)
+ (debug_alloc_pool + h_free->next);
+ } while (h_free->next);
+ h_used = (struct debug_alloc_header *)
+ ((char *)h_free + dah_overhead + h_free->size);
+ if ((char *)h_used - debug_alloc_pool !=
+ sizeof(debug_alloc_pool_aligned))
+ kdb_printf("%s: h_used %p size %d caller %p\n",
+ __FUNCTION__, h_used, h_used->size, h_used->caller);
+out:
+ spin_unlock(&dap_lock);
+}
+
+/* Maintain a small stack of kdb_flags to allow recursion without disturbing
+ * the global kdb state.
+ */
+
+static int kdb_flags_stack[4], kdb_flags_index;
+
+void
+kdb_save_flags(void)
+{
+ BUG_ON(kdb_flags_index >= ARRAY_SIZE(kdb_flags_stack));
+ kdb_flags_stack[kdb_flags_index++] = kdb_flags;
+}
+
+void
+kdb_restore_flags(void)
+{
+ BUG_ON(kdb_flags_index <= 0);
+ kdb_flags = kdb_flags_stack[--kdb_flags_index];
+}
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+
+MODULE_AUTHOR("SGI");
+MODULE_DESCRIPTION("Debug page information");
+MODULE_LICENSE("GPL");
+
+/* Standard Linux page stuff */
+
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+/* From include/linux/page-flags.h */
+static char *pg_flag_vals[] = {
+ "PG_locked", "PG_error", "PG_referenced", "PG_uptodate",
+ "PG_dirty", "PG_lru", "PG_active", "PG_slab",
+ "PG_owner_priv_1", "PG_arch_1", "PG_reserved", "PG_private",
+ "PG_writeback",
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ "PG_head", "PG_tail",
+#else
+ "PG_compound",
+#endif
+ "PG_swapcache", "PG_mappedtodisk", "PG_reclaim", "PG_buddy",
+#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
+ "PG_uncached",
+#endif
+ NULL };
+#endif
+
+/* From include/linux/buffer_head.h */
+static char *bh_state_vals[] = {
+ "Uptodate", "Dirty", "Lock", "Req",
+ "Uptodate_Lock", "Mapped", "New", "Async_read",
+ "Async_write", "Delay", "Boundary", "Write_EIO",
+ "Ordered", "Eopnotsupp", "Unwritten", "PriavateStart",
+ NULL };
+
+/* From include/linux/bio.h */
+static char *bio_flag_vals[] = {
+ "Uptodate", "RW_block", "EOF", "Seg_valid",
+ "Cloned", "Bounced", "User_mapped", "Eopnotsupp",
+ NULL };
+
+/* From include/linux/fs.h */
+static char *inode_flag_vals[] = {
+ "I_DIRTY_SYNC", "I_DIRTY_DATASYNC", "I_DIRTY_PAGES", "I_NEW",
+ "I_WILL_FREE", "I_FREEING", "I_CLEAR", "I_LOCK",
+ "I_SYNC", NULL };
+
+static char *map_flags(unsigned long flags, char *mapping[])
+{
+ static char buffer[256];
+ int index;
+ int offset = 12;
+
+ buffer[0] = '\0';
+
+ for (index = 0; flags && mapping[index]; flags >>= 1, index++) {
+ if (flags & 1) {
+ if ((offset + strlen(mapping[index]) + 1) >= 80) {
+ strcat(buffer, "\n ");
+ offset = 12;
+ } else if (offset > 12) {
+ strcat(buffer, " ");
+ offset++;
+ }
+ strcat(buffer, mapping[index]);
+ offset += strlen(mapping[index]);
+ }
+ }
+
+ return (buffer);
+}
+
+static int
+kdbm_buffers(int argc, const char **argv)
+{
+ struct buffer_head bh;
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+ (diag = kdb_getarea(bh, addr)))
+ return(diag);
+
+ kdb_printf("buffer_head at 0x%lx\n", addr);
+ kdb_printf(" bno %llu size %llu dev 0x%x\n",
+ (unsigned long long)bh.b_blocknr,
+ (unsigned long long)bh.b_size,
+ bh.b_bdev ? bh.b_bdev->bd_dev : 0);
+ kdb_printf(" count %d state 0x%lx [%s]\n",
+ bh.b_count.counter, bh.b_state,
+ map_flags(bh.b_state, bh_state_vals));
+ kdb_printf(" b_data 0x%p\n",
+ bh.b_data);
+ kdb_printf(" b_page 0x%p b_this_page 0x%p b_private 0x%p\n",
+ bh.b_page, bh.b_this_page, bh.b_private);
+ kdb_printf(" b_end_io ");
+ if (bh.b_end_io)
+ kdb_symbol_print(kdba_funcptr_value(bh.b_end_io), NULL, KDB_SP_VALUE);
+ else
+ kdb_printf("(NULL)");
+ kdb_printf("\n");
+
+ return 0;
+}
+
+static int
+print_biovec(struct bio_vec *vec, int vcount)
+{
+ struct bio_vec bvec;
+ unsigned long addr;
+ int diag;
+ int i;
+
+ if (vcount < 1 || vcount > BIO_MAX_PAGES) {
+ kdb_printf(" [skipped iovecs, vcnt is %d]\n", vcount);
+ return 0;
+ }
+
+ addr = (unsigned long)vec;
+ for (i = 0; i < vcount; i++) {
+ if ((diag = kdb_getarea(bvec, addr)))
+ return(diag);
+ addr += sizeof(bvec);
+ kdb_printf(" [%d] page 0x%p length=%u offset=%u\n",
+ i, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
+ }
+ return 0;
+}
+
+static int
+kdbm_bio(int argc, const char **argv)
+{
+ struct bio bio;
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+ (diag = kdb_getarea(bio, addr)))
+ return(diag);
+
+ kdb_printf("bio at 0x%lx\n", addr);
+ kdb_printf(" bno %llu next 0x%p dev 0x%x\n",
+ (unsigned long long)bio.bi_sector,
+ bio.bi_next, bio.bi_bdev ? bio.bi_bdev->bd_dev : 0);
+ kdb_printf(" vcnt %u vec 0x%p rw 0x%lx flags 0x%lx [%s]\n",
+ bio.bi_vcnt, bio.bi_io_vec, bio.bi_rw, bio.bi_flags,
+ map_flags(bio.bi_flags, bio_flag_vals));
+ print_biovec(bio.bi_io_vec, bio.bi_vcnt);
+ kdb_printf(" count %d private 0x%p\n",
+ atomic_read(&bio.bi_cnt), bio.bi_private);
+ kdb_printf(" bi_end_io ");
+ if (bio.bi_end_io)
+ kdb_symbol_print(kdba_funcptr_value(bio.bi_end_io), NULL, KDB_SP_VALUE);
+ else
+ kdb_printf("(NULL)");
+ kdb_printf("\n");
+
+ return 0;
+}
+
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+static char *page_flags(unsigned long flags)
+{
+ return(map_flags(flags, pg_flag_vals));
+}
+
+static int
+kdbm_page(int argc, const char **argv)
+{
+ struct page page;
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+#ifdef __ia64__
+ if (rgn_index(addr) == 0)
+ addr = (unsigned long) &mem_map[addr]; /* assume region 0 is a page index, not an address */
+#else
+ if (addr < PAGE_OFFSET)
+ addr = (unsigned long) &mem_map[addr];
+#endif
+
+ if ((diag = kdb_getarea(page, addr)))
+ return(diag);
+
+ kdb_printf("struct page at 0x%lx\n", addr);
+ kdb_printf(" addr space 0x%p index %lu (offset 0x%llx)\n",
+ page.mapping, page.index,
+ (unsigned long long)page.index << PAGE_CACHE_SHIFT);
+ kdb_printf(" count %d flags %s\n",
+ page._count.counter, page_flags(page.flags));
+ kdb_printf(" virtual 0x%p\n", page_address((struct page *)addr));
+ if (page_has_buffers(&page))
+ kdb_printf(" buffers 0x%p\n", page_buffers(&page));
+ else
+ kdb_printf(" private 0x%lx\n", page_private(&page));
+
+ return 0;
+}
+#endif /* !CONFIG_DISCONTIGMEM && !NUMA */
+
+static unsigned long
+print_request(unsigned long addr)
+{
+ struct request rq;
+
+ if (kdb_getarea(rq, addr))
+ return(0);
+
+ kdb_printf("struct request at 0x%lx\n", addr);
+ kdb_printf(" errors %d sector %llu nr_sectors %llu\n",
+ rq.errors,
+ (unsigned long long)blk_rq_pos(&rq),
+ (unsigned long long)blk_rq_sectors(&rq));
+
+ return (unsigned long) rq.queuelist.next;
+}
+
+static int
+kdbm_request(int argc, const char **argv)
+{
+ long offset = 0;
+ unsigned long addr;
+ int nextarg;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ print_request(addr);
+ return 0;
+}
+
+
+static int
+kdbm_rqueue(int argc, const char **argv)
+{
+ struct request_queue rq;
+ unsigned long addr, head_addr, next;
+ long offset = 0;
+ int nextarg;
+ int i, diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+ (diag = kdb_getarea(rq, addr)))
+ return(diag);
+
+ kdb_printf("struct request_queue at 0x%lx\n", addr);
+ i = 0;
+ next = (unsigned long)rq.queue_head.next;
+ head_addr = addr + offsetof(struct request_queue, queue_head);
+ kdb_printf(" request queue: %s\n", next == head_addr ?
+ "empty" : "");
+ while (next != head_addr) {
+ i++;
+ next = print_request(next);
+ }
+
+ if (i)
+ kdb_printf("%d requests found\n", i);
+
+ return 0;
+}
+
+
+static void
+do_buffer(unsigned long addr)
+{
+ struct buffer_head bh;
+
+ if (kdb_getarea(bh, addr))
+ return;
+
+ kdb_printf("\tbh 0x%lx bno %8llu [%s]\n", addr,
+ (unsigned long long)bh.b_blocknr,
+ map_flags(bh.b_state, bh_state_vals));
+}
+
+static void
+kdbm_show_page(struct page *page, int first)
+{
+ if (first)
+ kdb_printf("page_struct index cnt zone nid flags\n");
+ kdb_printf("%p%s %6lu %5d %3d %3d 0x%lx",
+ page_address(page), sizeof(void *) == 4 ? " " : "",
+ page->index, atomic_read(&(page->_count)),
+ page_zonenum(page), page_to_nid(page),
+ page->flags & (~0UL >> ZONES_SHIFT));
+#define kdb_page_flags(page, type) if (Page ## type(page)) kdb_printf(" " #type);
+ kdb_page_flags(page, Locked);
+ kdb_page_flags(page, Error);
+ kdb_page_flags(page, Referenced);
+ kdb_page_flags(page, Uptodate);
+ kdb_page_flags(page, Dirty);
+ kdb_page_flags(page, LRU);
+ kdb_page_flags(page, Active);
+ kdb_page_flags(page, Slab);
+ kdb_page_flags(page, Checked);
+ if (page->flags & (1UL << PG_arch_1))
+ kdb_printf(" arch_1");
+ kdb_page_flags(page, Reserved);
+ kdb_page_flags(page, Private);
+ kdb_page_flags(page, Writeback);
+ kdb_page_flags(page, Compound);
+ kdb_page_flags(page, SwapCache);
+ kdb_page_flags(page, MappedToDisk);
+ kdb_page_flags(page, Reclaim);
+ kdb_page_flags(page, Buddy);
+
+ /* PageHighMem is not a flag any more, but treat it as one */
+ kdb_page_flags(page, HighMem);
+
+ if (page_has_buffers(page)) {
+ struct buffer_head *head, *bh;
+ kdb_printf("\n");
+ head = bh = page_buffers(page);
+ do {
+ do_buffer((unsigned long) bh);
+ } while ((bh = bh->b_this_page) != head);
+ } else if (page_private(page)) {
+ kdb_printf(" private= 0x%lx", page_private(page));
+ }
+ /* Cannot use page_mapping(page) here, it needs swapper_space which is
+ * not exported.
+ */
+ if (page->mapping)
+ kdb_printf(" mapping= %p", page->mapping);
+ kdb_printf("\n");
+#undef kdb_page_flags
+}
+
+static int
+kdbm_inode_pages(int argc, const char **argv)
+{
+ struct inode *inode = NULL;
+ struct address_space *ap = NULL;
+ unsigned long addr, addr1 = 0;
+ long offset = 0;
+ int nextarg;
+ int diag;
+ pgoff_t next = 0;
+ struct page *page;
+ int first;
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ goto out;
+
+ if (argc == 2) {
+ nextarg = 2;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr1,
+ &offset, NULL);
+ if (diag)
+ goto out;
+ kdb_printf("Looking for page index 0x%lx ... \n", addr1);
+ next = addr1;
+ }
+
+ if (!(inode = kmalloc(sizeof(*inode), GFP_ATOMIC))) {
+ kdb_printf("kdbm_inode_pages: cannot kmalloc inode\n");
+ goto out;
+ }
+ if (!(ap = kmalloc(sizeof(*ap), GFP_ATOMIC))) {
+ kdb_printf("kdbm_inode_pages: cannot kmalloc ap\n");
+ goto out;
+ }
+ if ((diag = kdb_getarea(*inode, addr)))
+ goto out;
+ if (!inode->i_mapping) {
+ kdb_printf("inode has no mapping\n");
+ goto out;
+ }
+ if ((diag = kdb_getarea(*ap, (unsigned long) inode->i_mapping)))
+ goto out;
+
+ /* Run the pages in the radix tree, printing the state of each page */
+ first = 1;
+ while (radix_tree_gang_lookup(&ap->page_tree, (void **)&page, next, 1)) {
+ kdbm_show_page(page, first);
+ if (addr1)
+ break;
+ first = 0;
+ next = page->index + 1;
+ }
+
+out:
+ if (inode)
+ kfree(inode);
+ if (ap)
+ kfree(ap);
+ return diag;
+}
+
+static int
+kdbm_inode(int argc, const char **argv)
+{
+ struct inode *inode = NULL;
+ unsigned long addr;
+ unsigned char *iaddr;
+ long offset = 0;
+ int nextarg;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ goto out;
+ if (!(inode = kmalloc(sizeof(*inode), GFP_ATOMIC))) {
+ kdb_printf("kdbm_inode: cannot kmalloc inode\n");
+ goto out;
+ }
+ if ((diag = kdb_getarea(*inode, addr)))
+ goto out;
+
+ kdb_printf("struct inode at 0x%lx\n", addr);
+
+ kdb_printf(" i_ino = %lu i_count = %u i_size %Ld\n",
+ inode->i_ino, atomic_read(&inode->i_count),
+ inode->i_size);
+
+ kdb_printf(" i_mode = 0%o i_nlink = %d i_rdev = 0x%x\n",
+ inode->i_mode, inode->i_nlink,
+ inode->i_rdev);
+
+ kdb_printf(" i_hash.nxt = 0x%p i_hash.pprev = 0x%p\n",
+ inode->i_hash.next,
+ inode->i_hash.pprev);
+
+ kdb_printf(" i_list.nxt = 0x%p i_list.prv = 0x%p\n",
+ list_entry(inode->i_list.next, struct inode, i_list),
+ list_entry(inode->i_list.prev, struct inode, i_list));
+
+ kdb_printf(" i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
+ list_entry(inode->i_dentry.next, struct dentry, d_alias),
+ list_entry(inode->i_dentry.prev, struct dentry, d_alias));
+
+ kdb_printf(" i_sb = 0x%p i_op = 0x%p i_data = 0x%lx nrpages = %lu\n",
+ inode->i_sb, inode->i_op,
+ addr + offsetof(struct inode, i_data),
+ inode->i_data.nrpages);
+ kdb_printf(" i_fop= 0x%p i_flock = 0x%p i_mapping = 0x%p\n",
+ inode->i_fop, inode->i_flock, inode->i_mapping);
+
+ kdb_printf(" i_flags 0x%x i_state 0x%lx [%s]",
+ inode->i_flags, inode->i_state,
+ map_flags(inode->i_state, inode_flag_vals));
+
+ iaddr = (char *)addr;
+ iaddr += offsetof(struct inode, i_private);
+
+ kdb_printf(" fs specific info @ 0x%p\n", iaddr);
+out:
+ if (inode)
+ kfree(inode);
+ return diag;
+}
+
+static int
+kdbm_sb(int argc, const char **argv)
+{
+ struct super_block *sb = NULL;
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ goto out;
+ if (!(sb = kmalloc(sizeof(*sb), GFP_ATOMIC))) {
+ kdb_printf("kdbm_sb: cannot kmalloc sb\n");
+ goto out;
+ }
+ if ((diag = kdb_getarea(*sb, addr)))
+ goto out;
+
+ kdb_printf("struct super_block at 0x%lx\n", addr);
+ kdb_printf(" s_dev 0x%x blocksize 0x%lx\n", sb->s_dev, sb->s_blocksize);
+ kdb_printf(" s_flags 0x%lx s_root 0x%p\n", sb->s_flags, sb->s_root);
+ kdb_printf(" s_frozen %d s_id [%s]\n", sb->s_frozen, sb->s_id);
+out:
+ if (sb)
+ kfree(sb);
+ return diag;
+}
+
+
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+/* According to Steve Lord, this code is ix86 specific. Patches to extend it to
+ * other architectures will be gratefully accepted.
+ */
+static int
+kdbm_memmap(int argc, const char **argv)
+{
+ struct page page;
+ int i, page_count;
+ int slab_count = 0;
+ int dirty_count = 0;
+ int locked_count = 0;
+ int page_counts[10]; /* [8] = large counts, [9] = -1 counts */
+ int buffered_count = 0;
+#ifdef buffer_delay
+ int delay_count = 0;
+#endif
+ int diag;
+ unsigned long addr;
+#ifdef CONFIG_DISCONTIGMEM
+ int node_id = -1, found_node = 0;
+ int tot_page_count = 0;
+ unsigned long unode_id;
+ pg_data_t *pgdat;
+
+ if (argc == 1) { /* node_id was specified */
+ diag = kdbgetularg(argv[argc], &unode_id);
+ if (diag)
+ return diag;
+ node_id = (int)unode_id;
+ }
+ else if (argc)
+ return KDB_ARGCOUNT;
+
+ tot_page_count = 0;
+ memset(page_counts, 0, sizeof(page_counts));
+
+ for_each_online_pgdat(pgdat) {
+ if ((node_id != -1) && (pgdat->node_id != node_id))
+ continue;
+ found_node = 1;
+ addr = (unsigned long)pgdat->node_mem_map;
+ page_count = pgdat->node_spanned_pages;
+ tot_page_count += page_count;
+#else
+ addr = (unsigned long)mem_map;
+ page_count = max_mapnr;
+ memset(page_counts, 0, sizeof(page_counts));
+#endif
+ for (i = 0; i < page_count; i++) {
+ if ((diag = kdb_getarea(page, addr)))
+ return diag;
+ addr += sizeof(page);
+
+ if (PageSlab(&page))
+ slab_count++;
+ if (PageDirty(&page))
+ dirty_count++;
+ if (PageLocked(&page))
+ locked_count++;
+ if (page._count.counter == -1)
+ page_counts[9]++;
+ else if (page._count.counter < 8)
+ page_counts[page._count.counter]++;
+ else
+ page_counts[8]++;
+ if (page_has_buffers(&page)) {
+ buffered_count++;
+#ifdef buffer_delay
+ if (buffer_delay(page.buffers))
+ delay_count++;
+#endif
+ }
+ }
+#ifdef CONFIG_DISCONTIGMEM
+ }
+ page_count = tot_page_count;
+ if (node_id != -1) {
+ if (!found_node) {
+ kdb_printf("Node %d does not exist.\n", node_id);
+ return 0;
+ }
+ kdb_printf("Node %d pages:\n", node_id);
+ }
+#endif
+ kdb_printf(" Total pages: %6d\n", page_count);
+ kdb_printf(" Slab pages: %6d\n", slab_count);
+ kdb_printf(" Dirty pages: %6d\n", dirty_count);
+ kdb_printf(" Locked pages: %6d\n", locked_count);
+ kdb_printf(" Buffer pages: %6d\n", buffered_count);
+#ifdef buffer_delay
+ kdb_printf(" Delalloc pages: %6d\n", delay_count);
+#endif
+ kdb_printf(" -1 page count: %6d\n", page_counts[9]);
+ for (i = 0; i < 8; i++) {
+ kdb_printf(" %d page count: %6d\n",
+ i, page_counts[i]);
+ }
+ kdb_printf(" high page count: %6d\n", page_counts[8]);
+ return 0;
+}
+#endif /* !CONFIG_DISCONTIGMEM && !NUMA */
+
+static int __init kdbm_pg_init(void)
+{
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_register("page", kdbm_page, "<vaddr>", "Display page", 0);
+#endif
+ kdb_register("inode", kdbm_inode, "<vaddr>", "Display inode", 0);
+ kdb_register("sb", kdbm_sb, "<vaddr>", "Display super_block", 0);
+ kdb_register("bh", kdbm_buffers, "<buffer head address>", "Display buffer", 0);
+ kdb_register("bio", kdbm_bio, "<bio address>", "Display bio", 0);
+ kdb_register("inode_pages", kdbm_inode_pages, "<inode *>", "Display pages in an inode", 0);
+ kdb_register("req", kdbm_request, "<vaddr>", "dump request struct", 0);
+ kdb_register("rqueue", kdbm_rqueue, "<vaddr>", "dump request queue", 0);
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_register("memmap", kdbm_memmap, "", "page table summary", 0);
+#endif
+
+ return 0;
+}
+
+
+static void __exit kdbm_pg_exit(void)
+{
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_unregister("page");
+#endif
+ kdb_unregister("inode");
+ kdb_unregister("sb");
+ kdb_unregister("bh");
+ kdb_unregister("bio");
+ kdb_unregister("inode_pages");
+ kdb_unregister("req");
+ kdb_unregister("rqueue");
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
+ kdb_unregister("memmap");
+#endif
+}
+
+module_init(kdbm_pg_init)
+module_exit(kdbm_pg_exit)
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
++#include <linux/slab.h>
+#include <asm/signal.h>
+
+MODULE_AUTHOR("SGI");
+MODULE_DESCRIPTION("Debug struct task and sigset information");
+MODULE_LICENSE("GPL");
+
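+/* Render a task's CPU affinity as a short string. Illustrative
+ * outputs, not from a real session: "ALL" when the task may run on
+ * every online CPU, "NONE" for an empty mask, "ONLY(3)" for a single
+ * CPU, otherwise a cpulist range such as "0-3,8-11". */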
+static char *
+kdb_cpus_allowed_string(struct task_struct *tp)
+{
+ static char maskbuf[NR_CPUS * 8];
+ if (cpus_equal(tp->cpus_allowed, cpu_online_map))
+ strcpy(maskbuf, "ALL");
+ else if (cpus_empty(tp->cpus_allowed))
+ strcpy(maskbuf, "NONE");
+ else if (cpus_weight(tp->cpus_allowed) == 1)
+ snprintf(maskbuf, sizeof(maskbuf), "ONLY(%d)", first_cpu(tp->cpus_allowed));
+ else
+ cpulist_scnprintf(maskbuf, sizeof(maskbuf), &tp->cpus_allowed);
+ return maskbuf;
+}
+
+static int
+kdbm_task(int argc, const char **argv)
+{
+ unsigned long addr;
+ long offset=0;
+ int nextarg;
+ int e = 0;
+ struct task_struct *tp = NULL, *tp1;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) != 0)
+ return e;
+
+ if (!(tp = kmalloc(sizeof(*tp), GFP_ATOMIC))) {
+ kdb_printf("%s: cannot kmalloc tp\n", __FUNCTION__);
+ goto out;
+ }
+ if ((e = kdb_getarea(*tp, addr))) {
+ kdb_printf("%s: invalid task address\n", __FUNCTION__);
+ goto out;
+ }
+
+ tp1 = (struct task_struct *)addr;
+ kdb_printf(
+ "struct task at 0x%lx, pid=%d flags=0x%x state=%ld comm=\"%s\"\n",
+ addr, tp->pid, tp->flags, tp->state, tp->comm);
+
+ kdb_printf(" cpu=%d policy=%u ", kdb_process_cpu(tp), tp->policy);
+ kdb_printf(
+ "prio=%d static_prio=%d cpus_allowed=",
+ tp->prio, tp->static_prio);
+ {
+ /* The cpus allowed string may be longer than kdb_printf() can
+ * handle. Print it in chunks.
+ */
+ char c, *p;
+ p = kdb_cpus_allowed_string(tp);
+ while (1) {
+ if (strlen(p) < 100) {
+ kdb_printf("%s", p);
+ break;
+ }
+ c = p[100];
+ p[100] = '\0';
+ kdb_printf("%s", p);
+ p[100] = c;
+ p += 100;
+ }
+ }
+ kdb_printf(" &thread=0x%p\n", &tp1->thread);
+
+ kdb_printf(" need_resched=%d ",
+ test_tsk_thread_flag(tp, TIF_NEED_RESCHED));
+ kdb_printf(
+ "time_slice=%u",
+ tp->rt.time_slice);
+ kdb_printf(" lock_depth=%d\n", tp->lock_depth);
+
+ kdb_printf(
+ " fs=0x%p files=0x%p mm=0x%p\n",
+ tp->fs, tp->files, tp->mm);
+
+ if (tp->sysvsem.undo_list)
+ kdb_printf(
+ " sysvsem.sem_undo refcnt %d list_proc=0x%p\n",
+ atomic_read(&tp->sysvsem.undo_list->refcnt),
+ &tp->sysvsem.undo_list->list_proc);
+
+ kdb_printf(
+ " signal=0x%p &blocked=0x%p &pending=0x%p\n",
+ tp->signal, &tp1->blocked, &tp1->pending);
+
+ kdb_printf(
+ " utime=%ld stime=%ld cutime=%ld cstime=%ld\n",
+ tp->utime, tp->stime,
+ tp->signal ? tp->signal->cutime : 0L,
+ tp->signal ? tp->signal->cstime : 0L);
+
+ kdb_printf(" thread_info=0x%p\n", task_thread_info(tp));
+ kdb_printf(" ti flags=0x%lx\n", (unsigned long)task_thread_info(tp)->flags);
+
+#ifdef CONFIG_NUMA
+ kdb_printf(
+ " mempolicy=0x%p il_next=%d\n",
+ tp->mempolicy, tp->il_next);
+#endif
+
+out:
+ if (tp)
+ kfree(tp);
+ return e;
+}
+
+static int
+kdbm_sigset(int argc, const char **argv)
+{
+ sigset_t *sp = NULL;
+ unsigned long addr;
+ long offset=0;
+ int nextarg;
+ int e = 0;
+ int i;
+ char fmt[32];
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+#ifndef _NSIG_WORDS
+ kdb_printf("unavailable on this platform, _NSIG_WORDS not defined.\n");
+#else
+ nextarg = 1;
+ if ((e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) != 0)
+ return e;
+
+ if (!(sp = kmalloc(sizeof(*sp), GFP_ATOMIC))) {
+ kdb_printf("%s: cannot kmalloc sp\n", __FUNCTION__);
+ goto out;
+ }
+ if ((e = kdb_getarea(*sp, addr))) {
+ kdb_printf("%s: invalid sigset address\n", __FUNCTION__);
+ goto out;
+ }
+
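+ /* Build a width-correct format string at run time: with 8-byte
+ * sigset words this produces "[%d]=0x%016lx ", with 4-byte words
+ * "[%d]=0x%08lx ". */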
+ sprintf(fmt, "[%%d]=0x%%0%dlx ", (int)sizeof(sp->sig[0])*2);
+ kdb_printf("sigset at 0x%p : ", sp);
+ for (i=_NSIG_WORDS-1; i >= 0; i--) {
+ if (i == 0 || sp->sig[i]) {
+ kdb_printf(fmt, i, sp->sig[i]);
+ }
+ }
+ kdb_printf("\n");
+#endif /* _NSIG_WORDS */
+
+out:
+ if (sp)
+ kfree(sp);
+ return e;
+}
+
+static int __init kdbm_task_init(void)
+{
+ kdb_register("task", kdbm_task, "<vaddr>", "Display task_struct", 0);
+ kdb_register("sigset", kdbm_sigset, "<vaddr>", "Display sigset_t", 0);
+
+ return 0;
+}
+
+static void __exit kdbm_task_exit(void)
+{
+ kdb_unregister("task");
+ kdb_unregister("sigset");
+}
+
+module_init(kdbm_task_init)
+module_exit(kdbm_task_exit)
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
++#include <linux/slab.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <asm/pgtable.h>
+
+MODULE_AUTHOR("SGI");
+MODULE_DESCRIPTION("Debug VM information");
+MODULE_LICENSE("GPL");
+
+struct __vmflags {
+ unsigned long mask;
+ char *name;
+};
+
+static struct __vmflags vmflags[] = {
+ { VM_READ, "VM_READ " },
+ { VM_WRITE, "VM_WRITE " },
+ { VM_EXEC, "VM_EXEC " },
+ { VM_SHARED, "VM_SHARED " },
+ { VM_MAYREAD, "VM_MAYREAD " },
+ { VM_MAYWRITE, "VM_MAYWRITE " },
+ { VM_MAYEXEC, "VM_MAYEXEC " },
+ { VM_MAYSHARE, "VM_MAYSHARE " },
+ { VM_GROWSDOWN, "VM_GROWSDOWN " },
+ { VM_GROWSUP, "VM_GROWSUP " },
+ { VM_PFNMAP, "VM_PFNMAP " },
+ { VM_DENYWRITE, "VM_DENYWRITE " },
+ { VM_EXECUTABLE, "VM_EXECUTABLE " },
+ { VM_LOCKED, "VM_LOCKED " },
+ { VM_IO, "VM_IO " },
+ { VM_SEQ_READ, "VM_SEQ_READ " },
+ { VM_RAND_READ, "VM_RAND_READ " },
+ { VM_DONTCOPY, "VM_DONTCOPY " },
+ { VM_DONTEXPAND, "VM_DONTEXPAND " },
+ { VM_RESERVED, "VM_RESERVED " },
+ { VM_ACCOUNT, "VM_ACCOUNT " },
+ { VM_HUGETLB, "VM_HUGETLB " },
+ { VM_NONLINEAR, "VM_NONLINEAR " },
+ { VM_MAPPED_COPY, "VM_MAPPED_COPY " },
+ { VM_INSERTPAGE, "VM_INSERTPAGE " },
+ { 0, "" }
+};
+
+static int
+kdbm_print_vm(struct vm_area_struct *vp, unsigned long addr, int verbose_flg)
+{
+ struct __vmflags *tp;
+
+ kdb_printf("struct vm_area_struct at 0x%lx for %d bytes\n",
+ addr, (int) sizeof (struct vm_area_struct));
+
+ kdb_printf("vm_start = 0x%p vm_end = 0x%p\n", (void *) vp->vm_start,
+ (void *) vp->vm_end);
+ kdb_printf("vm_page_prot = 0x%llx\n",
+ (unsigned long long)pgprot_val(vp->vm_page_prot));
+
+ kdb_printf("vm_flags: ");
+ for (tp = vmflags; tp->mask; tp++) {
+ if (vp->vm_flags & tp->mask) {
+ kdb_printf(" %s", tp->name);
+ }
+ }
+ kdb_printf("\n");
+
+ if (!verbose_flg)
+ return 0;
+
+ kdb_printf("vm_mm = 0x%p\n", (void *) vp->vm_mm);
+ kdb_printf("vm_next = 0x%p\n", (void *) vp->vm_next);
+ kdb_printf("shared.vm_set.list.next = 0x%p\n", (void *) vp->shared.vm_set.list.next);
+ kdb_printf("shared.vm_set.list.prev = 0x%p\n", (void *) vp->shared.vm_set.list.prev);
+ kdb_printf("shared.vm_set.parent = 0x%p\n", (void *) vp->shared.vm_set.parent);
+ kdb_printf("shared.vm_set.head = 0x%p\n", (void *) vp->shared.vm_set.head);
+ kdb_printf("anon_vma_chain.next = 0x%p\n", (void *) vp->anon_vma_chain.next);
+ kdb_printf("anon_vma_chain.prev = 0x%p\n", (void *) vp->anon_vma_chain.prev);
+ kdb_printf("vm_ops = 0x%p\n", (void *) vp->vm_ops);
+ if (vp->vm_ops != NULL) {
+ kdb_printf("vm_ops->open = 0x%p\n", vp->vm_ops->open);
+ kdb_printf("vm_ops->close = 0x%p\n", vp->vm_ops->close);
+ kdb_printf("vm_ops->fault = 0x%p\n", vp->vm_ops->fault);
+#ifdef HAVE_VMOP_MPROTECT
+ kdb_printf("vm_ops->mprotect = 0x%p\n", vp->vm_ops->mprotect);
+#endif
+#ifdef CONFIG_NUMA
+ kdb_printf("vm_ops->set_policy = 0x%p\n", vp->vm_ops->set_policy);
+ kdb_printf("vm_ops->get_policy = 0x%p\n", vp->vm_ops->get_policy);
+#endif
+ }
+ kdb_printf("vm_pgoff = 0x%lx\n", vp->vm_pgoff);
+ kdb_printf("vm_file = 0x%p\n", (void *) vp->vm_file);
+ kdb_printf("vm_private_data = 0x%p\n", vp->vm_private_data);
+#ifdef CONFIG_NUMA
+ kdb_printf("vm_policy = 0x%p\n", vp->vm_policy);
+#endif
+
+ return 0;
+}
+
+static int
+kdbm_print_vmp(struct vm_area_struct *vp, int verbose_flg)
+{
+ struct __vmflags *tp;
+
+ if (verbose_flg) {
+ kdb_printf("0x%lx: ", (unsigned long) vp);
+ }
+
+ kdb_printf("0x%p 0x%p ", (void *) vp->vm_start, (void *) vp->vm_end);
+
+ for (tp = vmflags; tp->mask; tp++) {
+ if (vp->vm_flags & tp->mask) {
+ kdb_printf(" %s", tp->name);
+ }
+ }
+ kdb_printf("\n");
+
+ return 0;
+}
+
+
+#ifdef CONFIG_NUMA
+#include <linux/mempolicy.h>
+
+/*
+ * kdbm_mpol
+ *
+ * This function implements the 'mempolicy' command.
+ * Print a struct mempolicy.
+ *
+ * mempolicy <address> Print struct mempolicy at <address>
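+ *
+ * Hypothetical example (address illustrative; it would normally come
+ * from a task's mempolicy pointer as shown by the 'task' command):
+ * mempolicy 0xe0000034f1105a80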
+ */
+static int
+kdbm_mpol(int argc, const char **argv)
+{
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ int err = 0;
+ struct mempolicy *mp = NULL;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((err = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
+ NULL)) != 0)
+ return err;
+
+ if (!(mp = kmalloc(sizeof(*mp), GFP_ATOMIC))) {
+ kdb_printf("%s: cannot kmalloc mp\n", __FUNCTION__);
+ goto out;
+ }
+
+ if ((err = kdb_getarea(*mp, addr))) {
+ kdb_printf("%s: invalid mempolicy address\n", __FUNCTION__);
+ goto out;
+ }
+
+ kdb_printf("struct mempolicy at 0x%p\n", (struct mempolicy *)addr);
+ kdb_printf(" refcnt %d\n", atomic_read(&mp->refcnt));
+
+ switch (mp->mode) {
+ case MPOL_DEFAULT:
+ kdb_printf(" mode %d (MPOL_DEFAULT)\n", mp->mode);
+ break;
+
+ case MPOL_PREFERRED:
+ kdb_printf(" mode %d (MPOL_PREFERRED)\n", mp->mode);
+ if (mp->flags & MPOL_F_LOCAL)
+ kdb_printf(" preferred_node local\n");
+ else
+ kdb_printf(" preferred_node %d\n", mp->v.preferred_node);
+ break;
+
+ case MPOL_BIND:
+ case MPOL_INTERLEAVE:
+ {
+ int i, nlongs;
+ unsigned long *longp;
+
+ kdb_printf(" mode %d (%s)\n", mp->mode,
+ mp->mode == MPOL_INTERLEAVE
+ ? "MPOL_INTERLEAVE"
+ : "MPOL_BIND");
+ nlongs = (int)BITS_TO_LONGS(MAX_NUMNODES);
+ kdb_printf(" nodes:");
+ longp = mp->v.nodes.bits;
+ for (i = 0; i < nlongs; i++, longp++)
+ kdb_printf(" 0x%lx ", *longp);
+ kdb_printf("\n");
+ break;
+ }
+
+ default:
+ kdb_printf(" mode %d (unknown)\n", mp->mode);
+ break;
+ }
+out:
+ if (mp)
+ kfree(mp);
+ return err;
+}
+
+#endif /* CONFIG_NUMA */
+
+/*
+ * kdbm_pgdat
+ *
+ * This function implements the 'pgdat' command.
+ * Print a struct pglist_data (pg_data_t).
+ *
+ * pgdat <node_id> Print struct pglist_data for node <node_id>.
+ *
+ * Print pglist_data for node 0 if node_id not specified,
+ * or print the one pglist_data structure if !CONFIG_NUMA.
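+ *
+ * Illustrative example: "pgdat 1" walks node 1's zonelists, printing
+ * each zone pointer with the node/zone it resolves to, followed by
+ * the node's page counts.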
+ */
+static int
+kdbm_pgdat(int argc, const char **argv)
+{
+ int err = 0, node_id = 0, i;
+ pg_data_t *pgdatp = NULL;
+
+#ifdef CONFIG_NUMA
+ if (argc > 1)
+ return KDB_ARGCOUNT;
+ if (argc == 1) {
+ int nextarg;
+ long offset = 0;
+ unsigned long node_id_ul;
+
+ nextarg = 1;
+ if ((err = kdbgetaddrarg(argc, argv, &nextarg, &node_id_ul,
+ &offset, NULL)) != 0) {
+ return err;
+ }
+ node_id = (int)node_id_ul;
+ }
+#endif
+ for_each_online_pgdat(pgdatp) {
+ if (pgdatp->node_id == node_id)
+ break;
+ }
+ if (!pgdatp) {
+ kdb_printf("%s: specified node not found\n", __FUNCTION__);
+ return 0;
+ }
+ kdb_printf("struct pglist_data at 0x%p node_id = %d\n",
+ pgdatp, pgdatp->node_id);
+
+ for (i = 0; i < MAX_ZONELISTS; i++) {
+ int zr;
+ struct zoneref *zonerefp;
+ struct zone *zonep;
+
+ zonerefp = pgdatp->node_zonelists[i]._zonerefs;
+ kdb_printf(" _zonerefs[%d] at 0x%p\n", i, zonerefp);
+
+ for (zr = 0; zr <= MAX_ZONES_PER_ZONELIST; zr++, zonerefp++) {
+ int z;
+ pg_data_t *tmp_pgdatp;
+
+ zonep = zonelist_zone(zonerefp);
+ if (!zonep)
+ break;
+
+ kdb_printf(" 0x%p", zonep);
+
+ for_each_online_pgdat(tmp_pgdatp) {
+ for (z = 0; z < MAX_NR_ZONES; z++) {
+ if (zonep == &tmp_pgdatp->node_zones[z]) {
+ kdb_printf (" (node %d node_zones[%d])",
+ tmp_pgdatp->node_id, z);
+ break;
+ }
+ }
+ if (z != MAX_NR_ZONES)
+ break; /* found it */
+ }
+ kdb_printf("\n");
+ }
+ }
+
+ kdb_printf(" nr_zones = %d", pgdatp->nr_zones);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+ kdb_printf(" node_mem_map = 0x%p\n", pgdatp->node_mem_map);
+#endif
+#ifndef CONFIG_NO_BOOTMEM
+ kdb_printf(" bdata = 0x%p", pgdatp->bdata);
+#endif
+ kdb_printf(" node_start_pfn = 0x%lx\n", pgdatp->node_start_pfn);
+ kdb_printf(" node_present_pages = %ld (0x%lx)\n",
+ pgdatp->node_present_pages, pgdatp->node_present_pages);
+ kdb_printf(" node_spanned_pages = %ld (0x%lx)\n",
+ pgdatp->node_spanned_pages, pgdatp->node_spanned_pages);
+ kdb_printf(" kswapd = 0x%p\n", pgdatp->kswapd);
+
+ return err;
+}
+
+/*
+ * kdbm_vm
+ *
+ * This function implements the 'vm' command. Print a vm_area_struct.
+ *
+ * vm [-v] <address> Print vm_area_struct at <address>
+ * vmp [-v] <pid> Print all vm_area_structs for <pid>
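+ *
+ * Hypothetical examples (values illustrative): "vm -v 0xe000003014f0a680"
+ * dumps a single vm_area_struct in full, while "vmp 1" lists the
+ * start, end and flags of every mapping belonging to pid 1.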
+ */
+
+static int
+kdbm_vm(int argc, const char **argv)
+{
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ int diag;
+ int verbose_flg = 0;
+
+ if (argc == 2) {
+ if (strcmp(argv[1], "-v") != 0) {
+ return KDB_ARGCOUNT;
+ }
+ verbose_flg = 1;
+ } else if (argc != 1) {
+ return KDB_ARGCOUNT;
+ }
+
+ if (strcmp(argv[0], "vmp") == 0) {
+ struct task_struct *g, *tp;
+ struct vm_area_struct *vp;
+ pid_t pid;
+ unsigned long upid;
+
+ /* kdbgetularg() stores a full unsigned long; don't pass
+ * a pid_t * directly, it would be overrun on 64-bit. */
+ if ((diag = kdbgetularg(argv[argc], &upid)))
+ return diag;
+ pid = (pid_t)upid;
+
+ kdb_do_each_thread(g, tp) {
+ if (tp->pid == pid) {
+ if (tp->mm != NULL) {
+ if (verbose_flg)
+ kdb_printf("vm_area_struct ");
+ kdb_printf("vm_start vm_end vm_flags\n");
+ vp = tp->mm->mmap;
+ while (vp != NULL) {
+ kdbm_print_vmp(vp, verbose_flg);
+ vp = vp->vm_next;
+ }
+ }
+ return 0;
+ }
+ } kdb_while_each_thread(g, tp);
+
+ kdb_printf("No process with pid == %d found\n", pid);
+
+ } else {
+ struct vm_area_struct v;
+
+ nextarg = argc;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
+ NULL))
+ || (diag = kdb_getarea(v, addr)))
+ return diag;
+
+ kdbm_print_vm(&v, addr, verbose_flg);
+ }
+
+ return 0;
+}
+
+static int
+kdbm_print_pte(pte_t * pte)
+{
+ kdb_printf("0x%lx (", (unsigned long) pte_val(*pte));
+
+ if (pte_present(*pte)) {
+#ifdef pte_exec
+ if (pte_exec(*pte))
+ kdb_printf("X");
+#endif
+ if (pte_write(*pte))
+ kdb_printf("W");
+#ifdef pte_read
+ if (pte_read(*pte))
+ kdb_printf("R");
+#endif
+ if (pte_young(*pte))
+ kdb_printf("A");
+ if (pte_dirty(*pte))
+ kdb_printf("D");
+
+ } else {
+ kdb_printf("OFFSET=0x%lx ", swp_offset(pte_to_swp_entry(*pte)));
+ kdb_printf("TYPE=0x%ulx", swp_type(pte_to_swp_entry(*pte)));
+ }
+
+ kdb_printf(")");
+
+ /* final newline is output by caller of kdbm_print_pte() */
+
+ return 0;
+}
+
+/*
+ * kdbm_pte
+ *
+ * This function implements the 'pte' command. Print all pte_t structures
+ * that map to the given virtual address range (<address> through <address>
+ * plus <nbytes>) for the given process. The default value for nbytes is
+ * one.
+ *
+ * pte -m <mm> <address> [<nbytes>] Print all pte_t structures for
+ * virtual <address> in address space
+ * of <mm> which is a pointer to a
+ * mm_struct
+ * pte -p <pid> <address> [<nbytes>] Print all pte_t structures for
+ * virtual <address> in address space
+ * of <pid>
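+ *
+ * A hypothetical session (addresses illustrative only):
+ * pte -p 1 0x2000000000000000 0x4000
+ * walks that range a page at a time and prints one decoded pte_t
+ * line per page.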
+ */
+
+static int
+kdbm_pte(int argc, const char **argv)
+{
+ unsigned long addr;
+ long offset = 0;
+ int nextarg;
+ unsigned long nbytes = 1;
+ long npgs;
+ int diag;
+ int found;
+ pid_t pid;
+ struct task_struct *tp;
+ struct mm_struct *mm, copy_of_mm;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (argc < 3 || argc > 4) {
+ return KDB_ARGCOUNT;
+ }
+
+ if (strcmp(argv[1], "-p") == 0) {
+ if ((diag = kdbgetularg(argv[2], (unsigned long *) &pid))) {
+ return diag;
+ }
+
+ found = 0;
+ for_each_process(tp) {
+ if (tp->pid == pid) {
+ if (tp->mm != NULL) {
+ found = 1;
+ break;
+ }
+ kdb_printf("task structure's mm field is NULL\n");
+ return 0;
+ }
+ }
+
+ if (!found) {
+ kdb_printf("No process with pid == %d found\n", pid);
+ return 0;
+ }
+ mm = tp->mm;
+ } else if (strcmp(argv[1], "-m") == 0) {
+ nextarg = 2;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
+ NULL))
+ || (diag = kdb_getarea(copy_of_mm, addr)))
+ return diag;
+ mm = &copy_of_mm;
+ } else {
+ return KDB_ARGCOUNT;
+ }
+
+ if ((diag = kdbgetularg(argv[3], &addr))) {
+ return diag;
+ }
+
+ if (argc == 4) {
+ if ((diag = kdbgetularg(argv[4], &nbytes))) {
+ return diag;
+ }
+ }
+
+ kdb_printf("vaddr pte\n");
+
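+ /* Round the byte range up to whole pages; this is
+ * DIV_ROUND_UP(offset_in_page(addr) + nbytes, PAGE_SIZE)
+ * spelled out with masks. */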
+ npgs = ((((addr & ~PAGE_MASK) + nbytes) + ~PAGE_MASK) >> PAGE_SHIFT);
+ while (npgs-- > 0) {
+
+ kdb_printf("0x%p ", (void *) (addr & PAGE_MASK));
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, addr);
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (pmd_present(*pmd)) {
+ pte = pte_offset_map(pmd, addr);
+ if (pte_present(*pte)) {
+ kdbm_print_pte(pte);
+ }
+ }
+ }
+ }
+
+ kdb_printf("\n");
+ addr += PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+/*
+ * kdbm_rpte
+ *
+ * This function implements the 'rpte' command. Print all pte_t structures
+ * that contain the given physical page range (<pfn> through <pfn>
+ * plus <npages>) for the given process. The default value for npages is
+ * one.
+ *
+ * rpte -m <mm> <pfn> [<npages>] Print all pte_t structures for
+ * physical page <pfn> in address space
+ * of <mm> which is a pointer to a
+ * mm_struct
+ * rpte -p <pid> <pfn> [<npages>] Print all pte_t structures for
+ * physical page <pfn> in address space
+ * of <pid>
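+ *
+ * Hypothetical example: "rpte -p 1 0x1234 4" scans every page table
+ * entry of pid 1 and prints the mapping virtual address of any pte
+ * whose pfn falls in [0x1234, 0x1238).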
+ */
+
+static int
+kdbm_rpte(int argc, const char **argv)
+{
+ unsigned long addr;
+ unsigned long pfn;
+ long offset = 0;
+ int nextarg;
+ unsigned long npages = 1;
+ int diag;
+ int found;
+ pid_t pid;
+ struct task_struct *tp;
+ struct mm_struct *mm, copy_of_mm;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long g, u, m, t;
+
+ if (argc < 3 || argc > 4) {
+ return KDB_ARGCOUNT;
+ }
+
+ if (strcmp(argv[1], "-p") == 0) {
+ if ((diag = kdbgetularg(argv[2], (unsigned long *) &pid))) {
+ return diag;
+ }
+
+ found = 0;
+ for_each_process(tp) {
+ if (tp->pid == pid) {
+ if (tp->mm != NULL) {
+ found = 1;
+ break;
+ }
+ kdb_printf("task structure's mm field is NULL\n");
+ return 0;
+ }
+ }
+
+ if (!found) {
+ kdb_printf("No process with pid == %d found\n", pid);
+ return 0;
+ }
+ mm = tp->mm;
+ } else if (strcmp(argv[1], "-m") == 0) {
+ nextarg = 2;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
+ NULL))
+ || (diag = kdb_getarea(copy_of_mm, addr)))
+ return diag;
+ mm = &copy_of_mm;
+ } else {
+ return KDB_ARGCOUNT;
+ }
+
+ if ((diag = kdbgetularg(argv[3], &pfn))) {
+ return diag;
+ }
+
+ if (argc == 4) {
+ if ((diag = kdbgetularg(argv[4], &npages))) {
+ return diag;
+ }
+ }
+
+ /* spaces after vaddr depends on sizeof(unsigned long) */
+ kdb_printf("pfn vaddr%*s pte\n",
+ (int)(2*sizeof(unsigned long) + 2 - 5), " ");
+
+ for (g = 0, pgd = pgd_offset(mm, 0UL); g < PTRS_PER_PGD; ++g, ++pgd) {
+ if (pgd_none(*pgd) || pgd_bad(*pgd))
+ continue;
+ for (u = 0, pud = pud_offset(pgd, 0UL); u < PTRS_PER_PUD; ++u, ++pud) {
+ if (pud_none(*pud) || pud_bad(*pud))
+ continue;
+ for (m = 0, pmd = pmd_offset(pud, 0UL); m < PTRS_PER_PMD; ++m, ++pmd) {
+ if (pmd_none(*pmd) || pmd_bad(*pmd))
+ continue;
+ for (t = 0, pte = pte_offset_map(pmd, 0UL); t < PTRS_PER_PTE; ++t, ++pte) {
+ if (pte_none(*pte))
+ continue;
+ if (pte_pfn(*pte) < pfn || pte_pfn(*pte) >= (pfn + npages))
+ continue;
+ addr = g << PGDIR_SHIFT;
+#ifdef __ia64__
+ /* IA64 plays tricks with the pgd mapping to save space.
+ * This reverses pgd_index().
+ */
+ {
+ unsigned long region = g >> (PAGE_SHIFT - 6);
+ unsigned long l1index = g - (region << (PAGE_SHIFT - 6));
+ addr = (region << 61) + (l1index << PGDIR_SHIFT);
+ }
+#endif
+ addr += (m << PMD_SHIFT) + (t << PAGE_SHIFT);
+ kdb_printf("0x%-14lx " kdb_bfd_vma_fmt0 " ",
+ pte_pfn(*pte), addr);
+ kdbm_print_pte(pte);
+ kdb_printf("\n");
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+kdbm_print_dentry(unsigned long daddr)
+{
+ struct dentry d;
+ int diag;
+ char buf[256];
+
+ kdb_printf("Dentry at 0x%lx\n", daddr);
+ if ((diag = kdb_getarea(d, (unsigned long)daddr)))
+ return diag;
+
+ if ((d.d_name.len > sizeof(buf)) || (diag = kdb_getarea_size(buf, (unsigned long)(d.d_name.name), d.d_name.len)))
+ kdb_printf(" d_name.len = %d d_name.name = 0x%p\n",
+ d.d_name.len, d.d_name.name);
+ else
+ kdb_printf(" d_name.len = %d d_name.name = 0x%p <%.*s>\n",
+ d.d_name.len, d.d_name.name,
+ (int)(d.d_name.len), d.d_name.name);
+
+ kdb_printf(" d_count = %d d_flags = 0x%x d_inode = 0x%p\n",
+ atomic_read(&d.d_count), d.d_flags, d.d_inode);
+
+ kdb_printf(" d_parent = 0x%p\n", d.d_parent);
+
+ kdb_printf(" d_hash.nxt = 0x%p d_hash.prv = 0x%p\n",
+ d.d_hash.next, d.d_hash.pprev);
+
+ kdb_printf(" d_lru.nxt = 0x%p d_lru.prv = 0x%p\n",
+ d.d_lru.next, d.d_lru.prev);
+
+ kdb_printf(" d_child.nxt = 0x%p d_child.prv = 0x%p\n",
+ d.d_u.d_child.next, d.d_u.d_child.prev);
+
+ kdb_printf(" d_subdirs.nxt = 0x%p d_subdirs.prv = 0x%p\n",
+ d.d_subdirs.next, d.d_subdirs.prev);
+
+ kdb_printf(" d_alias.nxt = 0x%p d_alias.prv = 0x%p\n",
+ d.d_alias.next, d.d_alias.prev);
+
+ kdb_printf(" d_op = 0x%p d_sb = 0x%p d_fsdata = 0x%p\n",
+ d.d_op, d.d_sb, d.d_fsdata);
+
+ kdb_printf(" d_iname = %s\n",
+ d.d_iname);
+
+ if (d.d_inode) {
+ struct inode i;
+ kdb_printf("\nInode Entry at 0x%p\n", d.d_inode);
+ if ((diag = kdb_getarea(i, (unsigned long)d.d_inode)))
+ return diag;
+ kdb_printf(" i_mode = 0%o i_nlink = %d i_rdev = 0x%x\n",
+ i.i_mode, i.i_nlink, i.i_rdev);
+
+ kdb_printf(" i_ino = %ld i_count = %d\n",
+ i.i_ino, atomic_read(&i.i_count));
+
+ kdb_printf(" i_hash.nxt = 0x%p i_hash.prv = 0x%p\n",
+ i.i_hash.next, i.i_hash.pprev);
+
+ kdb_printf(" i_list.nxt = 0x%p i_list.prv = 0x%p\n",
+ i.i_list.next, i.i_list.prev);
+
+ kdb_printf(" i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
+ i.i_dentry.next, i.i_dentry.prev);
+
+ }
+ kdb_printf("\n");
+ return 0;
+}
+
+static int
+kdbm_filp(int argc, const char **argv)
+{
+ struct file f;
+ int nextarg;
+ unsigned long addr;
+ long offset;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+ (diag = kdb_getarea(f, addr)))
+ return diag;
+
+ kdb_printf("File Pointer at 0x%lx\n", addr);
+
+ kdb_printf(" fu_list.nxt = 0x%p fu_list.prv = 0x%p\n",
+ f.f_u.fu_list.next, f.f_u.fu_list.prev);
+
+ kdb_printf(" f_dentry = 0x%p f_vfsmnt = 0x%p f_op = 0x%p\n",
+ f.f_dentry, f.f_vfsmnt, f.f_op);
+
+ kdb_printf(" f_count = %ld f_flags = 0x%x f_mode = 0x%x\n",
+ atomic_long_read(&f.f_count), f.f_flags, f.f_mode);
+
+ kdb_printf(" f_pos = %Ld\n", f.f_pos);
+#ifdef CONFIG_SECURITY
+ kdb_printf(" security = 0x%p\n", f.f_security);
+#endif
+
+ kdb_printf(" private_data = 0x%p f_mapping = 0x%p\n\n",
+ f.private_data, f.f_mapping);
+
+ return kdbm_print_dentry((unsigned long)f.f_dentry);
+}
+
+static int
+kdbm_fl(int argc, const char **argv)
+{
+ struct file_lock fl;
+ int nextarg;
+ unsigned long addr;
+ long offset;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+ (diag = kdb_getarea(fl, addr)))
+ return diag;
+
+ kdb_printf("File_lock at 0x%lx\n", addr);
+
+ kdb_printf(" fl_next = 0x%p fl_link.nxt = 0x%p fl_link.prv = 0x%p\n",
+ fl.fl_next, fl.fl_link.next, fl.fl_link.prev);
+ kdb_printf(" fl_block.nxt = 0x%p fl_block.prv = 0x%p\n",
+ fl.fl_block.next, fl.fl_block.prev);
+ kdb_printf(" fl_owner = 0x%p fl_pid = %d fl_wait = 0x%p\n",
+ fl.fl_owner, fl.fl_pid, &fl.fl_wait);
+ kdb_printf(" fl_file = 0x%p fl_flags = 0x%x\n",
+ fl.fl_file, fl.fl_flags);
+ kdb_printf(" fl_type = %d fl_start = 0x%llx fl_end = 0x%llx\n",
+ fl.fl_type, fl.fl_start, fl.fl_end);
+
+ kdb_printf(" file_lock_operations");
+ if (fl.fl_ops)
+ kdb_printf("\n fl_copy_lock = 0x%p fl_release_private = 0x%p\n",
+ fl.fl_ops->fl_copy_lock, fl.fl_ops->fl_release_private);
+ else
+ kdb_printf(" empty\n");
+
+ kdb_printf(" lock_manager_operations");
+ if (fl.fl_lmops)
+ kdb_printf("\n fl_compare_owner = 0x%p fl_notify = 0x%p\n",
+ fl.fl_lmops->fl_compare_owner, fl.fl_lmops->fl_notify);
+ else
+ kdb_printf(" empty\n");
+
+ kdb_printf(" fl_fasync = 0x%p fl_break 0x%lx\n",
+ fl.fl_fasync, fl.fl_break_time);
+
+ return 0;
+}
+
+
+static int
+kdbm_dentry(int argc, const char **argv)
+{
+ int nextarg;
+ unsigned long addr;
+ long offset;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ return diag;
+
+ return kdbm_print_dentry(addr);
+}
+
+static int
+kdbm_kobject(int argc, const char **argv)
+{
+ struct kobject k;
+ int nextarg;
+ unsigned long addr;
+ long offset;
+ int diag;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+ (diag = kdb_getarea(k, addr)))
+ return diag;
+
+ kdb_printf("kobject at 0x%lx\n", addr);
+
+ if (k.name) {
+ char c;
+ kdb_printf(" name 0x%p", k.name);
+ if (kdb_getarea(c, (unsigned long)k.name) == 0)
+ kdb_printf(" '%s'", k.name);
+ kdb_printf("\n");
+ }
+
+ if (k.name != kobject_name((struct kobject *)addr))
+ kdb_printf(" name '%.20s'\n", k.name);
+
+ kdb_printf(" kref.refcount %d'\n", atomic_read(&k.kref.refcount));
+
+ kdb_printf(" entry.next = 0x%p entry.prev = 0x%p\n",
+ k.entry.next, k.entry.prev);
+
+ kdb_printf(" parent = 0x%p kset = 0x%p ktype = 0x%p sd = 0x%p\n",
+ k.parent, k.kset, k.ktype, k.sd);
+
+ return 0;
+}
+
+static int
+kdbm_sh(int argc, const char **argv)
+{
+ int diag;
+ int nextarg;
+ unsigned long addr;
+ long offset = 0L;
+ struct Scsi_Host sh;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
+ (diag = kdb_getarea(sh, addr)))
+ return diag;
+
+ kdb_printf("Scsi_Host at 0x%lx\n", addr);
+ kdb_printf("host_queue = 0x%p\n", sh.__devices.next);
+ kdb_printf("ehandler = 0x%p eh_action = 0x%p\n",
+ sh.ehandler, sh.eh_action);
+ kdb_printf("host_wait = 0x%p hostt = 0x%p\n",
+ &sh.host_wait, sh.hostt);
+ kdb_printf("host_failed = %d host_no = %d resetting = %d\n",
+ sh.host_failed, sh.host_no, sh.resetting);
+ kdb_printf("max id/lun/channel = [%d/%d/%d] this_id = %d\n",
+ sh.max_id, sh.max_lun, sh.max_channel, sh.this_id);
+ kdb_printf("can_queue = %d cmd_per_lun = %d sg_tablesize = %d u_isa_dma = %d\n",
+ sh.can_queue, sh.cmd_per_lun, sh.sg_tablesize, sh.unchecked_isa_dma);
+ kdb_printf("host_blocked = %d reverse_ordering = %d \n",
+ sh.host_blocked, sh.reverse_ordering);
+
+ return 0;
+}
+
+static int
+kdbm_sd(int argc, const char **argv)
+{
+ int diag;
+ int nextarg;
+ unsigned long addr;
+ long offset = 0L;
+ struct scsi_device *sd = NULL;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ goto out;
+ if (!(sd = kmalloc(sizeof(*sd), GFP_ATOMIC))) {
+ kdb_printf("kdbm_sd: cannot kmalloc sd\n");
+ goto out;
+ }
+ if ((diag = kdb_getarea(*sd, addr)))
+ goto out;
+
+ kdb_printf("scsi_device at 0x%lx\n", addr);
+ kdb_printf("next = 0x%p prev = 0x%p host = 0x%p\n",
+ sd->siblings.next, sd->siblings.prev, sd->host);
+ kdb_printf("device_busy = %d current_cmnd 0x%p\n",
+ sd->device_busy, sd->current_cmnd);
+ kdb_printf("id/lun/chan = [%d/%d/%d] single_lun = %d device_blocked = %d\n",
+ sd->id, sd->lun, sd->channel, sd->sdev_target->single_lun, sd->device_blocked);
+ kdb_printf("queue_depth = %d current_tag = %d scsi_level = %d\n",
+ sd->queue_depth, sd->current_tag, sd->scsi_level);
+ kdb_printf("%8.8s %16.16s %4.4s\n", sd->vendor, sd->model, sd->rev);
+out:
+ if (sd)
+ kfree(sd);
+ return diag;
+}
+
+static int
+kdbm_sc(int argc, const char **argv)
+{
+ int diag;
+ int nextarg;
+ unsigned long addr;
+ long offset = 0L;
+ struct scsi_cmnd *sc = NULL;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
+ goto out;
+ if (!(sc = kmalloc(sizeof(*sc), GFP_ATOMIC))) {
+ kdb_printf("kdbm_sc: cannot kmalloc sc\n");
+ goto out;
+ }
+ if ((diag = kdb_getarea(*sc, addr)))
+ goto out;
+
+ kdb_printf("scsi_cmnd at 0x%lx\n", addr);
+ kdb_printf("device = 0x%p next = 0x%p\n",
+ sc->device, sc->list.next);
+ kdb_printf("serial_number = %ld retries = %d\n",
+ sc->serial_number, sc->retries);
+ kdb_printf("cmd_len = %d\n", sc->cmd_len);
+ kdb_printf("cmnd = [%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x]\n",
+ sc->cmnd[0], sc->cmnd[1], sc->cmnd[2], sc->cmnd[3], sc->cmnd[4],
+ sc->cmnd[5], sc->cmnd[6], sc->cmnd[7], sc->cmnd[8], sc->cmnd[9],
+ sc->cmnd[10], sc->cmnd[11]);
+ kdb_printf("request_buffer = 0x%p request_bufflen = %d\n",
+ scsi_sglist(sc), scsi_bufflen(sc));
+ kdb_printf("use_sg = %d\n", scsi_sg_count(sc));
+ kdb_printf("underflow = %d transfersize = %d\n",
+ sc->underflow, sc->transfersize);
+ kdb_printf("tag = %d\n", sc->tag);
+
+out:
+ if (sc)
+ kfree(sc);
+ return diag;
+}
+
+static int __init kdbm_vm_init(void)
+{
+ kdb_register("vm", kdbm_vm, "[-v] <vaddr>", "Display vm_area_struct", 0);
+ kdb_register("vmp", kdbm_vm, "[-v] <pid>", "Display all vm_area_struct for <pid>", 0);
+#ifdef CONFIG_NUMA
+ kdb_register("mempolicy", kdbm_mpol, "<vaddr>", "Display mempolicy structure", 0);
+ kdb_register("pgdat", kdbm_pgdat, "<node_id>", "Display pglist_data node structure", 0);
+#else
+ kdb_register("pgdat", kdbm_pgdat, "", "Display pglist_data node structure", 0);
+#endif
+ kdb_register("pte", kdbm_pte, "( -m <mm> | -p <pid> ) <vaddr> [<nbytes>]", "Display pte_t for mm_struct or pid", 0);
+ kdb_register("rpte", kdbm_rpte, "( -m <mm> | -p <pid> ) <pfn> [<npages>]", "Find pte_t containing pfn for mm_struct or pid", 0);
+ kdb_register("dentry", kdbm_dentry, "<dentry>", "Display interesting dentry stuff", 0);
+ kdb_register("kobject", kdbm_kobject, "<kobject>", "Display interesting kobject stuff", 0);
+ kdb_register("filp", kdbm_filp, "<filp>", "Display interesting filp stuff", 0);
+ kdb_register("fl", kdbm_fl, "<fl>", "Display interesting file_lock stuff", 0);
+ kdb_register("sh", kdbm_sh, "<vaddr>", "Show scsi_host", 0);
+ kdb_register("sd", kdbm_sd, "<vaddr>", "Show scsi_device", 0);
+ kdb_register("sc", kdbm_sc, "<vaddr>", "Show scsi_cmnd", 0);
+
+ return 0;
+}
+
+static void __exit kdbm_vm_exit(void)
+{
+ kdb_unregister("vm");
+ kdb_unregister("vmp");
+#ifdef CONFIG_NUMA
+ kdb_unregister("mempolicy");
+#endif
+ kdb_unregister("pgdat");
+ kdb_unregister("pte");
+ kdb_unregister("rpte");
+ kdb_unregister("dentry");
+ kdb_unregister("kobject");
+ kdb_unregister("filp");
+ kdb_unregister("fl");
+ kdb_unregister("sh");
+ kdb_unregister("sd");
+ kdb_unregister("sc");
+}
+
+module_init(kdbm_vm_init)
+module_exit(kdbm_vm_exit)
unsigned int symindex = 0;
unsigned int strindex = 0;
unsigned int modindex, versindex, infoindex, pcpuindex;
+ unsigned int unwindex = 0;
struct module *mod;
long err = 0;
- void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+ void *ptr = NULL; /* Stops spurious gcc warning */
unsigned long symoffs, stroffs, *strmap;
mm_segment_t old_fs;
--- /dev/null
+/*
+ * Copyright (C) 2002-2006 Novell, Inc.
+ * Jan Beulich <jbeulich@novell.com>
+ * This code is released under version 2 of the GNU GPL.
+ *
+ * A simple API for unwinding kernel stacks. This is used for
+ * debugging and error reporting purposes. The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whistles, so there
+ * is not much point in implementing the full Dwarf2 unwind API.
+ */
+
+#include <linux/unwind.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/sort.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+#include <asm/unaligned.h>
++#include <linux/slab.h>
+
+extern const char __start_unwind[], __end_unwind[];
+extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
+
+#define MAX_STACK_DEPTH 8
+
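+/* reg_info[] below records where each DWARF register lives inside
+ * struct unwind_frame_info: .offs is the field offset in units of the
+ * field's own size (so the FRAME_REG() accessor in unwind() can index
+ * a cast array) and .width is the field size in bytes. The
+ * BUILD_BUG_ON_ZERO term rejects fields whose offset is not a
+ * multiple of their size. */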
+#define EXTRA_INFO(f) { \
+ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
+ % FIELD_SIZEOF(struct unwind_frame_info, f)) \
+ + offsetof(struct unwind_frame_info, f) \
+ / FIELD_SIZEOF(struct unwind_frame_info, f), \
+ FIELD_SIZEOF(struct unwind_frame_info, f) \
+ }
+#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
+
+static const struct {
+ unsigned offs:BITS_PER_LONG / 2;
+ unsigned width:BITS_PER_LONG / 2;
+} reg_info[] = {
+ UNW_REGISTER_INFO
+};
+
+#undef PTREGS_INFO
+#undef EXTRA_INFO
+
+#ifndef REG_INVALID
+#define REG_INVALID(r) (reg_info[r].width == 0)
+#endif
+
+#define DW_CFA_nop 0x00
+#define DW_CFA_set_loc 0x01
+#define DW_CFA_advance_loc1 0x02
+#define DW_CFA_advance_loc2 0x03
+#define DW_CFA_advance_loc4 0x04
+#define DW_CFA_offset_extended 0x05
+#define DW_CFA_restore_extended 0x06
+#define DW_CFA_undefined 0x07
+#define DW_CFA_same_value 0x08
+#define DW_CFA_register 0x09
+#define DW_CFA_remember_state 0x0a
+#define DW_CFA_restore_state 0x0b
+#define DW_CFA_def_cfa 0x0c
+#define DW_CFA_def_cfa_register 0x0d
+#define DW_CFA_def_cfa_offset 0x0e
+#define DW_CFA_def_cfa_expression 0x0f
+#define DW_CFA_expression 0x10
+#define DW_CFA_offset_extended_sf 0x11
+#define DW_CFA_def_cfa_sf 0x12
+#define DW_CFA_def_cfa_offset_sf 0x13
+#define DW_CFA_val_offset 0x14
+#define DW_CFA_val_offset_sf 0x15
+#define DW_CFA_val_expression 0x16
+#define DW_CFA_lo_user 0x1c
+#define DW_CFA_GNU_window_save 0x2d
+#define DW_CFA_GNU_args_size 0x2e
+#define DW_CFA_GNU_negative_offset_extended 0x2f
+#define DW_CFA_hi_user 0x3f
+
+#define DW_EH_PE_FORM 0x07
+#define DW_EH_PE_native 0x00
+#define DW_EH_PE_leb128 0x01
+#define DW_EH_PE_data2 0x02
+#define DW_EH_PE_data4 0x03
+#define DW_EH_PE_data8 0x04
+#define DW_EH_PE_signed 0x08
+#define DW_EH_PE_ADJUST 0x70
+#define DW_EH_PE_abs 0x00
+#define DW_EH_PE_pcrel 0x10
+#define DW_EH_PE_textrel 0x20
+#define DW_EH_PE_datarel 0x30
+#define DW_EH_PE_funcrel 0x40
+#define DW_EH_PE_aligned 0x50
+#define DW_EH_PE_indirect 0x80
+#define DW_EH_PE_omit 0xff
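+
+/* An encoding byte combines one FORM value with one ADJUST value plus
+ * the signed/indirect bits. For example, 0x1b is DW_EH_PE_pcrel |
+ * DW_EH_PE_signed | DW_EH_PE_data4: a signed 32-bit value relative to
+ * the location it was read from, the usual FDE pointer encoding in
+ * PIC .eh_frame sections. */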
+
+typedef unsigned long uleb128_t;
+typedef signed long sleb128_t;
+#define sleb128abs __builtin_labs
+
+static struct unwind_table {
+ struct {
+ unsigned long pc;
+ unsigned long range;
+ } core, init;
+ const void *address;
+ unsigned long size;
+ const unsigned char *header;
+ unsigned long hdrsz;
+ struct unwind_table *link;
+ const char *name;
+} root_table;
+
+struct unwind_item {
+ enum item_location {
+ Nowhere,
+ Memory,
+ Register,
+ Value
+ } where;
+ uleb128_t value;
+};
+
+struct unwind_state {
+ uleb128_t loc, org;
+ const u8 *cieStart, *cieEnd;
+ uleb128_t codeAlign;
+ sleb128_t dataAlign;
+ struct cfa {
+ uleb128_t reg, offs;
+ } cfa;
+ struct unwind_item regs[ARRAY_SIZE(reg_info)];
+ unsigned stackDepth:8;
+ unsigned version:8;
+ const u8 *label;
+ const u8 *stack[MAX_STACK_DEPTH];
+};
+
+static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
+
+static unsigned unwind_debug;
+static int __init unwind_debug_setup(char *s)
+{
+ unwind_debug = simple_strtoul(s, NULL, 0);
+ return 1;
+}
+__setup("unwind_debug=", unwind_debug_setup);
+#define dprintk(lvl, fmt, args...) \
+ ((void)(lvl > unwind_debug \
+ || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
+
+static struct unwind_table *find_table(unsigned long pc)
+{
+ struct unwind_table *table;
+
+ for (table = &root_table; table; table = table->link)
+ if ((pc >= table->core.pc
+ && pc < table->core.pc + table->core.range)
+ || (pc >= table->init.pc
+ && pc < table->init.pc + table->init.range))
+ break;
+
+ return table;
+}
+
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType,
+ unsigned long text_base,
+ unsigned long data_base);
+
+static void init_unwind_table(struct unwind_table *table,
+ const char *name,
+ const void *core_start,
+ unsigned long core_size,
+ const void *init_start,
+ unsigned long init_size,
+ const void *table_start,
+ unsigned long table_size,
+ const u8 *header_start,
+ unsigned long header_size)
+{
+ const u8 *ptr = header_start + 4;
+ const u8 *end = header_start + header_size;
+
+ table->core.pc = (unsigned long)core_start;
+ table->core.range = core_size;
+ table->init.pc = (unsigned long)init_start;
+ table->init.range = init_size;
+ table->address = table_start;
+ table->size = table_size;
+ /* See if the linker provided table looks valid. */
+ if (header_size <= 4
+ || header_start[0] != 1
+ || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
+ != table_start
+ || !read_pointer(&ptr, end, header_start[2], 0, 0)
+ || !read_pointer(&ptr, end, header_start[3], 0,
+ (unsigned long)header_start)
+ || !read_pointer(&ptr, end, header_start[3], 0,
+ (unsigned long)header_start))
+ header_start = NULL;
+ table->hdrsz = header_size;
+ smp_wmb();
+ table->header = header_start;
+ table->link = NULL;
+ table->name = name;
+}
+
+void __init unwind_init(void)
+{
+ init_unwind_table(&root_table, "kernel",
+ _text, _end - _text,
+ NULL, 0,
+ __start_unwind, __end_unwind - __start_unwind,
+ __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
+}
+
+static const u32 bad_cie, not_fde;
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
+static signed fde_pointer_type(const u32 *cie);
+
+struct eh_frame_hdr_table_entry {
+ unsigned long start, fde;
+};
+
+static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
+{
+ const struct eh_frame_hdr_table_entry *e1 = p1;
+ const struct eh_frame_hdr_table_entry *e2 = p2;
+
+ return (e1->start > e2->start) - (e1->start < e2->start);
+}
+
+static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
+{
+ struct eh_frame_hdr_table_entry *e1 = p1;
+ struct eh_frame_hdr_table_entry *e2 = p2;
+ unsigned long v;
+
+ v = e1->start;
+ e1->start = e2->start;
+ e2->start = v;
+ v = e1->fde;
+ e1->fde = e2->fde;
+ e2->fde = v;
+}
+
+static void __init setup_unwind_table(struct unwind_table *table,
+ void *(*alloc)(unsigned long))
+{
+ const u8 *ptr;
+ unsigned long tableSize = table->size, hdrSize;
+ unsigned n;
+ const u32 *fde;
+ struct {
+ u8 version;
+ u8 eh_frame_ptr_enc;
+ u8 fde_count_enc;
+ u8 table_enc;
+ unsigned long eh_frame_ptr;
+ unsigned int fde_count;
+ struct eh_frame_hdr_table_entry table[];
+ } __attribute__((__packed__)) *header;
+
+ if (table->header)
+ return;
+
+ if (table->hdrsz)
+ printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
+ table->name);
+
+ if (tableSize & (sizeof(*fde) - 1))
+ return;
+
+ for (fde = table->address, n = 0;
+ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = cie_for_fde(fde, table);
+ signed ptrType;
+
+ if (cie == &not_fde)
+ continue;
+ if (cie == NULL
+ || cie == &bad_cie
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ return;
+ ptr = (const u8 *)(fde + 2);
+ if (!read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0))
+ return;
+ ++n;
+ }
+
+ if (tableSize || !n)
+ return;
+
+ hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
+ + 2 * n * sizeof(unsigned long);
+ dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
+ header = alloc(hdrSize);
+ if (!header)
+ return;
+ header->version = 1;
+ header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4;
+ header->table_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
+ BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
+ % __alignof(typeof(header->fde_count)));
+ header->fde_count = n;
+
+ BUILD_BUG_ON(offsetof(typeof(*header), table)
+ % __alignof(typeof(*header->table)));
+ for (fde = table->address, tableSize = table->size, n = 0;
+ tableSize;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
+
+ if (!fde[1])
+ continue; /* this is a CIE */
+ ptr = (const u8 *)(fde + 2);
+ header->table[n].start = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ fde_pointer_type(cie), 0, 0);
+ header->table[n].fde = (unsigned long)fde;
+ ++n;
+ }
+ WARN_ON(n != header->fde_count);
+
+ sort(header->table,
+ n,
+ sizeof(*header->table),
+ cmp_eh_frame_hdr_table_entries,
+ swap_eh_frame_hdr_table_entries);
+
+ table->hdrsz = hdrSize;
+ smp_wmb();
+ table->header = (const void *)header;
+}
+
+static void *__init balloc(unsigned long sz)
+{
+ return __alloc_bootmem_nopanic(sz,
+ sizeof(unsigned int),
+ __pa(MAX_DMA_ADDRESS));
+}
+
+void __init unwind_setup(void)
+{
+ setup_unwind_table(&root_table, balloc);
+}
+
+#ifdef CONFIG_MODULES
+
+static struct unwind_table *last_table;
+
+/* Must be called with module_mutex held. */
+void *unwind_add_table(struct module *module,
+ const void *table_start,
+ unsigned long table_size)
+{
+ struct unwind_table *table;
+
+ if (!table_size)
+ return NULL;
+
+ table = kmalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ init_unwind_table(table, module->name,
+ module->module_core, module->core_size,
+ module->module_init, module->init_size,
+ table_start, table_size,
+ NULL, 0);
+
+ if (last_table)
+ last_table->link = table;
+ else
+ root_table.link = table;
+ last_table = table;
+
+ return table;
+}
+
+struct unlink_table_info
+{
+ struct unwind_table *table;
+ int init_only;
+};
+
+static int unlink_table(void *arg)
+{
+ struct unlink_table_info *info = arg;
+ struct unwind_table *table = info->table, *prev;
+
+ for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
+ ;
+
+ if (prev->link) {
+ if (info->init_only) {
+ table->init.pc = 0;
+ table->init.range = 0;
+ info->table = NULL;
+ } else {
+ prev->link = table->link;
+ if (!prev->link)
+ last_table = prev;
+ }
+ } else
+ info->table = NULL;
+
+ return 0;
+}
+
+/* Must be called with module_mutex held. */
+void unwind_remove_table(void *handle, int init_only)
+{
+ struct unwind_table *table = handle;
+ struct unlink_table_info info;
+
+ if (!table || table == &root_table)
+ return;
+
+ if (init_only && table == last_table) {
+ table->init.pc = 0;
+ table->init.range = 0;
+ return;
+ }
+
+ info.table = table;
+ info.init_only = init_only;
+ stop_machine(unlink_table, &info, NULL);
+
+ if (info.table)
+ kfree(table);
+}
+
+#endif /* CONFIG_MODULES */
+
+static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
+{
+ const u8 *cur = *pcur;
+ uleb128_t value;
+ unsigned shift;
+
+ for (shift = 0, value = 0; cur < end; shift += 7) {
+ if (shift + 7 > 8 * sizeof(value)
+ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+ cur = end + 1;
+ break;
+ }
+ value |= (uleb128_t)(*cur & 0x7f) << shift;
+ if (!(*cur++ & 0x80))
+ break;
+ }
+ *pcur = cur;
+
+ return value;
+}
+
+static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
+{
+ const u8 *cur = *pcur;
+ sleb128_t value;
+ unsigned shift;
+
+ for (shift = 0, value = 0; cur < end; shift += 7) {
+ if (shift + 7 > 8 * sizeof(value)
+ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+ cur = end + 1;
+ break;
+ }
+ value |= (sleb128_t)(*cur & 0x7f) << shift;
+ if (!(*cur & 0x80)) {
+ value |= -(*cur++ & 0x40) << shift;
+ break;
+ }
+ }
+ *pcur = cur;
+
+ return value;
+}
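+
+/* Worked example for the two decoders above, using the byte sequences
+ * from the DWARF specification (a sketch, not called from kernel
+ * code): unsigned LEB128 0xe5 0x8e 0x26 decodes as
+ * 0x65 | (0x0e << 7) | (0x26 << 14) = 624485, and signed LEB128 0x7f
+ * decodes to -1 because bit 6 of the final byte triggers sign
+ * extension:
+ *
+ * const u8 buf[] = { 0xe5, 0x8e, 0x26 }, *p = buf;
+ * uleb128_t v = get_uleb128(&p, buf + sizeof(buf)); // yields 624485
+ */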
+
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
+{
+ const u32 *cie;
+
+ if (!*fde || (*fde & (sizeof(*fde) - 1)))
+ return &bad_cie;
+ if (!fde[1])
+ return &not_fde; /* this is a CIE */
+ if ((fde[1] & (sizeof(*fde) - 1))
+ || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
+ return NULL; /* this is not a valid FDE */
+ cie = fde + 1 - fde[1] / sizeof(*fde);
+ if (*cie <= sizeof(*cie) + 4
+ || *cie >= fde[1] - sizeof(*fde)
+ || (*cie & (sizeof(*cie) - 1))
+ || cie[1])
+ return NULL; /* this is not a (valid) CIE */
+ return cie;
+}
+
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType,
+ unsigned long text_base,
+ unsigned long data_base)
+{
+ unsigned long value = 0;
+ union {
+ const u8 *p8;
+ const u16 *p16u;
+ const s16 *p16s;
+ const u32 *p32u;
+ const s32 *p32s;
+ const unsigned long *pul;
+ } ptr;
+
+ if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
+ dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
+ return 0;
+ }
+ ptr.p8 = *pLoc;
+ switch (ptrType & DW_EH_PE_FORM) {
+ case DW_EH_PE_data2:
+ if (end < (const void *)(ptr.p16u + 1)) {
+ dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ if (ptrType & DW_EH_PE_signed)
+ value = get_unaligned(ptr.p16s++);
+ else
+ value = get_unaligned(ptr.p16u++);
+ break;
+ case DW_EH_PE_data4:
+#ifdef CONFIG_64BIT
+ if (end < (const void *)(ptr.p32u + 1)) {
+ dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ if (ptrType & DW_EH_PE_signed)
+ value = get_unaligned(ptr.p32s++);
+ else
+ value = get_unaligned(ptr.p32u++);
+ break;
+ case DW_EH_PE_data8:
+ BUILD_BUG_ON(sizeof(u64) != sizeof(value));
+#else
+ BUILD_BUG_ON(sizeof(u32) != sizeof(value));
+#endif
+ case DW_EH_PE_native:
+ if (end < (const void *)(ptr.pul + 1)) {
+ dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ value = get_unaligned(ptr.pul++);
+ break;
+ case DW_EH_PE_leb128:
+ BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
+ value = ptrType & DW_EH_PE_signed
+ ? get_sleb128(&ptr.p8, end)
+ : get_uleb128(&ptr.p8, end);
+ if ((const void *)ptr.p8 > end) {
+ dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ break;
+ default:
+ dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
+ ptrType, ptr.p8, end);
+ return 0;
+ }
+ switch (ptrType & DW_EH_PE_ADJUST) {
+ case DW_EH_PE_abs:
+ break;
+ case DW_EH_PE_pcrel:
+ value += (unsigned long)*pLoc;
+ break;
+ case DW_EH_PE_textrel:
+ if (likely(text_base)) {
+ value += text_base;
+ break;
+ }
+ dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
+ ptrType, *pLoc, end);
+ return 0;
+ case DW_EH_PE_datarel:
+ if (likely(data_base)) {
+ value += data_base;
+ break;
+ }
+ dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
+ ptrType, *pLoc, end);
+ return 0;
+ default:
+ dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
+ ptrType, *pLoc, end);
+ return 0;
+ }
+ if ((ptrType & DW_EH_PE_indirect)
+ && probe_kernel_address(value, value)) {
+ dprintk(1, "Cannot read indirect value %lx (%p,%p).",
+ value, *pLoc, end);
+ return 0;
+ }
+ *pLoc = ptr.p8;
+
+ return value;
+}
+
+static signed fde_pointer_type(const u32 *cie)
+{
+ const u8 *ptr = (const u8 *)(cie + 2);
+ unsigned version = *ptr;
+
+ if (version != 1)
+ return -1; /* unsupported */
+ if (*++ptr) {
+ const char *aug;
+ const u8 *end = (const u8 *)(cie + 1) + *cie;
+ uleb128_t len;
+
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr != 'z')
+ return -1;
+ /* check if augmentation string is nul-terminated */
+ if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
+ return -1;
+ ++ptr; /* skip terminator */
+ get_uleb128(&ptr, end); /* skip code alignment */
+ get_sleb128(&ptr, end); /* skip data alignment */
+ /* skip return address column */
+ version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
+ len = get_uleb128(&ptr, end); /* augmentation length */
+ if (ptr + len < ptr || ptr + len > end)
+ return -1;
+ end = ptr + len;
+ while (*++aug) {
+ if (ptr >= end)
+ return -1;
+ switch (*aug) {
+ case 'L':
+ ++ptr;
+ break;
+ case 'P': {
+ signed ptrType = *ptr++;
+
+ if (!read_pointer(&ptr, end, ptrType, 0, 0)
+ || ptr > end)
+ return -1;
+ }
+ break;
+ case 'R':
+ return *ptr;
+ default:
+ return -1;
+ }
+ }
+ }
+ return DW_EH_PE_native|DW_EH_PE_abs;
+}
+
+static int advance_loc(unsigned long delta, struct unwind_state *state)
+{
+ state->loc += delta * state->codeAlign;
+
+ return delta > 0;
+}
+
+static void set_rule(uleb128_t reg,
+ enum item_location where,
+ uleb128_t value,
+ struct unwind_state *state)
+{
+ if (reg < ARRAY_SIZE(state->regs)) {
+ state->regs[reg].where = where;
+ state->regs[reg].value = value;
+ }
+}
+
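+/* Interpret a run of DWARF call frame instructions. The top two bits
+ * of each opcode select its class: 0x40|delta advances the current
+ * location by a factored delta, 0x80|reg takes a uleb128 operand and
+ * records the register as saved in Memory at that factored offset,
+ * 0xc0|reg drops any rule for the register (Nowhere here), and 0x00
+ * introduces the extended opcodes handled by the inner switch. */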
+static int processCFI(const u8 *start,
+ const u8 *end,
+ unsigned long targetLoc,
+ signed ptrType,
+ struct unwind_state *state)
+{
+ union {
+ const u8 *p8;
+ const u16 *p16;
+ const u32 *p32;
+ } ptr;
+ int result = 1;
+
+ if (start != state->cieStart) {
+ state->loc = state->org;
+ result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
+ if (targetLoc == 0 && state->label == NULL)
+ return result;
+ }
+ for (ptr.p8 = start; result && ptr.p8 < end; ) {
+ switch (*ptr.p8 >> 6) {
+ uleb128_t value;
+
+ case 0:
+ switch (*ptr.p8++) {
+ case DW_CFA_nop:
+ break;
+ case DW_CFA_set_loc:
+ state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
+ if (state->loc == 0)
+ result = 0;
+ break;
+ case DW_CFA_advance_loc1:
+ result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
+ break;
+ case DW_CFA_advance_loc2:
+ /* need two bytes left in the buffer */
+ result = ptr.p8 <= end - 2
+ && advance_loc(*ptr.p16++, state);
+ break;
+ case DW_CFA_advance_loc4:
+ /* need four bytes left in the buffer */
+ result = ptr.p8 <= end - 4
+ && advance_loc(*ptr.p32++, state);
+ break;
+ case DW_CFA_offset_extended:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_val_offset:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_offset_extended_sf:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_val_offset_sf:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_restore_extended:
+ case DW_CFA_undefined:
+ case DW_CFA_same_value:
+ set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
+ break;
+ case DW_CFA_register:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value,
+ Register,
+ get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_remember_state:
+ if (ptr.p8 == state->label) {
+ state->label = NULL;
+ return 1;
+ }
+ if (state->stackDepth >= MAX_STACK_DEPTH) {
+ dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ state->stack[state->stackDepth++] = ptr.p8;
+ break;
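+ /* DW_CFA_remember_state saves only this position in the
+ * instruction stream; DW_CFA_restore_state below replays
+ * the CFI from the start and stops here again, matching
+ * via state->label. */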
+ case DW_CFA_restore_state:
+ if (state->stackDepth) {
+ const uleb128_t loc = state->loc;
+ const u8 *label = state->label;
+
+ state->label = state->stack[state->stackDepth - 1];
+ memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
+ memset(state->regs, 0, sizeof(state->regs));
+ state->stackDepth = 0;
+ result = processCFI(start, end, 0, ptrType, state);
+ state->loc = loc;
+ state->label = label;
+ } else {
+ dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ break;
+ case DW_CFA_def_cfa:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ /*nobreak*/
+ case DW_CFA_def_cfa_offset:
+ state->cfa.offs = get_uleb128(&ptr.p8, end);
+ break;
+ case DW_CFA_def_cfa_sf:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ /*nobreak*/
+ case DW_CFA_def_cfa_offset_sf:
+ state->cfa.offs = get_sleb128(&ptr.p8, end)
+ * state->dataAlign;
+ break;
+ case DW_CFA_def_cfa_register:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ break;
+ /*todo case DW_CFA_def_cfa_expression: */
+ /*todo case DW_CFA_expression: */
+ /*todo case DW_CFA_val_expression: */
+ case DW_CFA_GNU_args_size:
+ get_uleb128(&ptr.p8, end);
+ break;
+ case DW_CFA_GNU_negative_offset_extended:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value,
+ Memory,
+ (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_GNU_window_save:
+ default:
+ dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
+ result = 0;
+ break;
+ }
+ break;
+ case 1:
+ result = advance_loc(*ptr.p8++ & 0x3f, state);
+ break;
+ case 2:
+ value = *ptr.p8++ & 0x3f;
+ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+ break;
+ case 3:
+ set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
+ break;
+ }
+ if (ptr.p8 > end) {
+ dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
+ result = 0;
+ }
+ if (result && targetLoc != 0 && targetLoc < state->loc)
+ return 1;
+ }
+
+ if (result && ptr.p8 < end)
+ dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
+
+ return result
+ && ptr.p8 == end
+ && (targetLoc == 0
+ || (/*todo While in theory this should apply, gcc in practice omits
+ everything past the function prolog, and hence the location
+ never reaches the end of the function.
+ targetLoc < state->loc &&*/ state->label == NULL));
+}
+
+/* Unwind to the previous frame.  Returns 0 if successful, a negative
+ * number in case of an error. */
+int unwind(struct unwind_frame_info *frame)
+{
+#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
+ const u32 *fde = NULL, *cie = NULL;
+ const u8 *ptr = NULL, *end = NULL;
+ unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
+ unsigned long startLoc = 0, endLoc = 0, cfa;
+ unsigned i;
+ signed ptrType = -1;
+ uleb128_t retAddrReg = 0;
+ const struct unwind_table *table;
+ struct unwind_state state;
+
+ if (UNW_PC(frame) == 0)
+ return -EINVAL;
+ if ((table = find_table(pc)) != NULL
+ && !(table->size & (sizeof(*fde) - 1))) {
+ const u8 *hdr = table->header;
+ unsigned long tableSize;
+
+ smp_rmb();
+ if (hdr && hdr[0] == 1) {
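+ /* .eh_frame_hdr version 1: hdr[1] encodes the eh_frame
+ * pointer, hdr[2] the FDE count and hdr[3] the entries
+ * of the (startLoc, fde) lookup table. */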
+ switch (hdr[3] & DW_EH_PE_FORM) {
+ case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
+ case DW_EH_PE_data2: tableSize = 2; break;
+ case DW_EH_PE_data4: tableSize = 4; break;
+ case DW_EH_PE_data8: tableSize = 8; break;
+ default: tableSize = 0; break;
+ }
+ ptr = hdr + 4;
+ end = hdr + table->hdrsz;
+ if (tableSize
+ && read_pointer(&ptr, end, hdr[1], 0, 0)
+ == (unsigned long)table->address
+ && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
+ && i == (end - ptr) / (2 * tableSize)
+ && !((end - ptr) % (2 * tableSize))) {
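+ /* Binary search for the last table entry whose
+ * start address is <= pc. */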
+ do {
+ const u8 *cur = ptr + (i / 2) * (2 * tableSize);
+
+ startLoc = read_pointer(&cur,
+ cur + tableSize,
+ hdr[3], 0,
+ (unsigned long)hdr);
+ if (pc < startLoc)
+ i /= 2;
+ else {
+ ptr = cur - tableSize;
+ i = (i + 1) / 2;
+ }
+ } while (startLoc && i > 1);
+ if (i == 1
+ && (startLoc = read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3], 0,
+ (unsigned long)hdr)) != 0
+ && pc >= startLoc)
+ fde = (void *)read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3], 0,
+ (unsigned long)hdr);
+ }
+ }
+ if (hdr && !fde)
+ dprintk(3, "Binary lookup for %lx failed.", pc);
+
+ if (fde != NULL) {
+ cie = cie_for_fde(fde, table);
+ ptr = (const u8 *)(fde + 2);
+ if (cie != NULL
+ && cie != &bad_cie
+ && cie != &not_fde
+ && (ptrType = fde_pointer_type(cie)) >= 0
+ && read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0) == startLoc) {
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0);
+ if (pc >= endLoc)
+ fde = NULL;
+ } else
+ fde = NULL;
+ if (!fde)
+ dprintk(1, "Binary lookup result for %lx discarded.", pc);
+ }
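+ /* The header table gave nothing usable; fall back to a
+ * linear scan over every FDE in this unwind table. */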
+ if (fde == NULL) {
+ for (fde = table->address, tableSize = table->size;
+ cie = NULL, tableSize > sizeof(*fde)
+ && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde,
+ fde += 1 + *fde / sizeof(*fde)) {
+ cie = cie_for_fde(fde, table);
+ if (cie == &bad_cie) {
+ cie = NULL;
+ break;
+ }
+ if (cie == NULL
+ || cie == &not_fde
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ continue;
+ ptr = (const u8 *)(fde + 2);
+ startLoc = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0);
+ if (!startLoc)
+ continue;
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0);
+ if (pc >= startLoc && pc < endLoc)
+ break;
+ }
+ if (!fde)
+ dprintk(3, "Linear lookup for %lx failed.", pc);
+ }
+ }
+ if (cie != NULL) {
+ memset(&state, 0, sizeof(state));
+ state.cieEnd = ptr; /* keep here temporarily */
+ ptr = (const u8 *)(cie + 2);
+ end = (const u8 *)(cie + 1) + *cie;
+ frame->call_frame = 1;
+ if ((state.version = *ptr) != 1)
+ cie = NULL; /* unsupported version */
+ else if (*++ptr) {
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr == 'z') {
+ while (++ptr < end && *ptr) {
+ switch (*ptr) {
+ /* check for ignorable (or already handled)
+ * nul-terminated augmentation string */
+ case 'L':
+ case 'P':
+ case 'R':
+ continue;
+ case 'S':
+ frame->call_frame = 0;
+ continue;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+ if (ptr >= end || *ptr)
+ cie = NULL;
+ }
+ if (!cie)
+ dprintk(1, "CIE unusable (%p,%p).", ptr, end);
+ ++ptr;
+ }
+ if (cie != NULL) {
+ /* get code alignment factor */
+ state.codeAlign = get_uleb128(&ptr, end);
+ /* get data alignment factor */
+ state.dataAlign = get_sleb128(&ptr, end);
+ if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
+ cie = NULL;
+ else if (UNW_PC(frame) % state.codeAlign
+ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+ dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
+ UNW_PC(frame), UNW_SP(frame));
+ return -EPERM;
+ } else {
+ retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+
+ ptr += augSize;
+ }
+ if (ptr > end
+ || retAddrReg >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(retAddrReg)
+ || reg_info[retAddrReg].width != sizeof(unsigned long))
+ cie = NULL;
+ }
+ if (!cie)
+ dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
+ }
+ if (cie != NULL) {
+ state.cieStart = ptr;
+ ptr = state.cieEnd;
+ state.cieEnd = end;
+ end = (const u8 *)(fde + 1) + *fde;
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+
+ if ((ptr += augSize) > end)
+ fde = NULL;
+ }
+ if (!fde)
+ dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
+ }
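+/* No usable dwarf unwind info was found for this PC; when frame
+ * pointers are available, fall back to following the frame-pointer
+ * chain, with sanity checks on every link. */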
+#ifdef CONFIG_FRAME_POINTER
+ if (cie == NULL || fde == NULL) {
+ unsigned long top = TSK_STACK_TOP(frame->task);
+ unsigned long bottom = STACK_BOTTOM(frame->task);
+ unsigned long fp = UNW_FP(frame);
+ unsigned long sp = UNW_SP(frame);
+ unsigned long link;
+
+ if ((sp | fp) & (sizeof(unsigned long) - 1))
+ return -EPERM;
+
+# if FRAME_RETADDR_OFFSET < 0
+ if (!(sp < top && fp <= sp && bottom < fp))
+# else
+ if (!(sp < top && fp >= sp && bottom < fp))
+# endif
+ return -ENXIO;
+
+ if (probe_kernel_address(fp + FRAME_LINK_OFFSET, link))
+ return -ENXIO;
+
+# if FRAME_RETADDR_OFFSET < 0
+ if (!(link > bottom && link < fp))
+# else
+ if (!(link > bottom && link > fp))
+# endif
+ return -ENXIO;
+
+ if (link & (sizeof(unsigned long) - 1))
+ return -ENXIO;
+
+ fp += FRAME_RETADDR_OFFSET;
+ if (probe_kernel_address(fp, UNW_PC(frame)))
+ return -ENXIO;
+
+ /* Ok, we can use it */
+# if FRAME_RETADDR_OFFSET < 0
+ UNW_SP(frame) = fp - sizeof(UNW_PC(frame));
+# else
+ UNW_SP(frame) = fp + sizeof(UNW_PC(frame));
+# endif
+ UNW_FP(frame) = link;
+ return 0;
+ }
+#endif
+ state.org = startLoc;
+ memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
+ /* process instructions */
+ if (!processCFI(ptr, end, pc, ptrType, &state)
+ || state.loc > endLoc
+ || state.regs[retAddrReg].where == Nowhere
+ || state.cfa.reg >= ARRAY_SIZE(reg_info)
+ || reg_info[state.cfa.reg].width != sizeof(unsigned long)
+ || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
+ || state.cfa.offs % sizeof(unsigned long)) {
+ dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
+ return -EIO;
+ }
+ /* update frame */
+#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
+ if (frame->call_frame
+ && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
+ frame->call_frame = 0;
+#endif
+ cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
+ startLoc = min((unsigned long)UNW_SP(frame), cfa);
+ endLoc = max((unsigned long)UNW_SP(frame), cfa);
+ if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
+ startLoc = min(STACK_LIMIT(cfa), cfa);
+ endLoc = max(STACK_LIMIT(cfa), cfa);
+ }
+#ifndef CONFIG_64BIT
+# define CASES CASE(8); CASE(16); CASE(32)
+#else
+# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
+#endif
+ pc = UNW_PC(frame);
+ sp = UNW_SP(frame);
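+ /* First pass: fetch every value that is restored from another
+ * register now, before the second pass below starts to
+ * overwrite the registers of this frame. */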
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ if (REG_INVALID(i)) {
+ if (state.regs[i].where == Nowhere)
+ continue;
+ dprintk(1, "Cannot restore register %u (%d).",
+ i, state.regs[i].where);
+ return -EIO;
+ }
+ switch (state.regs[i].where) {
+ default:
+ break;
+ case Register:
+ if (state.regs[i].value >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(state.regs[i].value)
+ || reg_info[i].width > reg_info[state.regs[i].value].width) {
+ dprintk(1, "Cannot restore register %u from register %lu.",
+ i, state.regs[i].value);
+ return -EIO;
+ }
+ switch (reg_info[state.regs[i].value].width) {
+#define CASE(n) \
+ case sizeof(u##n): \
+ state.regs[i].value = FRAME_REG(state.regs[i].value, \
+ const u##n); \
+ break
+ CASES;
+#undef CASE
+ default:
+ dprintk(1, "Unsupported register size %u (%lu).",
+ reg_info[state.regs[i].value].width,
+ state.regs[i].value);
+ return -EIO;
+ }
+ break;
+ }
+ }
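+ /* Second pass: apply the rules, restoring each register from
+ * the CFA, from memory, or from the value fetched above. */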
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ if (REG_INVALID(i))
+ continue;
+ switch (state.regs[i].where) {
+ case Nowhere:
+ if (reg_info[i].width != sizeof(UNW_SP(frame))
+ || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
+ != &UNW_SP(frame))
+ continue;
+ UNW_SP(frame) = cfa;
+ break;
+ case Register:
+ switch (reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ FRAME_REG(i, u##n) = state.regs[i].value; \
+ break
+ CASES;
+#undef CASE
+ default:
+ dprintk(1, "Unsupported register size %u (%u).",
+ reg_info[i].width, i);
+ return -EIO;
+ }
+ break;
+ case Value:
+ if (reg_info[i].width != sizeof(unsigned long)) {
+ dprintk(1, "Unsupported value size %u (%u).",
+ reg_info[i].width, i);
+ return -EIO;
+ }
+ FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
+ * state.dataAlign;
+ break;
+ case Memory: {
+ unsigned long addr = cfa + state.regs[i].value
+ * state.dataAlign;
+
+ if ((state.regs[i].value * state.dataAlign)
+ % sizeof(unsigned long)
+ || addr < startLoc
+ || addr + sizeof(unsigned long) < addr
+ || addr + sizeof(unsigned long) > endLoc) {
+ dprintk(1, "Bad memory location %lx (%lx).",
+ addr, state.regs[i].value);
+ return -EIO;
+ }
+ switch (reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ if (probe_kernel_address(addr, \
+ FRAME_REG(i, u##n))) \
+ return -EFAULT; \
+ break
+ CASES;
+#undef CASE
+ default:
+ dprintk(1, "Unsupported memory size %u (%u).",
+ reg_info[i].width, i);
+ return -EIO;
+ }
+ }
+ break;
+ }
+ }
+
+ if (UNW_PC(frame) % state.codeAlign
+ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+ dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
+ UNW_PC(frame), UNW_SP(frame));
+ return -EIO;
+ }
+ if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
+ dprintk(1, "No progress (%lx,%lx).", pc, sp);
+ return -EIO;
+ }
+
+ return 0;
+#undef CASES
+#undef FRAME_REG
+}
+EXPORT_SYMBOL_GPL(unwind);
+
+int unwind_init_frame_info(struct unwind_frame_info *info,
+ struct task_struct *tsk,
+ /*const*/ struct pt_regs *regs)
+{
+ info->task = tsk;
+ info->call_frame = 0;
+ arch_unw_init_frame_info(info, regs);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_init_frame_info);
+
+/*
+ * Prepare to unwind a blocked task.
+ */
+int unwind_init_blocked(struct unwind_frame_info *info,
+ struct task_struct *tsk)
+{
+ info->task = tsk;
+ info->call_frame = 0;
+ arch_unw_init_blocked(info);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_init_blocked);
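+/*
+ * Illustrative use only (a sketch, not part of this interface): walk
+ * the stack of a blocked task "tsk" by initializing a frame and
+ * calling unwind() until it fails or the PC becomes zero:
+ *
+ *	struct unwind_frame_info info;
+ *
+ *	if (!unwind_init_blocked(&info, tsk))
+ *		while (unwind(&info) == 0 && UNW_PC(&info))
+ *			printk(" [<%lx>]\n", UNW_PC(&info));
+ */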
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+int unwind_init_running(struct unwind_frame_info *info,
+ asmlinkage unwind_callback_fn callback,
+ const struct stacktrace_ops *ops, void *data)
+{
+ info->task = current;
+ info->call_frame = 0;
+
+ return arch_unwind_init_running(info, callback, ops, data);
+}
+EXPORT_SYMBOL_GPL(unwind_init_running);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs). Returns 0 if successful, negative number in case of
+ * error.
+ */
+int unwind_to_user(struct unwind_frame_info *info)
+{
+ while (!arch_unw_user_mode(info)) {
+ int err = unwind(info);
+
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_to_user);
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/reserve.h>
#include <net/inet_frag.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/reserve.h>
#include <net/sock.h>
#include <net/snmp.h>